# %% [markdown]
# # pandas basics: Series and DataFrame
#
# A short walkthrough of the two core pandas containers: the 1D `Series`
# and the 2D `DataFrame`. It covers construction, sorting, descriptive
# statistics, selection with `iloc` / `loc` / boolean masks, and assignment.
#
# The cells are meant to be executed top to bottom on a fresh kernel.
# Strings passed to `print` are labels for the value printed next to them.

# %%
# All imports live in one place so that a fresh kernel can run every cell.
import numpy as np  # used only for the NaN sentinel
import pandas as pd

# %% [markdown]
# ## 1D data structure: Series

# %%
# Build a Series from a list; row labels 0..n-1 are assigned automatically.
s1 = pd.Series([1, 2, 3, 5])

# %%
print(s1)

# %%
# Element access by label: series[label] or series.get(label).
print('s1[2]', s1[2])
print('s1[1]', s1.get(1))

# %%
# Label 8 does not exist in s1, so .get() falls back to the supplied default.
print('s1[8]', s1.get(8, np.nan))

# %%
# Build a Series from a list with explicitly given row labels.
s2 = pd.Series([1, 2, 3, 5], index=['a', 'b', 'c', 'd'])

# %%
print("s2['d']", s2['d'])

# %%
# Build a Series from a dict: the keys become the row labels.
s3 = pd.Series({'e': 2, 'a': 1, 'b': 2, 'c': 3, 'd': 5})
print(s3)
print("s3['a']", s3['a'])

# %%
# Extract the labels, as a plain list ...
print(s3.index.tolist())
# ... or as an Index object.
print(s3.keys())

# %%
# Extract the values.
print(s3.values)

# %%
# Element type.
print(s3.dtype)

# %%
# Number of occurrences of each value.
print(s3.value_counts())

# %%
s2

# %%
s3

# %%
# Addition aligns on labels. 'e' is present only in s3, so the sum has NaN
# there, and the presence of NaN turns the result into a float Series.
s2 + s3

# %%
# Identical labels on both sides: no NaN appears and the dtype stays integer.
s2 + s2

# %% [markdown]
# ## 2D data structure: DataFrame

# %%
# Build a DataFrame from a dict of Series; the dict keys become column names.
d1 = {
    'prva': pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    'druga': pd.Series(['x', 'y', 'z'], index=['a', 'b', 'c']),
}
df1 = pd.DataFrame(d1)

print("df1")
print(df1)

# %%
# The three Series carry different labels on purpose: the frame built from
# them gets the union of the labels, with NaN wherever a column has no value.
d2 = {
    'prva': pd.Series([3, 2, 1], index=['a', 'b', 'c']),
    'druga': pd.Series(['x', 'z', 'y'], index=['a', 'b', 'd']),
    'treca': pd.Series(['m', 'n', 'p'], index=['a', 'b', 'e']),
}

# %%
df2 = pd.DataFrame(d2)

# %%
df2

# %%
# Build a DataFrame from a list of row tuples. Without `columns=` the columns
# are labelled 0, 1, 2; pass e.g. columns=['A', 'B', 'C'] to name them.
df3 = pd.DataFrame([(1, 2, 'X'), (2, 3, 'C')])

# %%
df3

# %% [markdown]
# ### Sorting

# %%
# Sort the rows by index label (use ascending=False for the reverse order).
df2 = df2.sort_index(ascending=True)

# %%
df2

# %%
# axis=1 sorts the columns by their names instead of the rows.
df2 = df2.sort_index(ascending=True, axis=1)

# %%
df2

# %%
# Sort the rows by the values of a column; `by` also accepts a list of columns.
df2 = df2.sort_values(by='prva')

# %%
df2

# %% [markdown]
# ### Descriptive statistics

# %%
df4 = pd.DataFrame({
    'number': [1, 2, 2, 3],
    'object': ['c', 'd', 'e', 'c'],
})

# %%
print(df4.dtypes)

# %%
# A plain describe() summarises numeric columns only; include='all' is what
# actually covers every column, as the heading promises.
print("Opis svih kolona")
print(df4.describe(include='all'))

# %%
print("Opis numerickih kolona")
print(df4.describe(include=['number']))

# %%
print("Opis kolona object")
print(df4.describe(include=['object']))

# %%
df2

# %%
# Some descriptive statistics.
# numeric_only=True restricts the mean to the numeric column 'prva'; without
# it, recent pandas versions raise TypeError on the string columns.
print("Srednje vrednosti")
print(df2.mean(numeric_only=True))

# %%
# skipna=False lets NaN take part in the computation, so the mean of a column
# that contains NaN is NaN.
print("Srednje vrednosti uzimajuci u obzir NaN")
print(df2.mean(numeric_only=True, skipna=False))

# %%
df5 = pd.DataFrame({'a': [1, 2, 3], 'b': [2, 8, 10]})
print(df5)
# axis=1 aggregates across the columns, giving one value per row.
print("Srednja vrednost za svaki red")
print(df5.mean(axis=1))

# %%
print("Zbir po kolonama")
print(df5.sum())

# %%
print("Zbir po redovima")
print(df5.sum(axis=1))

# %%
# quantile() defaults to q=0.5; a list of q values returns one row per
# quantile.
print("Percentili")
print(df5.quantile())
print(df5.quantile(0.25))
print(df5.quantile([0.25, 0.5, 0.75]))

# %%
# Most frequent values per column. Every value in this df2 occurs exactly
# once, so each column lists all of its values.
print("Najzastupljenije vrednosti")
print(df2.mode())

# %%
# Rebuild df2 so that 'druga' contains a repeated value ('x'); this also
# restores the original row and column order.
d2 = {
    'prva': pd.Series([3, 2, 1], index=['a', 'b', 'c']),
    'druga': pd.Series(['x', 'x', 'y'], index=['a', 'b', 'd']),
    'treca': pd.Series(['m', 'n', 'p'], index=['a', 'b', 'e']),
}
df2 = pd.DataFrame(d2)

# %%
# 'druga' now has a single mode, so the rest of that column is padded with NaN.
print("Najzastupljenije vrednosti")
print(df2.mode())

# %%
# Number of distinct values per column.
print("Razlicite vrednosti")
print(df2.nunique())

# %% [markdown]
# ### Selecting data

# %%
# Select a column.
print("df2['prva']")
print(df2['prva'])

# %%
# Select a single cell: by column label then row label, or by integer position.
print("df2['prva']['a']", df2['prva']['a'])
print("df2.iloc[0, 0]", df2.iloc[0, 0])

# %%
# Note: a scalar row position gives a Series ...
print("df2.iloc[0, 0:2]")
print(df2.iloc[0, 0:2])

# %%
# ... while a list of row positions gives a DataFrame.
print("df2.iloc[[0], 0:2]")
print(df2.iloc[[0], 0:2])

# %%
print("df2.iloc[[0,2], 0:2]")
print(df2.iloc[[0, 2], 0:2])

# %%
print("df2.iloc[:3, :]")
print(df2.iloc[:3, :])

# %%
# .loc selects by label instead of by position.
print("df2.loc['a']")
print(df2.loc['a'])

# %%
# A label slice includes both end points; which rows lie between them depends
# on how the index is currently sorted.
print("df2.loc['a':'c']")
print(df2.loc['a':'c'])

# %%
print("df2.loc[['a', 'c'], ['prva','treca']]")
print(df2.loc[['a', 'c'], ['prva', 'treca']])

# %%
# A comparison yields a boolean Series; comparing NaN gives False.
df2['prva'] > 1

# %%
# A boolean Series used as a key keeps only the rows where it is True.
print("df2[df2['prva']>1]")
print(df2[df2['prva'] > 1])

# %%
print("df2[df2['prva']>1][['prva','treca']]")
print(df2[df2['prva'] > 1][['prva', 'treca']])

# %%
# A scalar column label returns a Series ...
print("df2.loc[df2['prva']>2, 'prva']")
print(df2.loc[df2['prva'] > 2, 'prva'])

# %%
# ... while a list of column labels returns a DataFrame.
print("df2.loc[df2['prva']>2, ['prva']]")
print(df2.loc[df2['prva'] > 2, ['prva']])

# %%
# For readability the selection condition can be stored in its own variable.
print("df2['treca']")
print(df2['treca'])
idx = df2['treca'] == 'm'
print("df2[idx]")
print(df2[idx])

# %% [markdown]
# ### Assigning and replacing values

# %%
# Assign a value to a single cell.
print("df2")
print(df2)

print("df2.loc['a', 'prva']=8")
df2.loc['a', 'prva'] = 8

# %%
print("df2 posle promene vrednosti")
print(df2)

# %%
# Replace values: fill the missing entries of 'treca' with 'l'.
df2.loc[df2['treca'].isna(), 'treca'] = 'l'

# %%
print(df2)

# %%
# Turn every 'l' back into NaN (replace() works on the whole frame).
df2 = df2.replace('l', np.nan)
print("df2 nakon zamene l sa NaN")
print(df2)