{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "unique-intellectual",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "shared-fitness",
"metadata": {},
"outputs": [],
"source": [
"# 1-D data structure: pandas Series\n",
"\n",
"# Build a Series from a plain list; row labels are assigned automatically (0..n-1)\n",
"s1 = pd.Series([1, 2, 3, 5])"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "mighty-fifteen",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 1\n",
"1 2\n",
"2 3\n",
"3 5\n",
"dtype: int64\n"
]
}
],
"source": [
"print(s1)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "signed-pencil",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"s1[2] 3\n",
"s1[1] 2\n"
]
}
],
"source": [
"# Element access by row label: series[label] or series.get(label)\n",
"value_at_label_2 = s1[2]\n",
"value_at_label_1 = s1.get(1)\n",
"print('s1[2]', value_at_label_2)\n",
"print('s1[1]', value_at_label_1)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "genetic-hurricane",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"s1[8] nan\n"
]
}
],
"source": [
"# Series.get returns the supplied default (NaN here) instead of raising KeyError\n",
"# when the label is absent. numpy is imported in the first cell of the notebook.\n",
"print('s1[8]', s1.get(8, np.nan))\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "handmade-montgomery",
"metadata": {},
"outputs": [],
"source": [
"# Build a Series from a list with explicit row labels\n",
"s2 = pd.Series([1, 2, 3, 5], index=list('abcd'))\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "exterior-plain",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"s2['d'] 5\n"
]
}
],
"source": [
"print(\"s2['d']\" ,s2['d'])\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ahead-application",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"e 2\n",
"a 1\n",
"b 2\n",
"c 3\n",
"d 5\n",
"dtype: int64\n",
"s3['a'] 1\n"
]
}
],
"source": [
"# Build a Series from a dict: the keys become the row labels, in insertion order\n",
"s3 = pd.Series({'e': 2, 'a': 1, 'b': 2, 'c': 3, 'd': 5})\n",
"print(s3)\n",
"print(\"s3['a']\", s3['a'])\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "unlikely-blond",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['e', 'a', 'b', 'c', 'd']\n",
"Index(['e', 'a', 'b', 'c', 'd'], dtype='object')\n"
]
}
],
"source": [
"# Extract the row labels as a plain Python list ...\n",
"print(list(s3.index))\n",
"# ... or as a pandas Index (Series.keys() returns the index)\n",
"print(s3.keys())\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "structured-multimedia",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2 1 2 3 5]\n"
]
}
],
"source": [
"# Extract the underlying values as a NumPy array\n",
"print(s3.to_numpy())\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "advisory-latitude",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"int64\n"
]
}
],
"source": [
"#tip elemenata\n",
"print(s3.dtype)\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "excellent-logic",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2 2\n",
"1 1\n",
"3 1\n",
"5 1\n",
"dtype: int64\n"
]
}
],
"source": [
"#broj pojavljivanja svake vrednosti\n",
"print(s3.value_counts())\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "average-thailand",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a 1\n",
"b 2\n",
"c 3\n",
"d 5\n",
"dtype: int64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s2"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "younger-algeria",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"e 2\n",
"a 1\n",
"b 2\n",
"c 3\n",
"d 5\n",
"dtype: int64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s3"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "fallen-painting",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a 2.0\n",
"b 4.0\n",
"c 6.0\n",
"d 10.0\n",
"e NaN\n",
"dtype: float64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Arithmetic aligns on index labels: 'e' exists only in s3, so its result is NaN\n",
"# and the result dtype is upcast to float64.\n",
"s2 + s3"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "contained-regular",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a 2\n",
"b 4\n",
"c 6\n",
"d 10\n",
"dtype: int64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s2+s2"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "framed-assistant",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"df1\n",
" prva druga\n",
"a 1 x\n",
"b 2 y\n",
"c 3 z\n"
]
}
],
"source": [
"# 2-D data structure: pandas DataFrame\n",
"\n",
"# Each dict entry becomes a column; both Series carry the same row labels\n",
"row_labels = ['a', 'b', 'c']\n",
"d1 = {\n",
"    'prva': pd.Series([1, 2, 3], index=row_labels),\n",
"    'druga': pd.Series(['x', 'y', 'z'], index=row_labels),\n",
"}\n",
"df1 = pd.DataFrame(d1)\n",
"\n",
"print(\"df1\")\n",
"print(df1)\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "solid-guinea",
"metadata": {},
"outputs": [],
"source": [
"# Column Series with differing row labels: when this dict is passed to pd.DataFrame,\n",
"# the frame's index is the union of all labels and uncovered positions become NaN.\n",
"d2 = {\n",
"    'prva': pd.Series([3, 2, 1], index=list('abc')),\n",
"    'druga': pd.Series(['x', 'z', 'y'], index=list('abd')),\n",
"    'treca': pd.Series(['m', 'n', 'p'], index=list('abe')),\n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "radio-chocolate",
"metadata": {},
"outputs": [],
"source": [
"df2 = pd.DataFrame(d2)\n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "objective-rating",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" prva | \n",
" druga | \n",
" treca | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 3.0 | \n",
" x | \n",
" m | \n",
"
\n",
" \n",
" b | \n",
" 2.0 | \n",
" z | \n",
" n | \n",
"
\n",
" \n",
" c | \n",
" 1.0 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" d | \n",
" NaN | \n",
" y | \n",
" NaN | \n",
"
\n",
" \n",
" e | \n",
" NaN | \n",
" NaN | \n",
" p | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" prva druga treca\n",
"a 3.0 x m\n",
"b 2.0 z n\n",
"c 1.0 NaN NaN\n",
"d NaN y NaN\n",
"e NaN NaN p"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "printable-breast",
"metadata": {},
"outputs": [],
"source": [
"# Build a DataFrame from a list of row tuples. No column names are given, so the\n",
"# columns get default integer labels; pass columns=['A', 'B', 'C'] to name them.\n",
"rows = [(1, 2, 'X'), (2, 3, 'C')]\n",
"df3 = pd.DataFrame(rows)\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "robust-politics",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" X | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" C | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2\n",
"0 1 2 X\n",
"1 2 3 C"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "careful-resort",
"metadata": {},
"outputs": [],
"source": [
"# Sort the rows by index label, ascending (ascending=False gives the reverse order)\n",
"df2 = df2.sort_index(ascending=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "enormous-protest",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" prva | \n",
" druga | \n",
" treca | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" 3.0 | \n",
" x | \n",
" m | \n",
"
\n",
" \n",
" b | \n",
" 2.0 | \n",
" z | \n",
" n | \n",
"
\n",
" \n",
" c | \n",
" 1.0 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" d | \n",
" NaN | \n",
" y | \n",
" NaN | \n",
"
\n",
" \n",
" e | \n",
" NaN | \n",
" NaN | \n",
" p | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" prva druga treca\n",
"a 3.0 x m\n",
"b 2.0 z n\n",
"c 1.0 NaN NaN\n",
"d NaN y NaN\n",
"e NaN NaN p"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "artificial-roots",
"metadata": {},
"outputs": [],
"source": [
"# Sort the columns by name (axis=1), ascending\n",
"df2 = df2.sort_index(axis=1, ascending=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "patient-wayne",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" druga | \n",
" prva | \n",
" treca | \n",
"
\n",
" \n",
" \n",
" \n",
" a | \n",
" x | \n",
" 3.0 | \n",
" m | \n",
"
\n",
" \n",
" b | \n",
" z | \n",
" 2.0 | \n",
" n | \n",
"
\n",
" \n",
" c | \n",
" NaN | \n",
" 1.0 | \n",
" NaN | \n",
"
\n",
" \n",
" d | \n",
" y | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" e | \n",
" NaN | \n",
" NaN | \n",
" p | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" druga prva treca\n",
"a x 3.0 m\n",
"b z 2.0 n\n",
"c NaN 1.0 NaN\n",
"d y NaN NaN\n",
"e NaN NaN p"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "expressed-array",
"metadata": {},
"outputs": [],
"source": [
"# Sort the rows by the values of one column; `by` also accepts a list of columns.\n",
"# Rows whose sort key is NaN are placed last.\n",
"df2 = df2.sort_values(by='prva')\n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "appointed-summer",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" druga | \n",
" prva | \n",
" treca | \n",
"
\n",
" \n",
" \n",
" \n",
" c | \n",
" NaN | \n",
" 1.0 | \n",
" NaN | \n",
"
\n",
" \n",
" b | \n",
" z | \n",
" 2.0 | \n",
" n | \n",
"
\n",
" \n",
" a | \n",
" x | \n",
" 3.0 | \n",
" m | \n",
"
\n",
" \n",
" d | \n",
" y | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" e | \n",
" NaN | \n",
" NaN | \n",
" p | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" druga prva treca\n",
"c NaN 1.0 NaN\n",
"b z 2.0 n\n",
"a x 3.0 m\n",
"d y NaN NaN\n",
"e NaN NaN p"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "narrow-barbados",
"metadata": {},
"outputs": [],
"source": [
"# Small frame with one integer column and one string column\n",
"df4 = pd.DataFrame(\n",
"    {\n",
"        'number': [1, 2, 2, 3],\n",
"        'object': ['c', 'd', 'e', 'c'],\n",
"    }\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "solved-dover",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"number int64\n",
"object object\n",
"dtype: object\n"
]
}
],
"source": [
"print(df4.dtypes)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "specialized-reviewer",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Opis svih kolona\n",
" number\n",
"count 4.000000\n",
"mean 2.000000\n",
"std 0.816497\n",
"min 1.000000\n",
"25% 1.750000\n",
"50% 2.000000\n",
"75% 2.250000\n",
"max 3.000000\n"
]
}
],
"source": [
"# include='all' is needed to summarise every column: with no arguments,\n",
"# describe() on this mixed frame reports only the numeric column.\n",
"print(\"Opis svih kolona\")\n",
"print(df4.describe(include='all'))\n"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "legendary-throw",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Opis numerickih kolona\n",
" number\n",
"count 4.000000\n",
"mean 2.000000\n",
"std 0.816497\n",
"min 1.000000\n",
"25% 1.750000\n",
"50% 2.000000\n",
"75% 2.250000\n",
"max 3.000000\n"
]
}
],
"source": [
"print(\"Opis numerickih kolona\")\n",
"print(df4.describe(include=['number']))\n"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "fabulous-label",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Opis kolona object\n",
" object\n",
"count 4\n",
"unique 3\n",
"top c\n",
"freq 2\n"
]
}
],
"source": [
"print(\"Opis kolona object\")\n",
"print(df4.describe(include=['object']))\n"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "nonprofit-fabric",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" druga | \n",
" prva | \n",
" treca | \n",
"
\n",
" \n",
" \n",
" \n",
" c | \n",
" NaN | \n",
" 1.0 | \n",
" NaN | \n",
"
\n",
" \n",
" b | \n",
" z | \n",
" 2.0 | \n",
" n | \n",
"
\n",
" \n",
" a | \n",
" x | \n",
" 3.0 | \n",
" m | \n",
"
\n",
" \n",
" d | \n",
" y | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" e | \n",
" NaN | \n",
" NaN | \n",
" p | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" druga prva treca\n",
"c NaN 1.0 NaN\n",
"b z 2.0 n\n",
"a x 3.0 m\n",
"d y NaN NaN\n",
"e NaN NaN p"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "sexual-museum",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Srednje vrednosti\n",
"prva 2.0\n",
"dtype: float64\n"
]
}
],
"source": [
"# A few descriptive statistics.\n",
"# numeric_only=True restricts the mean to the numeric column; without it, pandas 2.0\n",
"# and later raise TypeError on the string columns 'druga' and 'treca'.\n",
"print(\"Srednje vrednosti\")\n",
"print(df2.mean(numeric_only=True))\n"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "preceding-friendly",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Srednje vrednosti uzimajuci u obzir NaN\n",
"prva NaN\n",
"dtype: float64\n"
]
}
],
"source": [
"# skipna=False lets NaN propagate, so a column with any missing value averages to NaN.\n",
"# numeric_only=True keeps the string columns out of the computation (pandas 2.0 and\n",
"# later raise TypeError on them otherwise).\n",
"print(\"Srednje vrednosti uzimajuci u obzir NaN\")\n",
"print(df2.mean(skipna=False, numeric_only=True))\n"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "monthly-munich",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" a b\n",
"0 1 2\n",
"1 2 8\n",
"2 3 10\n",
"Srednja vrednost za svaki red\n",
"0 1.5\n",
"1 5.0\n",
"2 6.5\n",
"dtype: float64\n"
]
}
],
"source": [
"# All-numeric frame; axis=1 averages across the columns of each row\n",
"df5 = pd.DataFrame({'a': [1, 2, 3], 'b': [2, 8, 10]})\n",
"print(df5)\n",
"print(\"Srednja vrednost za svaki red\")\n",
"row_means = df5.mean(axis=1)\n",
"print(row_means)\n"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "fourth-corruption",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Zbir po kolonama\n",
"a 6\n",
"b 20\n",
"dtype: int64\n"
]
}
],
"source": [
"print(\"Zbir po kolonama\")\n",
"print(df5.sum())\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "vocal-activity",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Zbir po redovima\n",
"0 3\n",
"1 10\n",
"2 13\n",
"dtype: int64\n"
]
}
],
"source": [
"print(\"Zbir po redovima\")\n",
"print(df5.sum(axis=1))\n"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "minus-visibility",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Percentili\n",
"a 2.0\n",
"b 8.0\n",
"Name: 0.5, dtype: float64\n",
"a 1.5\n",
"b 5.0\n",
"Name: 0.25, dtype: float64\n",
" a b\n",
"0.25 1.5 5.0\n",
"0.50 2.0 8.0\n",
"0.75 2.5 9.0\n"
]
}
],
"source": [
"# Quantiles: the default q is 0.5 (the median); q may be a scalar or a list,\n",
"# in which case the result is a frame with one row per requested quantile.\n",
"print(\"Percentili\")\n",
"print(df5.quantile())\n",
"print(df5.quantile(0.25))\n",
"quartiles = [0.25, 0.5, 0.75]\n",
"print(df5.quantile(quartiles))\n"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "humanitarian-router",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Najzastupljenije vrednosti\n",
" druga prva treca\n",
"0 x 1.0 m\n",
"1 y 2.0 n\n",
"2 z 3.0 p\n"
]
}
],
"source": [
"print(\"Najzastupljenije vrednosti\")\n",
"print(df2.mode())\n"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "available-agreement",
"metadata": {},
"outputs": [],
"source": [
"# Rebuild d2/df2 so that column 'druga' contains a repeated value ('x' twice)\n",
"d2 = {\n",
"    'prva': pd.Series([3, 2, 1], index=list('abc')),\n",
"    'druga': pd.Series(['x', 'x', 'y'], index=list('abd')),\n",
"    'treca': pd.Series(['m', 'n', 'p'], index=list('abe')),\n",
"}\n",
"df2 = pd.DataFrame(d2)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "measured-scotland",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Najzastupljenije vrednosti\n",
" prva druga treca\n",
"0 1.0 x m\n",
"1 2.0 NaN n\n",
"2 3.0 NaN p\n"
]
}
],
"source": [
"print(\"Najzastupljenije vrednosti\")\n",
"print(df2.mode())"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "steady-insertion",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Razlicite vrednosti\n",
"prva 3\n",
"druga 2\n",
"treca 3\n",
"dtype: int64\n"
]
}
],
"source": [
"print(\"Razlicite vrednosti\")\n",
"print(df2.nunique())\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "brief-devon",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}