{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "unique-intellectual", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "shared-fitness", "metadata": {}, "outputs": [], "source": [ "\"\"\"1D struktura podataka\"\"\"\n", "\n", "s1 = pd.Series([1,2,3,5]) #pravljenje serije pomocu liste i automatsko dodeljivanje oznaka redovima" ] }, { "cell_type": "code", "execution_count": 3, "id": "mighty-fifteen", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 1\n", "1 2\n", "2 3\n", "3 5\n", "dtype: int64\n" ] } ], "source": [ "print(s1)" ] }, { "cell_type": "code", "execution_count": 4, "id": "signed-pencil", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "s1[2] 3\n", "s1[1] 2\n" ] } ], "source": [ "#pristup elementima serija[oznaka]\n", "print('s1[2]', s1[2])\n", "print('s1[1]', s1.get(1))\n" ] }, { "cell_type": "code", "execution_count": 5, "id": "genetic-hurricane", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "s1[8] nan\n" ] } ], "source": [ "import numpy as np # modul zbog nan vrednosti\n", "print('s1[8]', s1.get(8, np.nan))\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "handmade-montgomery", "metadata": {}, "outputs": [], "source": [ "# pravljenje serije pomocu liste sa zadatim oznakama za redove\n", "s2 = pd.Series([1,2,3,5], index=['a', 'b', 'c', 'd'])\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "exterior-plain", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "s2['d'] 5\n" ] } ], "source": [ "print(\"s2['d']\" ,s2['d'])\n" ] }, { "cell_type": "code", "execution_count": 8, "id": "ahead-application", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "e 2\n", "a 1\n", "b 2\n", "c 3\n", "d 5\n", "dtype: int64\n", "s3['a'] 1\n" ] } ], "source": [ "s3 = pd.Series({'e':2, 'a':1,'b':2,'c':3,'d':5, }) # pravljenje serije pomocu recnika\n", "print(s3)\n", "print(\"s3['a']\", s3['a'])\n" ] }, { "cell_type": "code", "execution_count": 9, "id": "unlikely-blond", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['e', 'a', 'b', 'c', 'd']\n", "Index(['e', 'a', 'b', 'c', 'd'], dtype='object')\n" ] } ], "source": [ "#izdvajanje oznaka\n", "print(s3.index.tolist())\n", "#ili\n", "print(s3.keys())\n" ] }, { "cell_type": "code", "execution_count": 10, "id": "structured-multimedia", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2 1 2 3 5]\n" ] } ], "source": [ "#izdvajanje vrednosti\n", "print(s3.values)\n", "\n" ] }, { "cell_type": "code", "execution_count": 11, "id": "advisory-latitude", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "int64\n" ] } ], "source": [ "#tip elemenata\n", "print(s3.dtype)\n" ] }, { "cell_type": "code", "execution_count": 12, "id": "excellent-logic", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2 2\n", "1 1\n", "3 1\n", "5 1\n", "dtype: int64\n" ] } ], "source": [ "#broj pojavljivanja svake vrednosti\n", "print(s3.value_counts())\n" ] }, { "cell_type": "code", "execution_count": 13, "id": "average-thailand", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 1\n", "b 2\n", "c 3\n", "d 5\n", "dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s2" ] }, { "cell_type": "code", "execution_count": 14, "id": "younger-algeria", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "e 2\n", "a 1\n", "b 2\n", "c 3\n", "d 5\n", "dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s3" ] }, { "cell_type": "code", "execution_count": 15, "id": "fallen-painting", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 2.0\n", "b 4.0\n", "c 6.0\n", "d 10.0\n", "e NaN\n", "dtype: float64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s2+s3" ] }, { "cell_type": "code", "execution_count": 16, "id": "contained-regular", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "a 2\n", "b 4\n", "c 6\n", "d 10\n", "dtype: int64" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s2+s2" ] }, { "cell_type": "code", "execution_count": 17, "id": "framed-assistant", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "df1\n", " prva druga\n", "a 1 x\n", "b 2 y\n", "c 3 z\n" ] } ], "source": [ "\"\"\"2D struktura podataka DataFrame\"\"\"\n", "\n", "d1= {\n", " 'prva': pd.Series([1,2,3], index=['a', 'b', 'c']),\n", " 'druga': pd.Series(['x', 'y', 'z'], index=['a', 'b', 'c'])\n", "}\n", "df1 = pd.DataFrame(d1)\n", "\n", "print(\"df1\")\n", "print(df1)\n" ] }, { "cell_type": "code", "execution_count": 18, "id": "solid-guinea", "metadata": {}, "outputs": [], "source": [ "d2= {\n", " 'prva': pd.Series([3,2,1], index=['a', 'b', 'c']),\n", " 'druga': pd.Series(['x', 'z', 'y'], index=['a', 'b', 'd']),\n", " 'treca': pd.Series(['m', 'n', 'p'], index=['a', 'b', 'e'])\n", " \n", "}\n" ] }, { "cell_type": "code", "execution_count": 19, "id": "radio-chocolate", "metadata": {}, "outputs": [], "source": [ "df2 = pd.DataFrame(d2)\n" ] }, { "cell_type": "code", "execution_count": 20, "id": "objective-rating", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
prvadrugatreca
a3.0xm
b2.0zn
c1.0NaNNaN
dNaNyNaN
eNaNNaNp
\n", "
" ], "text/plain": [ " prva druga treca\n", "a 3.0 x m\n", "b 2.0 z n\n", "c 1.0 NaN NaN\n", "d NaN y NaN\n", "e NaN NaN p" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2" ] }, { "cell_type": "code", "execution_count": 21, "id": "printable-breast", "metadata": {}, "outputs": [], "source": [ "#df3 = pd.DataFrame( [(1,2,'X'), (2,3,'C')], columns=['A', 'B','C'])\n", "df3 = pd.DataFrame( [(1,2,'X'), (2,3,'C')]) \n" ] }, { "cell_type": "code", "execution_count": 22, "id": "robust-politics", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
012X
123C
\n", "
" ], "text/plain": [ " 0 1 2\n", "0 1 2 X\n", "1 2 3 C" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3" ] }, { "cell_type": "code", "execution_count": 23, "id": "careful-resort", "metadata": {}, "outputs": [], "source": [ "#sortiranje po indeksima (oznakama)\n", "#df2.sort_index(ascending=False, inplace=True) # sortiranje redova\n", "df2.sort_index(ascending=True, inplace=True) # sortiranje redova\n" ] }, { "cell_type": "code", "execution_count": 24, "id": "enormous-protest", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
prvadrugatreca
a3.0xm
b2.0zn
c1.0NaNNaN
dNaNyNaN
eNaNNaNp
\n", "
" ], "text/plain": [ " prva druga treca\n", "a 3.0 x m\n", "b 2.0 z n\n", "c 1.0 NaN NaN\n", "d NaN y NaN\n", "e NaN NaN p" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2" ] }, { "cell_type": "code", "execution_count": 25, "id": "artificial-roots", "metadata": {}, "outputs": [], "source": [ "df2.sort_index(ascending=True, axis=1, inplace=True) #sortiranje kolona\n" ] }, { "cell_type": "code", "execution_count": 26, "id": "patient-wayne", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
drugaprvatreca
ax3.0m
bz2.0n
cNaN1.0NaN
dyNaNNaN
eNaNNaNp
\n", "
" ], "text/plain": [ " druga prva treca\n", "a x 3.0 m\n", "b z 2.0 n\n", "c NaN 1.0 NaN\n", "d y NaN NaN\n", "e NaN NaN p" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2" ] }, { "cell_type": "code", "execution_count": 27, "id": "expressed-array", "metadata": {}, "outputs": [], "source": [ "#sortiranje po vrednosti kolone\n", "df2.sort_values(by='prva', inplace=True) #by=[lista kolona]\n" ] }, { "cell_type": "code", "execution_count": 28, "id": "appointed-summer", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
drugaprvatreca
cNaN1.0NaN
bz2.0n
ax3.0m
dyNaNNaN
eNaNNaNp
\n", "
" ], "text/plain": [ " druga prva treca\n", "c NaN 1.0 NaN\n", "b z 2.0 n\n", "a x 3.0 m\n", "d y NaN NaN\n", "e NaN NaN p" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2" ] }, { "cell_type": "code", "execution_count": 29, "id": "narrow-barbados", "metadata": {}, "outputs": [], "source": [ "df4 = pd.DataFrame({\n", " 'number' : [1,2,2,3],\n", " 'object' : ['c', 'd', 'e', 'c']\n", " })\n" ] }, { "cell_type": "code", "execution_count": 30, "id": "solved-dover", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "number int64\n", "object object\n", "dtype: object\n" ] } ], "source": [ "print(df4.dtypes)" ] }, { "cell_type": "code", "execution_count": 31, "id": "specialized-reviewer", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Opis svih kolona\n", " number\n", "count 4.000000\n", "mean 2.000000\n", "std 0.816497\n", "min 1.000000\n", "25% 1.750000\n", "50% 2.000000\n", "75% 2.250000\n", "max 3.000000\n" ] } ], "source": [ "print(\"Opis svih kolona\")\n", "#print(df4.describe(include='all'))\n", "print(df4.describe())\n" ] }, { "cell_type": "code", "execution_count": 32, "id": "legendary-throw", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Opis numerickih kolona\n", " number\n", "count 4.000000\n", "mean 2.000000\n", "std 0.816497\n", "min 1.000000\n", "25% 1.750000\n", "50% 2.000000\n", "75% 2.250000\n", "max 3.000000\n" ] } ], "source": [ "print(\"Opis numerickih kolona\")\n", "print(df4.describe(include=['number']))\n" ] }, { "cell_type": "code", "execution_count": 33, "id": "fabulous-label", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Opis kolona object\n", " object\n", "count 4\n", "unique 3\n", "top c\n", "freq 2\n" ] } ], "source": [ "print(\"Opis kolona object\")\n", "print(df4.describe(include=['object']))\n" ] }, { "cell_type": "code", "execution_count": 34, "id": "nonprofit-fabric", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
drugaprvatreca
cNaN1.0NaN
bz2.0n
ax3.0m
dyNaNNaN
eNaNNaNp
\n", "
" ], "text/plain": [ " druga prva treca\n", "c NaN 1.0 NaN\n", "b z 2.0 n\n", "a x 3.0 m\n", "d y NaN NaN\n", "e NaN NaN p" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2" ] }, { "cell_type": "code", "execution_count": 35, "id": "sexual-museum", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Srednje vrednosti\n", "prva 2.0\n", "dtype: float64\n" ] } ], "source": [ "#neke deskriptivne statistike\n", "print(\"Srednje vrednosti\")\n", "print(df2.mean())\n" ] }, { "cell_type": "code", "execution_count": 36, "id": "preceding-friendly", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Srednje vrednosti uzimajuci u obzir NaN\n", "prva NaN\n", "dtype: float64\n" ] } ], "source": [ "print(\"Srednje vrednosti uzimajuci u obzir NaN\")\n", "print(df2.mean(skipna=False))\n" ] }, { "cell_type": "code", "execution_count": 37, "id": "monthly-munich", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " a b\n", "0 1 2\n", "1 2 8\n", "2 3 10\n", "Srednja vrednost za svaki red\n", "0 1.5\n", "1 5.0\n", "2 6.5\n", "dtype: float64\n" ] } ], "source": [ "\n", "df5=pd.DataFrame({'a':[1,2,3], 'b':[2,8,10]})\n", "print(df5)\n", "print(\"Srednja vrednost za svaki red\")\n", "print(df5.mean(axis=1))\n" ] }, { "cell_type": "code", "execution_count": 38, "id": "fourth-corruption", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Zbir po kolonama\n", "a 6\n", "b 20\n", "dtype: int64\n" ] } ], "source": [ "print(\"Zbir po kolonama\")\n", "print(df5.sum())\n" ] }, { "cell_type": "code", "execution_count": 39, "id": "vocal-activity", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Zbir po redovima\n", "0 3\n", "1 10\n", "2 13\n", "dtype: int64\n" ] } ], "source": [ "print(\"Zbir po redovima\")\n", "print(df5.sum(axis=1))\n" ] }, { "cell_type": "code", "execution_count": 40, "id": "minus-visibility", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Percentili\n", "a 2.0\n", "b 8.0\n", "Name: 0.5, dtype: float64\n", "a 1.5\n", "b 5.0\n", "Name: 0.25, dtype: float64\n", " a b\n", "0.25 1.5 5.0\n", "0.50 2.0 8.0\n", "0.75 2.5 9.0\n" ] } ], "source": [ "print(\"Percentili\")\n", "print(df5.quantile())\n", "print(df5.quantile(0.25))\n", "print(df5.quantile([0.25, 0.5,0.75]))\n" ] }, { "cell_type": "code", "execution_count": 41, "id": "humanitarian-router", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Najzastupljenije vrednosti\n", " druga prva treca\n", "0 x 1.0 m\n", "1 y 2.0 n\n", "2 z 3.0 p\n" ] } ], "source": [ "print(\"Najzastupljenije vrednosti\")\n", "print(df2.mode())\n" ] }, { "cell_type": "code", "execution_count": 42, "id": "available-agreement", "metadata": {}, "outputs": [], "source": [ "d2= {\n", " 'prva': pd.Series([3,2,1], index=['a', 'b', 'c']),\n", " 'druga': pd.Series(['x', 'x', 'y'], index=['a', 'b', 'd']),\n", " 'treca': pd.Series(['m', 'n', 'p'], index=['a', 'b', 'e'])\n", " \n", "}\n", "df2=pd.DataFrame(d2)" ] }, { "cell_type": "code", "execution_count": 43, "id": "measured-scotland", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Najzastupljenije vrednosti\n", " prva druga treca\n", "0 1.0 x m\n", "1 2.0 NaN n\n", "2 3.0 NaN p\n" ] } ], "source": [ "print(\"Najzastupljenije vrednosti\")\n", "print(df2.mode())" ] }, { "cell_type": "code", "execution_count": 44, "id": "steady-insertion", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Razlicite vrednosti\n", "prva 3\n", "druga 2\n", "treca 3\n", "dtype: int64\n" ] } ], "source": [ "print(\"Razlicite vrednosti\")\n", "print(df2.nunique())\n" ] }, { "cell_type": "code", "execution_count": null, "id": "brief-devon", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" } }, "nbformat": 4, "nbformat_minor": 5 }