{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"id": "welsh-marking",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "surgical-recipe",
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame({'a':[1, 2, 3, np.nan, np.nan], 'b':[1, 8, 5, 10, np.nan]})"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "bacterial-model",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 3.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 3 | \n",
" NaN | \n",
" 10.0 | \n",
"
\n",
" \n",
" 4 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 1.0 1.0\n",
"1 2.0 8.0\n",
"2 3.0 5.0\n",
"3 NaN 10.0\n",
"4 NaN NaN"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "brave-roommate",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a 2\n",
"b 1\n",
"dtype: int64"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isna().sum() #broj nedostajucih po kolini"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "undefined-cedar",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a 3\n",
"b 4\n",
"dtype: int64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.nunique()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "twenty-photographer",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 2 | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 3 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 4 | \n",
" True | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 False False\n",
"1 False False\n",
"2 False False\n",
"3 True False\n",
"4 True True"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isnull()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "loose-sierra",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 2 | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 3 | \n",
" True | \n",
" False | \n",
"
\n",
" \n",
" 4 | \n",
" True | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 False False\n",
"1 False False\n",
"2 False False\n",
"3 True False\n",
"4 True True"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isna()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "entire-coordinator",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isnull().any().any()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "korean-thumbnail",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 3.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 1.0 1.0\n",
"1 2.0 8.0\n",
"2 3.0 5.0"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "determined-delay",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 3.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 3 | \n",
" NaN | \n",
" 10.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 1.0 1.0\n",
"1 2.0 8.0\n",
"2 3.0 5.0\n",
"3 NaN 10.0"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dropna(how='all')"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "aerial-share",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 3.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 3 | \n",
" NaN | \n",
" 10.0 | \n",
"
\n",
" \n",
" 4 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 1.0 1.0\n",
"1 2.0 8.0\n",
"2 3.0 5.0\n",
"3 NaN 10.0\n",
"4 NaN NaN"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "automated-genealogy",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" 1 | \n",
" 2.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 3.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 4 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"1 2.0 8.0\n",
"2 3.0 5.0\n",
"4 NaN NaN"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.drop([0,3])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "binding-polish",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 8.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 10.0 | \n",
"
\n",
" \n",
" 4 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" b\n",
"0 1.0\n",
"1 8.0\n",
"2 5.0\n",
"3 10.0\n",
"4 NaN"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.drop(['a'], axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "unauthorized-westminster",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" 2 | \n",
" 3.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 3 | \n",
" NaN | \n",
" 10.0 | \n",
"
\n",
" \n",
" 4 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"2 3.0 5.0\n",
"3 NaN 10.0\n",
"4 NaN NaN"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.drop(df[df['a']<3].index)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "computational-knight",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 3.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 3 | \n",
" NaN | \n",
" 10.0 | \n",
"
\n",
" \n",
" 4 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 1.0 1.0\n",
"1 2.0 8.0\n",
"2 3.0 5.0\n",
"3 NaN 10.0\n",
"4 NaN NaN"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "documentary-peeing",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 3.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 25.0 | \n",
" 10.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 25.0 | \n",
" 25.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 1.0 1.0\n",
"1 2.0 8.0\n",
"2 3.0 5.0\n",
"3 25.0 10.0\n",
"4 25.0 25.0"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.fillna(25)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "classical-hospital",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 3.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 25.0 | \n",
" 10.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 25.0 | \n",
" 25.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 1.0 1.0\n",
"1 2.0 8.0\n",
"2 3.0 5.0\n",
"3 25.0 10.0\n",
"4 25.0 25.0"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.replace(np.nan, 25)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "spoken-cherry",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 1.0\n",
"1 2.0\n",
"2 3.0\n",
"3 2.0\n",
"4 2.0\n",
"Name: a, dtype: float64"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['a'].replace(np.nan, df['a'].mean())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "french-occasions",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}