{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "beneficial-treat",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.naive_bayes import CategoricalNB\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import classification_report\n",
"from sklearn.preprocessing import OrdinalEncoder"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "unexpected-justice",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('C:/Users/student/Desktop/ipIndustija4/ballons.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "outside-cornell",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" color | \n",
" size | \n",
" act | \n",
" age | \n",
" inflated | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 76 | \n",
" 76 | \n",
" 76 | \n",
" 76 | \n",
" 76 | \n",
"
\n",
" \n",
" unique | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
"
\n",
" \n",
" top | \n",
" YELLOW | \n",
" SMALL | \n",
" DIP | \n",
" ADULT | \n",
" F | \n",
"
\n",
" \n",
" freq | \n",
" 40 | \n",
" 40 | \n",
" 38 | \n",
" 38 | \n",
" 41 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" color size act age inflated\n",
"count 76 76 76 76 76\n",
"unique 2 2 2 2 2\n",
"top YELLOW SMALL DIP ADULT F\n",
"freq 40 40 38 38 41"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" color | \n",
" size | \n",
" act | \n",
" age | \n",
" inflated | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 76 | \n",
" 76 | \n",
" 76 | \n",
" 76 | \n",
" 76 | \n",
"
\n",
" \n",
" unique | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
"
\n",
" \n",
" top | \n",
" YELLOW | \n",
" SMALL | \n",
" DIP | \n",
" ADULT | \n",
" F | \n",
"
\n",
" \n",
" freq | \n",
" 40 | \n",
" 40 | \n",
" 38 | \n",
" 38 | \n",
" 41 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" color size act age inflated\n",
"count 76 76 76 76 76\n",
"unique 2 2 2 2 2\n",
"top YELLOW SMALL DIP ADULT F\n",
"freq 40 40 38 38 41"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" color | \n",
" size | \n",
" act | \n",
" age | \n",
" inflated | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 76 | \n",
" 76 | \n",
" 76 | \n",
" 76 | \n",
" 76 | \n",
"
\n",
" \n",
" unique | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
"
\n",
" \n",
" top | \n",
" YELLOW | \n",
" SMALL | \n",
" DIP | \n",
" ADULT | \n",
" F | \n",
"
\n",
" \n",
" freq | \n",
" 40 | \n",
" 40 | \n",
" 38 | \n",
" 38 | \n",
" 41 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" color size act age inflated\n",
"count 76 76 76 76 76\n",
"unique 2 2 2 2 2\n",
"top YELLOW SMALL DIP ADULT F\n",
"freq 40 40 38 38 41"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "prescription-german",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isna().any().any()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "thorough-royal",
"metadata": {},
"outputs": [],
"source": [
"# Every column except the last one is treated as an input feature.\n",
"features = list(df.columns[:-1])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "constant-there",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['color', 'size', 'act', 'age']"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"['color', 'size', 'act', 'age']"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"['color', 'size', 'act', 'age']"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"features"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "impossible-dublin",
"metadata": {},
"outputs": [],
"source": [
"# Feature matrix: all rows, only the feature columns.\n",
"x = df.loc[:, features]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "returning-makeup",
"metadata": {},
"outputs": [],
"source": [
"y=df.iloc[:,-1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "quick-flash",
"metadata": {},
"outputs": [],
"source": [
"# Hold out 30% of the rows for testing. A fixed random_state makes the split\n",
"# (and every count and metric computed from it) reproducible across kernel restarts.\n",
"x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "incredible-collectible",
"metadata": {},
"outputs": [],
"source": [
"oe = OrdinalEncoder()\n",
"#moze i\n",
"#oe = OrdinalEncoder(categories=[['YELLOW', 'PURPLE'], ['LARGE', 'SMALL'],['DIP', 'STRETCH'], ['ADULT', 'CHILD']])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "color-carbon",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"OrdinalEncoder()"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"OrdinalEncoder()"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"OrdinalEncoder()"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"oe.fit(x_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "allied-commissioner",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[array(['PURPLE', 'YELLOW'], dtype=object),\n",
" array(['LARGE', 'SMALL'], dtype=object),\n",
" array(['DIP', 'STRETCH'], dtype=object),\n",
" array(['ADULT', 'CHILD'], dtype=object)]"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"[array(['PURPLE', 'YELLOW'], dtype=object),\n",
" array(['LARGE', 'SMALL'], dtype=object),\n",
" array(['DIP', 'STRETCH'], dtype=object),\n",
" array(['ADULT', 'CHILD'], dtype=object)]"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"[array(['PURPLE', 'YELLOW'], dtype=object),\n",
" array(['LARGE', 'SMALL'], dtype=object),\n",
" array(['DIP', 'STRETCH'], dtype=object),\n",
" array(['ADULT', 'CHILD'], dtype=object)]"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"oe.categories_"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "instant-declaration",
"metadata": {},
"outputs": [],
"source": [
"# The encoder was already fitted on x_train in an earlier cell, so only transform here\n",
"# (no second fit). Reusing x_train's index keeps the rows label-aligned with y_train.\n",
"x_train_transform = pd.DataFrame(oe.transform(x_train), columns=features, index=x_train.index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "removable-journalism",
"metadata": {},
"outputs": [],
"source": [
"# Encode the test split with the mapping learned from the training split.\n",
"# Reusing x_test's index keeps the rows label-aligned with y_test.\n",
"x_test_transform = pd.DataFrame(oe.transform(x_test), columns=features, index=x_test.index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "present-tissue",
"metadata": {},
"outputs": [],
"source": [
"clf=CategoricalNB()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "forbidden-desktop",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"CategoricalNB()"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"CategoricalNB()"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"CategoricalNB()"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"clf.fit(x_train_transform, y_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "standing-campaign",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"color\n",
" PURPLE YELLOW\n",
"F 16.0 12.0\n",
"T 9.0 16.0\n",
"\n",
"size\n",
" LARGE SMALL\n",
"F 13.0 15.0\n",
"T 9.0 16.0\n",
"\n",
"act\n",
" DIP STRETCH\n",
"F 20.0 8.0\n",
"T 8.0 17.0\n",
"\n",
"age\n",
" ADULT CHILD\n",
"F 8.0 20.0\n",
"T 18.0 7.0\n",
"\n",
"color\n",
" PURPLE YELLOW\n",
"F 16.0 12.0\n",
"T 9.0 16.0\n",
"\n",
"size\n",
" LARGE SMALL\n",
"F 13.0 15.0\n",
"T 9.0 16.0\n",
"\n",
"act\n",
" DIP STRETCH\n",
"F 20.0 8.0\n",
"T 8.0 17.0\n",
"\n",
"age\n",
" ADULT CHILD\n",
"F 8.0 20.0\n",
"T 18.0 7.0\n",
"\n",
"color\n",
" PURPLE YELLOW\n",
"F 16.0 12.0\n",
"T 9.0 16.0\n",
"\n",
"size\n",
" LARGE SMALL\n",
"F 13.0 15.0\n",
"T 9.0 16.0\n",
"\n",
"act\n",
" DIP STRETCH\n",
"F 20.0 8.0\n",
"T 8.0 17.0\n",
"\n",
"age\n",
" ADULT CHILD\n",
"F 8.0 20.0\n",
"T 18.0 7.0\n",
"\n"
]
}
],
"source": [
"# Frequency report: for every feature, how often each category occurs within each class.\n",
"for name, counts, cats in zip(features, clf.category_count_, oe.categories_):\n",
"    print(name)\n",
"    print(pd.DataFrame(counts, index=clf.classes_, columns=cats))\n",
"    print()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "greatest-leave",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"F 28.0\n",
"T 25.0\n",
"dtype: float64"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"F 28.0\n",
"T 25.0\n",
"dtype: float64"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"F 28.0\n",
"T 25.0\n",
"dtype: float64"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.Series(clf.class_count_, index=clf.classes_)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "immune-estonia",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Trening skup\n",
" precision recall f1-score support\n",
"\n",
" F 0.79 0.79 0.79 28\n",
" T 0.76 0.76 0.76 25\n",
"\n",
" accuracy 0.77 53\n",
" macro avg 0.77 0.77 0.77 53\n",
"weighted avg 0.77 0.77 0.77 53\n",
"\n",
"Trening skup\n",
" precision recall f1-score support\n",
"\n",
" F 0.79 0.79 0.79 28\n",
" T 0.76 0.76 0.76 25\n",
"\n",
" accuracy 0.77 53\n",
" macro avg 0.77 0.77 0.77 53\n",
"weighted avg 0.77 0.77 0.77 53\n",
"\n",
"Trening skup\n",
" precision recall f1-score support\n",
"\n",
" F 0.79 0.79 0.79 28\n",
" T 0.76 0.76 0.76 25\n",
"\n",
" accuracy 0.77 53\n",
" macro avg 0.77 0.77 0.77 53\n",
"weighted avg 0.77 0.77 0.77 53\n",
"\n"
]
}
],
"source": [
"# Classification metrics on the training split (the data the model was fitted on).\n",
"print('Trening skup')\n",
"y_train_pred = clf.predict(x_train_transform)\n",
"print(classification_report(y_train, y_train_pred))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "sublime-image",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test skupTest skupTest skup\n",
" precision recall f1-score support\n",
"\n",
" F 0.85 0.85 0.85 13\n",
" T 0.80 0.80 0.80 10\n",
"\n",
" accuracy 0.83 23\n",
" macro avg 0.82 0.82 0.82 23\n",
"weighted avg 0.83 0.83 0.83 23\n",
"\n",
"\n",
" precision recall f1-score support\n",
"\n",
" F 0.85 0.85 0.85 13\n",
" T 0.80 0.80 0.80 10\n",
"\n",
" accuracy 0.83 23\n",
" macro avg 0.82 0.82 0.82 23\n",
"weighted avg 0.83 0.83 0.83 23\n",
"\n",
"\n",
" precision recall f1-score support\n",
"\n",
" F 0.85 0.85 0.85 13\n",
" T 0.80 0.80 0.80 10\n",
"\n",
" accuracy 0.83 23\n",
" macro avg 0.82 0.82 0.82 23\n",
"weighted avg 0.83 0.83 0.83 23\n",
"\n"
]
}
],
"source": [
"# Classification metrics on the held-out test split.\n",
"print('Test skup')\n",
"y_test_pred = clf.predict(x_test_transform)\n",
"print(classification_report(y_test, y_test_pred))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}