{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "overview-title",
   "metadata": {},
   "source": [
    "# Decision tree on Iris: tuning with `GridSearchCV`\n",
    "\n",
    "A `DecisionTreeClassifier` is tuned with 5-fold cross-validated grid search on a 70% training split of the Iris data. The best model is then scored once on the held-out 30% test split."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "modified-actor",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.model_selection import GridSearchCV, train_test_split\n",
    "from sklearn.tree import DecisionTreeClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config-constants",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the Iris CSV, resolved against the notebook's working directory.\n",
    "# Edit this single line if the file lives elsewhere.\n",
    "DATA_PATH = Path(\"iris.csv\")\n",
    "\n",
    "# Name of the column holding the class label.\n",
    "TARGET = \"Species\"\n",
    "\n",
    "# One seed shared by the train/test split and the tree, so that\n",
    "# Restart Kernel -> Run All reproduces the same numbers.\n",
    "SEED = 42"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-load",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "painful-emerald",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load the Iris data set\n",
    "df = pd.read_csv(DATA_PATH)\n",
    "\n",
    "# predictor attributes: the first four columns of the file\n",
    "# NOTE(review): assumes the CSV starts with the four measurement columns - confirm\n",
    "features = df.columns[:4].tolist()\n",
    "assert TARGET not in features, \"target column must not be used as a predictor\"\n",
    "x = df[features]\n",
    "\n",
    "# target attribute\n",
    "y = df[TARGET]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-split",
   "metadata": {},
   "source": [
    "## Train / test split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "vietnamese-theta",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 70% of the instances go to the training set and 30% to the test set.\n",
    "# The split is seeded for reproducibility and stratified so that every\n",
    "# class keeps its share in both parts.\n",
    "x_train, x_test, y_train, y_test = train_test_split(\n",
    "    x, y, test_size=0.3, random_state=SEED, stratify=y\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-grid",
   "metadata": {},
   "source": [
    "## Parameter grid\n",
    "\n",
    "Every parameter maps to a *list* of candidate values. A single value must still be wrapped in a list, e.g. `'criterion': ['gini']`.\n",
    "\n",
    "The grid may also be a list of several dictionaries, e.g.\n",
    "\n",
    "```python\n",
    "[{'criterion': ['gini', 'entropy'],\n",
    "  'min_samples_split': [5, 10, 15]},\n",
    " {'min_samples_leaf': [2, 4, 6],\n",
    "  'max_depth': [2, 3, 4, 5]}]\n",
    "```\n",
    "\n",
    "In that case models are built from the combinations of the first dictionary, and then from the combinations of the second one."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "preliminary-commander",
   "metadata": {},
   "outputs": [],
   "source": [
    "# candidate values of the DecisionTreeClassifier parameters explored by\n",
    "# the cross-validated grid search\n",
    "parameters = [{\n",
    "    'criterion': ['gini', 'entropy'],\n",
    "    'max_depth': [2, 3, 4, 5],\n",
    "    # distinct values only: a repeated value just re-fits the same candidate\n",
    "    'min_samples_split': [5, 10, 15],\n",
    "    'min_samples_leaf': [2, 4, 6],\n",
    "}]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-search",
   "metadata": {},
   "source": [
    "## Grid search with 5-fold cross-validation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "precise-compact",
   "metadata": {},
   "outputs": [],
   "source": [
    "clf = GridSearchCV(DecisionTreeClassifier(random_state=SEED), parameters, cv=5)\n",
    "\n",
    "# with the default refit=True, the best combination is re-trained on the\n",
    "# whole training set, so clf can be used directly for prediction\n",
    "clf.fit(x_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "south-hearts",
   "metadata": {},
   "outputs": [],
   "source": [
    "clf.best_params_"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-cv-scores",
   "metadata": {},
   "source": [
    "## Cross-validation score of every candidate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "suspected-chase",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Ocena uspeha po klasifikatorima:\")\n",
    "\n",
    "# for every parameter combination: mean and standard deviation of the\n",
    "# score over the k validation folds\n",
    "means = clf.cv_results_['mean_test_score']\n",
    "stds = clf.cv_results_['std_test_score']\n",
    "\n",
    "# one line per combination: mean score +/- two standard deviations\n",
    "for mean, std, params in zip(means, stds, clf.cv_results_['params']):\n",
    "    print(\"%0.3f (+/-%0.03f) za %s\" % (mean, std * 2, params))\n",
    "print()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-test-report",
   "metadata": {},
   "source": [
    "## Final evaluation on the test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "specific-passport",
   "metadata": {},
   "outputs": [],
   "source": [
    "# the model refitted with the best parameters is applied to the test set;\n",
    "# the classification report is the final assessment of the model\n",
    "print(\"Izvestaj za test skup:\")\n",
    "y_pred = clf.predict(x_test)\n",
    "print(classification_report(y_test, y_pred))\n",
    "print()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}