{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "notebook-title",
   "metadata": {},
   "source": [
    "# SVM classification of the Iris data set\n",
    "\n",
    "Tunes an `SVC` over four kernels with a cross-validated grid search (macro F1) and prints classification reports for the training split and the held-out test split."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "external-draft",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.model_selection import GridSearchCV, train_test_split\n",
    "from sklearn.preprocessing import MinMaxScaler  # not used below; kept from the original import list\n",
    "from sklearn.svm import SVC\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config-constants",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the Iris CSV. Assumed to sit next to this notebook - adjust if it lives elsewhere.\n",
    "DATA_PATH = Path(\"iris.csv\")\n",
    "\n",
    "# Fixed seed so that the train/test split, and every score derived from it, is reproducible.\n",
    "SEED = 42\n",
    "TEST_SIZE = 0.3\n",
    "CV_FOLDS = 5\n",
    "\n",
    "# Number of best-ranked grid-search candidates printed in the results cell.\n",
    "TOP_K = 20\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-load-data",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "respiratory-detail",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(DATA_PATH)\n",
    "\n",
    "# The first four columns are the features; the \"Species\" column is the class label.\n",
    "features = df.columns[:4].tolist()\n",
    "x = df[features]\n",
    "y = df[\"Species\"]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-train-test-split",
   "metadata": {},
   "source": [
    "## Train / test split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "serious-convenience",
   "metadata": {},
   "outputs": [],
   "source": [
    "# stratify=y keeps the class proportions equal in both splits;\n",
    "# random_state makes the split identical on every run.\n",
    "x_train, x_test, y_train, y_test = train_test_split(\n",
    "    x, y, test_size=TEST_SIZE, random_state=SEED, stratify=y\n",
    ")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-parameter-grid",
   "metadata": {},
   "source": [
    "## Parameter grid for cross-validation\n",
    "\n",
    "`SVC` hyper-parameters searched below:\n",
    "\n",
    "- `C` (default `1.0`): regularization parameter.\n",
    "- `kernel` (default `'rbf'`):\n",
    "  - `'linear'`: $\\langle x, x' \\rangle$\n",
    "  - `'poly'`: $(\\gamma \\langle x, x' \\rangle + \\mathrm{coef0})^{\\mathrm{degree}}$ - related parameters: `degree` (default `3`), `gamma` (coefficient), `coef0` (independent term, default `0.0`)\n",
    "  - `'rbf'`: $\\exp(-\\gamma \\lVert x - x' \\rVert^2)$ - related parameter: `gamma` (coefficient, $\\gamma > 0$)\n",
    "  - `'sigmoid'`: $\\tanh(\\gamma \\langle x, x' \\rangle + \\mathrm{coef0})$ - related parameters: `gamma` (coefficient), `coef0` (independent term, default `0.0`)\n",
    "- `gamma`: `'auto'` means `1 / n_features`; since scikit-learn 0.22 the default is `'scale'`, i.e. `1 / (n_features * X.var())`.\n",
    "\n",
    "Attributes of a fitted `SVC`:\n",
    "\n",
    "- `support_`: indices of the support vectors\n",
    "- `support_vectors_`: the support vectors\n",
    "- `n_support_`: number of support vectors for each class\n",
    "- `dual_coef_`: array of shape `(n_classes - 1, n_SV)` holding the coefficients of the support vectors; with more than two classes there are coefficients for all 1-vs-1 classifiers\n",
    "- `intercept_`: constants in the decision function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "several-provider",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Value ranges shared by the kernel-specific sub-grids.\n",
    "C_VALUES = [2 ** k for k in range(-6, 10, 2)]\n",
    "GAMMA_VALUES = np.arange(0.1, 1.1, 0.1)\n",
    "COEF0_VALUES = np.arange(0, 2, 0.5)\n",
    "\n",
    "# One dict per kernel, so only the parameters that kernel actually uses are combined.\n",
    "parameters = [\n",
    "    {'C': C_VALUES, 'kernel': ['linear']},\n",
    "    {\n",
    "        'C': C_VALUES,\n",
    "        'kernel': ['poly'],\n",
    "        'degree': [2, 3, 4, 5],\n",
    "        'gamma': GAMMA_VALUES,\n",
    "        'coef0': COEF0_VALUES,\n",
    "    },\n",
    "    {'C': C_VALUES, 'kernel': ['rbf'], 'gamma': GAMMA_VALUES},\n",
    "    {\n",
    "        'C': C_VALUES,\n",
    "        'kernel': ['sigmoid'],\n",
    "        'gamma': GAMMA_VALUES,\n",
    "        'coef0': COEF0_VALUES,\n",
    "    },\n",
    "]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-grid-search",
   "metadata": {},
   "source": [
    "## Grid search"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "attached-control",
   "metadata": {},
   "outputs": [],
   "source": [
    "# n_jobs=-1 only runs the fits in parallel; it does not change which model is selected.\n",
    "clf = GridSearchCV(SVC(), parameters, cv=CV_FOLDS, scoring='f1_macro', n_jobs=-1)\n",
    "clf.fit(x_train, y_train)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "forty-concentrate",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Ocena uspeha po klasifikatorima:\")\n",
    "results = clf.cv_results_\n",
    "# The grid holds well over a thousand combinations, so only the TOP_K best-ranked ones are printed.\n",
    "# sorted() is stable, so candidates with equal rank keep their grid order.\n",
    "ranked = sorted(range(len(results['params'])), key=lambda i: results['rank_test_score'][i])\n",
    "for i in ranked[:TOP_K]:\n",
    "    mean = results['mean_test_score'][i]\n",
    "    std = results['std_test_score'][i]\n",
    "    # The spread is reported as two standard deviations of the per-fold scores.\n",
    "    print(\"%0.3f (+/-%0.03f) za %s\" % (mean, std * 2, results['params'][i]))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-best-model",
   "metadata": {},
   "source": [
    "## Best model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "royal-prompt",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Najbolji parametri:\")\n",
    "print(clf.best_params_)\n",
    "\n",
    "# Class labels and the number of support vectors found for each class.\n",
    "print(clf.best_estimator_.classes_)\n",
    "print('Broj podrzavajucih vektora', clf.best_estimator_.n_support_)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-evaluation",
   "metadata": {},
   "source": [
    "## Evaluation\n",
    "\n",
    "`clf.predict` uses the best estimator found by the grid search."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "swedish-nepal",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "print(\"Izvestaj za trening skup:\")\n",
    "print(classification_report(y_train, clf.predict(x_train)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dominant-buffer",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Izvestaj za test skup:\")\n",
    "print(classification_report(y_test, clf.predict(x_test)))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}