{ "cells": [
 { "cell_type": "code", "execution_count": 1, "id": "classified-delicious", "metadata": {}, "outputs": [], "source": [
  "import os\n",
  "\n",
  "import pandas as pd\n",
  "from sklearn.neural_network import MLPClassifier\n",
  "from sklearn.model_selection import train_test_split, GridSearchCV\n",
  "from sklearn import preprocessing\n",
  "import sklearn.metrics as met\n"
 ] },
 { "cell_type": "code", "execution_count": 3, "id": "fluid-lawrence", "metadata": {}, "outputs": [], "source": [
  "# Location of the Iris CSV. Set the IRIS_CSV environment variable to use another copy;\n",
  "# the fallback is the path this notebook was originally written against.\n",
  "DATA_PATH = os.environ.get(\n",
  "    \"IRIS_CSV\",\n",
  "    \"C:/Users/student/Desktop/ipIndustija4/ipVezbe92021/vezbe9/iris.csv\",\n",
  ")\n",
  "\n",
  "# Load the data and split it into the instance descriptions (features) and the class labels.\n",
  "df = pd.read_csv(DATA_PATH)\n",
  "\n",
  "# The first four columns are used as features; the target is the \"Species\" column.\n",
  "features = df.columns[:4].tolist()\n",
  "\n",
  "# df[features] already carries these column names, so no relabelling is needed.\n",
  "x = df[features]\n",
  "y = df[\"Species\"]\n"
 ] },
 { "cell_type": "code", "execution_count": 6, "id": "still-drink", "metadata": {}, "outputs": [], "source": [
  "# Fixed seed so the stratified 70/30 split is the same on every run.\n",
  "SEED = 42\n",
  "\n",
  "x_train, x_test, y_train, y_test = train_test_split(\n",
  "    x, y, train_size=0.7, stratify=y, random_state=SEED\n",
  ")\n"
 ] },
 { "cell_type": "code", "execution_count": 20, "id": "sixth-allergy", "metadata": {}, "outputs": [], "source": [
  "# Fit the min-max scaler on the training part only, then apply it to both parts.\n",
  "scaler = preprocessing.MinMaxScaler().fit(x_train)\n",
  "\n",
  "# Keep the original row index so the scaled frames stay aligned with y_train / y_test.\n",
  "x_train = pd.DataFrame(scaler.transform(x_train), columns=features, index=x_train.index)\n",
  "x_test = pd.DataFrame(scaler.transform(x_test), columns=features, index=x_test.index)"
 ] },
 { "cell_type": "code", "execution_count": 21, "id": "subject-thunder", "metadata": {}, "outputs": [], "source": [
  "\"\"\"\n",
  "hidden_layer_sizes - numbers of neurons in the hidden layers\n",
  "    default=(100,)\n",
  "\n",
  "activation - activation function\n",
  "    identity   f(x) = x\n",
  "    logistic   sigmoid function f(x) = 1 / (1 + exp(-x))\n",
  "    tanh       hyperbolic tangent f(x) = tanh(x)\n",
  "    relu       f(x) = max(0, x)\n",
  "\n",
  "solver - solver used to optimize the weights\n",
  "    sgd        stochastic gradient descent\n",
  "\n",
  "batch_size - minibatch size: number of instances used in one gradient step\n",
  "    default='auto', i.e. min(200, n_samples)\n",
  "\n",
  "learning_rate - learning-rate schedule used when updating the weights\n",
  "    constant   constant rate, given by 
learning_rate_init\n",
  "    invscaling gradually decreases the learning rate at step t:\n",
  "               effective_learning_rate = learning_rate_init / pow(t, power_t)\n",
  "    adaptive   keeps the learning rate unchanged while the training loss keeps decreasing.\n",
  "               Each time two consecutive epochs fail to decrease the loss by at least tol,\n",
  "               or fail to increase the validation score by at least tol (when early_stopping\n",
  "               is on), the current learning rate is divided by 5\n",
  "\n",
  "learning_rate_init - initial learning rate\n",
  "    default=0.001\n",
  "power_t - exponent used by the invscaling schedule\n",
  "    default=0.5\n",
  "\n",
  "max_iter - maximum number of iterations\n",
  "    default=200\n",
  "\n",
  "tol - tolerance of the optimization for the loss or the score\n",
  "    default=1e-4\n",
  "\n",
  "shuffle - whether to shuffle the instances in each iteration\n",
  "    default=True\n",
  "\n",
  "verbose - whether to print progress messages to standard output\n",
  "    default=False\n",
  "\n",
  "early_stopping - whether to stop training early when the validation score stops improving;\n",
  "    10% of the training data is then set aside for validation\n",
  "    default=False\n",
  "\n",
  "validation_fraction - share of the training data set aside for validation\n",
  "    Only used if early_stopping=True\n",
  "    default=0.1\n",
  "\"\"\"\n",
  "\n",
  "# Hyper-parameter grid explored by the grid search (SGD solver only).\n",
  "params = [{\n",
  "    'solver': ['sgd'],\n",
  "    'learning_rate': ['constant', 'invscaling', 'adaptive'],\n",
  "    'learning_rate_init': [0.01, 0.005, 0.002],\n",
  "    'activation': ['identity', 'logistic', 'tanh', 'relu'],\n",
  "    'hidden_layer_sizes': [(10, 3), (10, 10), (50, 50)],\n",
  "    'max_iter': [500],\n",
  "}]\n"
 ] },
 { "cell_type": "code", "execution_count": 28, "id": "outer-kitchen", "metadata": {}, "outputs": [], "source": [
  "# A fixed random_state makes weight initialisation and shuffling repeatable, so for a given\n",
  "# training set the grid-search ranking does not change between runs.\n",
  "clf = GridSearchCV(MLPClassifier(random_state=42), params, cv=5)\n",
  "clf.fit(x_train, y_train)\n"
 ] },
 { "cell_type": "code", "execution_count": 23, "id": "pleasant-layout", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Najbolji parametri:\n", "{'activation': 'tanh', 'hidden_layer_sizes': (10, 3), 'learning_rate': 'adaptive', 
'learning_rate_init': 0.005, 'max_iter': 500, 'solver': 'sgd'}\n" ] } ], "source": [
  "print(\"Najbolji parametri:\")\n",
  "print(clf.best_params_)\n"
 ] },
 { "cell_type": "code", "execution_count": 29, "id": "chinese-trailer", "metadata": {}, "outputs": [], "source": [
  "print(\"Ocena uspeha po klasifikatorima:\")\n",
  "means = clf.cv_results_['mean_test_score']\n",
  "stds = clf.cv_results_['std_test_score']\n",
  "# The loop variable is deliberately not called `params`: that name holds the hyper-parameter\n",
  "# grid, and overwriting it here would break any later re-run of the grid-search cell.\n",
  "for mean, std, candidate in zip(means, stds, clf.cv_results_['params']):\n",
  "    print(\"%0.3f (+/-%0.03f) za %s\" % (mean, std * 2, candidate))\n"
 ] },
 { "cell_type": "code", "execution_count": 25, "id": "level-legislation", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Izvestaj klasifikacije\n", " precision recall f1-score support\n", "\n", " setosa 1.00 1.00 1.00 35\n", " versicolor 1.00 0.91 0.96 35\n", " virginica 0.92 1.00 0.96 35\n", "\n", " accuracy 0.97 105\n", " macro avg 0.97 0.97 0.97 105\n", "weighted avg 0.97 0.97 0.97 105\n", "\n" ] } ], "source": [
  "# Classification report on the training set itself.\n",
  "train_pred = clf.predict(x_train)\n",
  "class_report = met.classification_report(y_train, train_pred, target_names=clf.classes_)\n",
  "print(\"Izvestaj klasifikacije\", class_report, sep=\"\\n\")\n"
 ] },
 { "cell_type": "code", "execution_count": 26, "id": "cooperative-individual", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Izvestaj za test skup:\n", "Matrica konfuzije\n", " setosa versicolor virginica\n", "setosa 15 0 0\n", "versicolor 0 14 1\n", "virginica 0 1 14\n", "Preciznost 0.9555555555555556\n", "Izvestaj klasifikacije\n", " precision recall f1-score support\n", "\n", " setosa 1.00 1.00 1.00 15\n", " versicolor 0.93 0.93 0.93 15\n", " virginica 0.93 0.93 0.93 15\n", "\n", " accuracy 0.96 45\n", " macro avg 0.96 0.96 0.96 45\n", "weighted avg 0.96 0.96 0.96 45\n", "\n" ] } ], "source": [
  "print(\"Izvestaj za test skup:\")\n",
  "# Predictions on the held-out test set.\n",
  "y_pred = clf.predict(x_test)\n",
  "cnf_matrix = pd.DataFrame(met.confusion_matrix(y_test, y_pred), \n",
  "                          
index=clf.classes_, columns=clf.classes_)\n",
  "print(\"Matrica konfuzije\", cnf_matrix, sep=\"\\n\")\n",
  "\n",
  "# Accuracy score on the test set.\n",
  "test_accuracy = met.accuracy_score(y_test, y_pred)\n",
  "print(\"Preciznost\", test_accuracy)\n",
  "\n",
  "# Classification report on the test set, labelled with the class names.\n",
  "test_report = met.classification_report(y_test, y_pred, target_names=clf.classes_)\n",
  "print(\"Izvestaj klasifikacije\", test_report, sep=\"\\n\")\n"
 ] },
 { "cell_type": "code", "execution_count": 27, "id": "straight-broadway", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "Broj iteracija: 500\n",
  "Broj slojeva: 4\n",
  "Koeficijenti:\n",
  "[array([[ 0.09609942, -0.12107981, 0.52610371, -0.10938628, -0.16833315,\n",
  " -0.42834138, 0.17156497, 0.04643681, 0.22878301, -0.06538772],\n",
  " [ 0.69561624, -0.49214875, 0.15457957, 0.0236978 , -0.59198475,\n",
  " -0.27723742, -0.56185542, 0.75560583, 0.48884849, -0.90377294],\n",
  " [ 0.07851986, 0.44308248, 0.53631944, -0.75359555, 0.03385684,\n",
  " 0.04014964, -0.33472234, -0.41884165, -0.03386422, 1.05764312],\n",
  " [ 0.11417256, -0.10446146, 0.27688751, -0.29819946, 0.54929703,\n",
  " 1.24175719, 0.04239881, -1.12974437, -0.39983181, 0.97105697]]), array([[ 0.2075621 , 0.31920742, 0.32525437],\n",
  " [-0.0596762 , -0.52400645, 0.18480043],\n",
  " [-0.50914915, 0.3451839 , 0.96860342],\n",
  " [ 0.66393844, 0.55856935, -0.28125955],\n",
  " [-0.70175595, 0.18274997, -0.25434607],\n",
  " [-0.58552113, 0.2588536 , 1.03944674],\n",
  " [-0.43725795, 0.29142633, 0.03932311],\n",
  " [ 1.03917037, -0.14936963, -1.13931136],\n",
  " [-0.25450928, 0.44221902, -0.12070449],\n",
  " [-1.29443488, -0.33781047, 1.47022426]]), array([[ 1.09407382, -2.17552486, -1.04997447],\n",
  " [ 0.85509813, 1.08902429, 0.45228107],\n",
  " [-1.59110901, -1.44840442, 2.26039193]])]\n",
  "Bias:\n",
  "[array([-0.42282816, -0.50298868, -0.63980681, 0.14074351, 0.66907186,\n",
  " -0.27769468, -0.22428855, 0.31534276, 0.44085793, -0.61747506]), array([-0.46101268, 0.54489294, -1.04013071]), array([ 0.07280579, -0.13869141, 
0.85157448])]\n" ] } ], "source": [
  "# Inspect the estimator that the grid search selected as best.\n",
  "best_mlp = clf.best_estimator_\n",
  "print('Broj iteracija: ', best_mlp.n_iter_)\n",
  "print('Broj slojeva: ', best_mlp.n_layers_)\n",
  "print('Koeficijenti:', best_mlp.coefs_, sep='\\n')\n",
  "print('Bias:', best_mlp.intercepts_, sep='\\n')\n"
 ] },
 { "cell_type": "code", "execution_count": null, "id": "greenhouse-teacher", "metadata": {}, "outputs": [], "source": [] }
 ],
 "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" } },
 "nbformat": 4, "nbformat_minor": 5 }