import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import sklearn.metrics as met
from termcolor import colored


def prediciton_info(x_train, y_train, x_test, y_test, y_pred, indices, distances):
    """Print a per-instance breakdown of the predictions.

    For every row of ``x_test`` this prints the row itself, its actual and
    predicted class (green when they match, red otherwise) and then each
    neighbour taken from ``x_train`` together with its class and distance.

    Args:
        x_train: Training features; rows are looked up positionally.
        y_train: Training labels, positionally aligned with ``x_train``.
        x_test: Test features.
        y_test: True labels of the test rows.
        y_pred: Predicted labels, indexable by test-row position.
        indices: ``indices[i]`` holds the positions in ``x_train`` of the
            neighbours of test row ``i``.
        distances: ``distances[i][j]`` is the distance from test row ``i``
            to its ``j``-th neighbour.
    """
    for i in range(len(x_test)):
        print(colored("test_point: ", "blue"),
              colored(x_test.iloc[i: (i + 1)], "blue"), sep="\n")

        color = "green" if y_test.iloc[i] == y_pred[i] else "red"
        print("actual: ", colored(y_test.iloc[i], color))
        print("predicted: ", colored(y_pred[i], color))

        print("\n", colored("neighbours: ", "blue"))
        # Walk the neighbour row directly instead of deriving k from
        # indices[0], which raised IndexError for an empty test set.
        for neighbour_pos, distance in zip(indices[i], distances[i]):
            print(x_train.iloc[neighbour_pos: (neighbour_pos + 1)])
            print("class:", colored(y_train.iloc[neighbour_pos], "yellow"))
            print("distance: ", distance, "\n")
        print("\n")


def class_info(clf, x_train, y_train, x_test, y_test):
    """Fit ``clf``, print its evaluation on the test set, optionally with details.

    Prints the confusion matrix, the accuracy score and the classification
    report, then asks the user whether the per-instance breakdown should be
    printed as well (answer "1" for yes).

    Args:
        clf: Unfitted classifier exposing ``fit``, ``predict``,
            ``kneighbors`` and, after fitting, ``classes_``.
        x_train: Training features.
        y_train: Training labels.
        x_test: Test features.
        y_test: True labels of the test rows.
    """
    clf.fit(x_train, y_train)
    distances, indices = clf.kneighbors(x_test)
    y_pred = clf.predict(x_test)

    # Use the classes learned by the fitted model for both outputs so that
    # matrix rows and report rows refer to the same class in the same order.
    labels = clf.classes_

    cnf_matrix = met.confusion_matrix(y_test, y_pred, labels=labels)
    print("Matrica konfuzije", cnf_matrix, sep="\n")
    print("\n")

    accuracy = met.accuracy_score(y_test, y_pred)
    print("Preciznost", accuracy)
    print("\n")

    # Previously target_names came from the global df["Species"].unique(),
    # whose order of appearance need not match the sorted label order used by
    # classification_report, which could attach names to the wrong rows.
    class_report = met.classification_report(
        y_test,
        y_pred,
        labels=labels,
        target_names=[str(label) for label in labels],
    )
    print("Izvestaj klasifikacije", class_report, sep="\n")

    option = input("Da li zelite informacije o klasifikacije svake instance? \n(1 za da, 0 za ne)")
    print(option)

    if option == "1":
        prediciton_info(x_train, y_train, x_test, y_test, y_pred, indices, distances)


df = pd.read_csv("iris_pandas.csv")
featurs = df.columns[:4].tolist()

# To classify on a chosen subset of the attributes instead:
# featurs = ["Petal_Length", "Petal_Width"]

x_original = df[featurs]

# Standardization of the attributes.
# NOTE(review): scaling is computed on the full data set before the
# train/test split, so test-set statistics influence the scaling.
x = pd.DataFrame(preprocessing.scale(x_original))

# Normalization (alternative to standardization):
# x = pd.DataFrame(preprocessing.MinMaxScaler().fit_transform(x_original))

# Restore the column names lost by the conversion above.
x.columns = featurs

y = df["Species"]

# Split into training and test sets.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, stratify=y)

# KNN parameters:
#   n_neighbors : number of neighbours, default=5
#   weights     : neighbour weights, default='uniform'
#                 'uniform'  : all neighbours have equal influence
#                 'distance' : closer neighbours have more influence on the
#                              chosen class
#   algorithm   : default 'auto'; one of 'brute', 'kd_tree', 'ball_tree', 'auto'
#   leaf_size   : leaf size of the tree (for 'kd_tree' and 'ball_tree')
#   metric      : distance metric, default 'minkowski'
#   p           : parameter of the Minkowski distance
#                 (p=1 Manhattan, p=2 Euclidean)

k_values = range(3, 10)
p_values = [1, 2]
weights_values = ['uniform', 'distance']

# Evaluate every combination of k, p and weighting scheme.
for k in k_values:
    for p in p_values:
        for weight in weights_values:
            clf = KNeighborsClassifier(n_neighbors=k, p=p, weights=weight)

            print(colored("k=" + str(k), "blue"))
            print(colored("p=" + str(p), "blue"))
            print(colored("weight=" + weight, "blue"))

            class_info(clf, x_train, y_train, x_test, y_test)