"""Train, visualize and evaluate a decision-tree classifier on ``iris.csv``.

The script loads the data set, prints a few summaries, fits a
``DecisionTreeClassifier`` on a stratified training split, renders the fitted
tree to ``tree.png`` through Graphviz, and prints evaluation metrics for the
held-out test split.
"""

import subprocess
import sys

import pandas as pd
import sklearn.metrics as met
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_graphviz


def visualize_tree(tree, feature_names, class_names):
    """Export a fitted decision tree to ``dt.dot`` and render it to ``tree.png``.

    Args:
        tree: Fitted decision-tree estimator accepted by ``export_graphviz``.
        feature_names: Names of the features, in the column order used to fit.
        class_names: Names of the target classes, in the order of
            ``tree.classes_`` (ascending label order).

    The PNG rendering needs the Graphviz ``dot`` executable on ``PATH``. If it
    cannot be started, a message is written to stderr and the function
    returns normally; ``dt.dot`` is still written.
    """
    # The context manager closes the file, so no explicit close() is needed.
    with open("dt.dot", "w", encoding="utf-8") as f:
        export_graphviz(
            tree,
            out_file=f,
            feature_names=feature_names,
            class_names=class_names,
            filled=True,
            rounded=True,
        )

    # Argument list instead of a shell string: no shell is spawned and no
    # quoting is involved. The exit status is ignored, as before.
    try:
        subprocess.call(["dot", "-Tpng", "dt.dot", "-o", "tree.png"])
    except OSError as err:
        # Rendering is best-effort: a missing Graphviz install must not abort
        # the rest of the script.
        print("Could not run Graphviz 'dot':", err, file=sys.stderr)


# Load the data.
df = pd.read_csv("iris.csv")

# Show the column names and the first 5 instances.
print('Prvih 5 instanci', df.head(), sep='\n')
print('\n\n')

print('Opis podataka', df.describe(), sep='\n')
print('\n\n')

# Correlation is only defined for numeric columns. Calling df.corr() on a
# frame that still contains the string column "Species" raises on
# pandas >= 2.0, so the numeric columns are selected explicitly.
print('Korelacija', df.select_dtypes(include='number').corr(), sep='\n')
print('\n\n')

# Show the column names and the last 5 instances (disabled).
# print(df.tail())
# print('\n\n')

# Show the classes.
print("Klase: ", df["Species"].unique())
print('\n\n')

# The first four columns are used as model features.
featurs = df.columns[:4].tolist()
print('Atributi za pravljenje modela:', featurs)
print('\n\n')

x = df[featurs]
y = df["Species"]

# An integer train_size is an absolute number of training samples; the
# remaining rows form the test set. stratify=y keeps the class proportions
# the same in both splits.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=100, stratify=y
)

# Alternative configurations to experiment with:
# dt = DecisionTreeClassifier()
# dt = DecisionTreeClassifier(criterion='entropy')
# dt = DecisionTreeClassifier(max_depth=2)
# dt = DecisionTreeClassifier(min_samples_split=20, max_depth=4)
# dt = DecisionTreeClassifier(min_impurity_split=0.05)
dt = DecisionTreeClassifier(min_samples_split=20, max_depth=4, max_leaf_nodes=4)

# Build the decision tree from the training set.
dt.fit(x_train, y_train)

# scikit-learn orders classes by sorted label (dt.classes_), which need not
# match the order of first appearance returned by Series.unique(). Using
# dt.classes_ keeps the names aligned with the model's outputs.
class_names = [str(label) for label in dt.classes_]

print('Predvidjena verovatnoca', dt.predict_proba(x_test), sep='\n')
print('\n\n')

# Render the tree graphically.
visualize_tree(dt, featurs, class_names)

# Apply the model to the test data.
y_pred = dt.predict(x_test)

cnf_matrix = met.confusion_matrix(y_test, y_pred)
print('Matrica konfuzije', cnf_matrix, sep='\n')
print('\n\n')

# Fraction of correctly classified test instances.
accuracy = met.accuracy_score(y_test, y_pred)
print('Preciznost', accuracy)

# normalize=False returns the number of correctly classified instances.
accuracy = met.accuracy_score(y_test, y_pred, normalize=False)
print('Preciznost u broju instanci', accuracy)
print('\n\n')

class_report = met.classification_report(
    y_test, y_pred, target_names=class_names
)
print('Izvestaj klasifikacije', class_report, sep='\n')