"""Compare K-means, agglomerative clustering and DBSCAN on ``unbalance.csv``.

The first two columns of the CSV are used as features.  They are
standardized, clustered with each algorithm for several parameter values,
and every result is drawn as a scatter plot (in the original, unscaled
coordinates) in one cell of a single figure.
"""

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import sklearn.metrics as met
from sklearn import preprocessing
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN

df = pd.read_csv("unbalance.csv")

# Show the column names together with the first five instances.
print('Prvih 5 instanci', df.head(), sep='\n')
print('\n\n')

# The first two columns are the attributes used for clustering and plotting.
featurs = df.columns[:2].tolist()
print(featurs)

x_original = df[featurs]

# Standardize the attributes; the scaled frame keeps the original column names.
# (Alternative: min-max normalization via
#  preprocessing.MinMaxScaler().fit_transform(x_original).)
x = pd.DataFrame(preprocessing.scale(x_original))
x.columns = featurs

colors = ['darkcyan', 'red', 'green', 'gold', 'blue', 'm', 'plum', 'orange',
          'black']
# The last colour is reserved for DBSCAN noise (label -1); real clusters cycle
# through the remaining ones, so a large cluster count can neither collide
# with the noise colour nor run past the end of the list.
noise_color = colors[-1]
cluster_colors = colors[:-1]

# 'normal' is not a font family name (it only triggers findfont warnings and
# a fallback to the default font); request the generic sans-serif family.
font = {'family': 'sans-serif', 'size': 6}
matplotlib.rc('font', **font)

# One subplot row per tested cluster count, one column per algorithm.
cluster_counts = range(5, 9)

fig = plt.figure()
plt_ind = 1

for i in cluster_counts:
    estimators = {
        'K_means': KMeans(n_clusters=i),
        'hijerarhijsko': AgglomerativeClustering(n_clusters=i,
                                                 linkage='average'),
        # DBSCAN has no cluster-count parameter; eps is varied with i instead.
        'DBSCAN': DBSCAN(eps=(i - 2) * 0.1),
    }

    for name, est in estimators.items():
        est.fit(x)
        df['labels'] = est.labels_

        ax = fig.add_subplot(len(cluster_counts), len(estimators), plt_ind)

        if name == 'DBSCAN':
            # The number of clusters is whatever DBSCAN found; label -1
            # marks noise points and is plotted as well.
            num_clusters = max(est.labels_) + 1
            first_label = -1
        else:
            num_clusters = i
            first_label = 0

        for j in range(first_label, num_clusters):
            cluster = df[df['labels'] == j]
            if j == -1:
                color = noise_color
            else:
                color = cluster_colors[j % len(cluster_colors)]
            # Plot in the original coordinates, using the detected feature
            # columns rather than hard-coded names.
            ax.scatter(cluster[featurs[0]], cluster[featurs[1]],
                       color=color, s=10, marker='o',
                       label="cluster %d" % j)

        ax.set_title('Algorithm %s, num clasters: %d' % (name, num_clusters),
                     fontsize=8)
        plt_ind += 1

plt.tight_layout()
plt.show()