import pandas as pd
import numpy as np
from collections import Counter
from matplotlib import pyplot as plt
import os
import cv2
%matplotlib inline
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;
def load_filepaths(imdir_ideology='ideology_image_dataset/'):
    """Return the path of every entry found directly inside an image directory.

    Args:
        imdir_ideology: Directory to list. Defaults to
            'ideology_image_dataset/', so existing no-argument calls behave
            exactly as before.

    Returns:
        A list with one ``os.path.join(imdir_ideology, name)`` string per
        directory entry, in the order reported by ``os.listdir``.
    """
    # The listing order is intentionally left untouched: showClustering uses
    # positions in the label array as indexes into this list.
    # NOTE(review): presumably the saved labels were produced from the same
    # listing order -- confirm before sorting or filtering here.
    return [
        os.path.join(imdir_ideology, name)
        for name in os.listdir(imdir_ideology)
    ]
# Build the image path list once; showClustering reads this module-level
# list by index when it loads the images of a cluster.
ideology_files_path=load_filepaths()
# Last expression of the cell: the notebook displays the number of paths.
len(ideology_files_path)
2942
def _figure_height(n_images):
    """Return the figure height (inches) used for a grid of ``n_images`` images."""
    if n_images >= 500:
        return 400
    if n_images >= 100:
        # Includes exactly 100, which previously matched no branch at all.
        return 70
    if n_images >= 50:
        return 30
    if n_images >= 20:
        return 20
    return 8


def showClustering(predicted_labels,label):
    """Plot every image assigned to one cluster in a 4-column grid.

    Prints the cluster id and its image count, then draws the images whose
    position in ``predicted_labels`` equals ``label``. Image paths are read
    from the module-level ``ideology_files_path`` list using those positions.

    Args:
        predicted_labels: One cluster id per image, in the same order as
            ``ideology_files_path``.
        label: The cluster id to display.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    label_indexs = np.where(np.asarray(predicted_labels) == label)[0]
    n_images = len(label_indexs)
    print("CLUSTER--> ",label,"TOTAL IMAGES--> ",n_images)
    if n_images == 0:
        # Nothing to draw; avoid opening an empty figure.
        return

    # Grid geometry does not change per image, so compute it once.
    columns = 4
    # add_subplot needs integer grid dimensions; np.ceil returns a float.
    rows = int(np.ceil(n_images / float(columns)))
    fig = plt.figure(figsize=(10, _figure_height(n_images)))

    for i, index in enumerate(label_indexs):
        image_path = ideology_files_path[index]
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None;
            # report it and leave this grid slot empty instead of crashing.
            print("Could not read image:", image_path)
            continue
        # OpenCV loads images as BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        fig.add_subplot(rows, columns, i + 1)
        plt.imshow(image)
    plt.show()
Results for VGG16 are given below; all methods produce very few clusters.
# Load the saved results CSV for the VGG16 + PCA run (model taken from the
# file name).
results_df=pd.read_csv('image-results-pca/ideology_ideology_model_vgg_16_pca.npy.csv')
# Last expression of the cell: the notebook renders the table.
results_df
| | Unnamed: 0 | n_clusters | silhouette | calinski | davies |
---|---|---|---|---|---|
0 | Agglomerative clustering | 3 | 0.073573 | 226.456332 | 3.276230 |
1 | DBSCAN | 1 | 0.108875 | 183.589658 | 3.913978 |
2 | Mean-shift | 4 | 0.453307 | 7.895188 | 0.596651 |
3 | Optics | 1 | 0.096056 | 36.870376 | 1.573101 |
4 | Agglomerative clustering-scipy | 18 | 0.136637 | 7.209098 | 0.930475 |
5 | HDBSCAN | 2 | 0.075164 | 103.028909 | 3.392240 |
Results for VGG19 are given below; all methods produce very few clusters.
# Load the saved results CSV for the VGG19 + PCA run (model taken from the
# file name).
results_df=pd.read_csv('image-results-pca/ideology_ideology_model_vgg_19_pca.npy.csv')
# Last expression of the cell: the notebook renders the table.
results_df
| | Unnamed: 0 | n_clusters | silhouette | calinski | davies |
---|---|---|---|---|---|
0 | Agglomerative clustering | 2 | 0.124657 | 472.069456 | 2.121375 |
1 | DBSCAN | 1 | 0.121619 | 178.199106 | 3.761432 |
2 | Mean-shift | 3 | 0.440457 | 9.961915 | 0.750822 |
3 | Optics | 1 | -0.090059 | 29.809418 | 1.973613 |
4 | Agglomerative clustering-scipy | 13 | 0.200859 | 7.206933 | 0.830232 |
5 | HDBSCAN | 2 | 0.127450 | 65.346445 | 3.418582 |
Results for ResNet50 are given below; all methods produce very few clusters except Agglomerative clustering-scipy.
# Load the saved results CSV for the ResNet50 + PCA run (model taken from the
# file name).
results_df=pd.read_csv('image-results-pca/ideology_ideology_model_resnet50_pca.npy.csv')
# Last expression of the cell: the notebook renders the table.
results_df
| | Unnamed: 0 | n_clusters | silhouette | calinski | davies |
---|---|---|---|---|---|
0 | Agglomerative clustering | 1 | -1.000000 | -1.000000 | -1.000000 |
1 | DBSCAN | 2 | 0.039348 | 73.061200 | 4.003025 |
2 | Mean-shift | 3 | 0.303749 | 5.922630 | 1.112865 |
3 | Optics | 2 | -0.020987 | 35.818776 | 1.486827 |
4 | Agglomerative clustering-scipy | 88 | 0.037100 | 13.610160 | 1.316000 |
5 | HDBSCAN | 13 | -0.129986 | 24.641644 | 2.137722 |
# Saved per-image cluster ids; the file name indicates the SciPy agglomerative
# run on the ResNet50 + PCA features.
predicted_Labels=np.load('image-results-pca/ideology_ideology_model_resnet50_pca.npy_agg-scipy_labels.npy')
unique_labels=set(predicted_Labels)
# Iterate in sorted order so clusters are always shown by ascending id instead
# of depending on set iteration order.
for label in sorted(unique_labels):
    # -1 is skipped -- presumably the noise label some clusterers emit; confirm.
    if label != -1:
        showClustering(predicted_Labels, label)
CLUSTER--> 1 TOTAL IMAGES--> 1
CLUSTER--> 2 TOTAL IMAGES--> 1
CLUSTER--> 3 TOTAL IMAGES--> 1
CLUSTER--> 4 TOTAL IMAGES--> 1
CLUSTER--> 5 TOTAL IMAGES--> 2
CLUSTER--> 6 TOTAL IMAGES--> 2
CLUSTER--> 7 TOTAL IMAGES--> 4
CLUSTER--> 8 TOTAL IMAGES--> 6
CLUSTER--> 9 TOTAL IMAGES--> 1
CLUSTER--> 10 TOTAL IMAGES--> 4
CLUSTER--> 11 TOTAL IMAGES--> 6
CLUSTER--> 12 TOTAL IMAGES--> 1
CLUSTER--> 13 TOTAL IMAGES--> 6
CLUSTER--> 14 TOTAL IMAGES--> 1
CLUSTER--> 15 TOTAL IMAGES--> 3
CLUSTER--> 16 TOTAL IMAGES--> 1
CLUSTER--> 17 TOTAL IMAGES--> 2
CLUSTER--> 18 TOTAL IMAGES--> 1
CLUSTER--> 19 TOTAL IMAGES--> 15
CLUSTER--> 20 TOTAL IMAGES--> 9
CLUSTER--> 21 TOTAL IMAGES--> 3
CLUSTER--> 22 TOTAL IMAGES--> 1
CLUSTER--> 23 TOTAL IMAGES--> 7
CLUSTER--> 24 TOTAL IMAGES--> 2
CLUSTER--> 25 TOTAL IMAGES--> 4
CLUSTER--> 26 TOTAL IMAGES--> 13
CLUSTER--> 27 TOTAL IMAGES--> 14
CLUSTER--> 28 TOTAL IMAGES--> 3
CLUSTER--> 29 TOTAL IMAGES--> 2
CLUSTER--> 30 TOTAL IMAGES--> 4
CLUSTER--> 31 TOTAL IMAGES--> 7
CLUSTER--> 32 TOTAL IMAGES--> 1
CLUSTER--> 33 TOTAL IMAGES--> 1
CLUSTER--> 34 TOTAL IMAGES--> 4
CLUSTER--> 35 TOTAL IMAGES--> 1
CLUSTER--> 36 TOTAL IMAGES--> 2
CLUSTER--> 37 TOTAL IMAGES--> 1
CLUSTER--> 38 TOTAL IMAGES--> 10
CLUSTER--> 39 TOTAL IMAGES--> 2
CLUSTER--> 40 TOTAL IMAGES--> 1
CLUSTER--> 41 TOTAL IMAGES--> 1
CLUSTER--> 42 TOTAL IMAGES--> 7
CLUSTER--> 43 TOTAL IMAGES--> 3
CLUSTER--> 44 TOTAL IMAGES--> 4
CLUSTER--> 45 TOTAL IMAGES--> 3
CLUSTER--> 46 TOTAL IMAGES--> 1
CLUSTER--> 47 TOTAL IMAGES--> 19
CLUSTER--> 48 TOTAL IMAGES--> 11
CLUSTER--> 49 TOTAL IMAGES--> 6
CLUSTER--> 50 TOTAL IMAGES--> 6
CLUSTER--> 51 TOTAL IMAGES--> 39
CLUSTER--> 52 TOTAL IMAGES--> 3
CLUSTER--> 53 TOTAL IMAGES--> 12
CLUSTER--> 54 TOTAL IMAGES--> 6
CLUSTER--> 55 TOTAL IMAGES--> 4
CLUSTER--> 56 TOTAL IMAGES--> 28
CLUSTER--> 57 TOTAL IMAGES--> 21
CLUSTER--> 58 TOTAL IMAGES--> 4
CLUSTER--> 59 TOTAL IMAGES--> 7
CLUSTER--> 60 TOTAL IMAGES--> 1
CLUSTER--> 61 TOTAL IMAGES--> 7
CLUSTER--> 62 TOTAL IMAGES--> 2
CLUSTER--> 63 TOTAL IMAGES--> 8
CLUSTER--> 64 TOTAL IMAGES--> 4
CLUSTER--> 65 TOTAL IMAGES--> 153
CLUSTER--> 66 TOTAL IMAGES--> 9
CLUSTER--> 67 TOTAL IMAGES--> 3
CLUSTER--> 68 TOTAL IMAGES--> 49
CLUSTER--> 69 TOTAL IMAGES--> 14
CLUSTER--> 70 TOTAL IMAGES--> 14
CLUSTER--> 71 TOTAL IMAGES--> 96
CLUSTER--> 72 TOTAL IMAGES--> 2
CLUSTER--> 73 TOTAL IMAGES--> 6
CLUSTER--> 74 TOTAL IMAGES--> 12
CLUSTER--> 75 TOTAL IMAGES--> 1
CLUSTER--> 76 TOTAL IMAGES--> 2
CLUSTER--> 77 TOTAL IMAGES--> 1081