In [1]:
import pandas as pd
import numpy as np
from collections import Counter
from matplotlib import pyplot as plt
import os
import cv2
%matplotlib inline
In [2]:
%%javascript
// Set the notebook output area's auto-scroll threshold to 9999.
// NOTE(review): presumably this keeps the long cluster listings and image
// grids below from being collapsed into a scroll box - confirm that the
// Jupyter front end in use still exposes IPython.OutputArea.
IPython.OutputArea.auto_scroll_threshold = 9999;
In [3]:
def load_filepaths(imdir_ideology='ideology_image_dataset/'):
    """Return the path of every entry in the ideology image directory.

    Parameters
    ----------
    imdir_ideology : str, optional
        Directory to list. Defaults to ``'ideology_image_dataset/'``, the
        location that used to be hard-coded (twice) in this function.

    Returns
    -------
    list of str
        ``os.path.join(imdir_ideology, name)`` for each name returned by
        ``os.listdir(imdir_ideology)``, in the order ``os.listdir`` yields them.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when the directory cannot be read.
    """
    # NOTE(review): os.listdir returns entries in arbitrary order. The result is
    # deliberately NOT sorted here: showClustering indexes this list with
    # positions taken from saved label arrays, so the order presumably has to
    # match whatever order was used when those labels were produced - confirm
    # before changing it.
    return [
        os.path.join(imdir_ideology, name)
        for name in os.listdir(imdir_ideology)
    ]
In [4]:
# Build the list of image paths once; showClustering indexes into this
# module-level list by position.
ideology_files_path=load_filepaths()
# Cell output: number of entries found in the dataset directory.
len(ideology_files_path)
Out[4]:
2942
In [5]:
def _cluster_figsize(n_images):
    """Return the (width, height) in inches for a grid of ``n_images`` thumbnails."""
    if n_images >= 500:
        return (10, 400)
    if n_images >= 100:
        return (10, 70)
    if n_images >= 50:
        return (10, 30)
    if n_images >= 20:
        return (10, 20)
    return (10, 8)


def showClustering(predicted_labels, label, file_paths=None):
    """Print a cluster's size and display all of its images in a 4-column grid.

    Parameters
    ----------
    predicted_labels : array-like
        One cluster label per image; position ``i`` is looked up as
        ``file_paths[i]``.
    label : int
        The cluster label whose member images should be shown.
    file_paths : sequence of str, optional
        Image paths indexed by the positions in ``predicted_labels``. When
        omitted, the module-level ``ideology_files_path`` list is used, which
        is what this function always read before the parameter existed.

    Notes
    -----
    * A cluster of exactly 100 images used to match none of the figure-size
      branches, leaving ``fig`` unassigned; every size now maps to a figure.
    * The subplot row count is passed as an ``int``; ``np.ceil`` returns a
      float, which ``Figure.add_subplot`` does not accept.
    * Files that ``cv2.imread`` cannot decode (it returns ``None``) are
      reported and skipped instead of crashing in ``cv2.cvtColor``.
    """
    if file_paths is None:
        file_paths = ideology_files_path

    # np.asarray lets plain Python lists of labels be compared element-wise.
    label_indexs = np.where(np.asarray(predicted_labels) == label)[0]
    n_images = len(label_indexs)
    print("CLUSTER--> ",label,"TOTAL IMAGES--> ",n_images)

    # Grid geometry does not depend on the loop variable, so compute it once.
    columns = 4
    rows = max(1, int(np.ceil(n_images / float(columns))))

    fig = plt.figure(figsize=_cluster_figsize(n_images))

    for i, index in enumerate(label_indexs):
        path = file_paths[index]
        image = cv2.imread(path)
        if image is None:
            # imread signals an unreadable/missing file by returning None.
            print("Skipping unreadable image:", path)
            continue
        # OpenCV loads BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        ax = fig.add_subplot(rows, columns, i + 1)
        ax.imshow(image)

    plt.show()
    # Release the figure so showing many clusters in a loop does not pile up
    # open figures in memory.
    plt.close(fig)
    
    

VGG16

Results for VGG16 are given below; every algorithm finds very few clusters.

In [6]:
# Load the saved clustering summary for the VGG16 PCA features (one row per
# algorithm, as shown in the cell output) and display it.
results_df=pd.read_csv('image-results-pca/ideology_ideology_model_vgg_16_pca.npy.csv')
results_df
Out[6]:
Unnamed: 0 n_clusters silhouette calinski davies
0 Agglomerative clustering 3 0.073573 226.456332 3.276230
1 DBSCAN 1 0.108875 183.589658 3.913978
2 Mean-shift 4 0.453307 7.895188 0.596651
3 Optics 1 0.096056 36.870376 1.573101
4 Agglomerative clustering-scipy 18 0.136637 7.209098 0.930475
5 HDBSCAN 2 0.075164 103.028909 3.392240

VGG19

Results for VGG19 are given below; every algorithm finds very few clusters.

In [7]:
# Load the saved clustering summary for the VGG19 PCA features (one row per
# algorithm, as shown in the cell output) and display it.
results_df=pd.read_csv('image-results-pca/ideology_ideology_model_vgg_19_pca.npy.csv')
results_df
Out[7]:
Unnamed: 0 n_clusters silhouette calinski davies
0 Agglomerative clustering 2 0.124657 472.069456 2.121375
1 DBSCAN 1 0.121619 178.199106 3.761432
2 Mean-shift 3 0.440457 9.961915 0.750822
3 Optics 1 -0.090059 29.809418 1.973613
4 Agglomerative clustering-scipy 13 0.200859 7.206933 0.830232
5 HDBSCAN 2 0.127450 65.346445 3.418582

RESNET 50

Results for ResNet50 are given below; every algorithm finds very few clusters except agglomerative-scipy.

In [8]:
# Load the saved clustering summary for the ResNet50 PCA features (one row per
# algorithm, as shown in the cell output) and display it.
results_df=pd.read_csv('image-results-pca/ideology_ideology_model_resnet50_pca.npy.csv')
results_df
Out[8]:
Unnamed: 0 n_clusters silhouette calinski davies
0 Agglomerative clustering 1 -1.000000 -1.000000 -1.000000
1 DBSCAN 2 0.039348 73.061200 4.003025
2 Mean-shift 3 0.303749 5.922630 1.112865
3 Optics 2 -0.020987 35.818776 1.486827
4 Agglomerative clustering-scipy 88 0.037100 13.610160 1.316000
5 HDBSCAN 13 -0.129986 24.641644 2.137722
In [ ]:
# Saved per-image cluster labels for the ResNet50 PCA features
# (agglomerative-scipy run, going by the file name).
predicted_Labels=np.load('image-results-pca/ideology_ideology_model_resnet50_pca.npy_agg-scipy_labels.npy')
unique_labels=set(predicted_Labels)
# A set has no guaranteed iteration order; sort so clusters are always shown
# in ascending label order on every run.
for label in sorted(unique_labels):
    # -1 is skipped. NOTE(review): presumably the "noise" label emitted by
    # density-based algorithms - confirm against how the labels were produced.
    if(label!=-1):
        showClustering(predicted_Labels,label)
CLUSTER-->  1 TOTAL IMAGES-->  1
CLUSTER-->  2 TOTAL IMAGES-->  1
CLUSTER-->  3 TOTAL IMAGES-->  1
CLUSTER-->  4 TOTAL IMAGES-->  1
CLUSTER-->  5 TOTAL IMAGES-->  2
CLUSTER-->  6 TOTAL IMAGES-->  2
CLUSTER-->  7 TOTAL IMAGES-->  4
CLUSTER-->  8 TOTAL IMAGES-->  6
CLUSTER-->  9 TOTAL IMAGES-->  1
CLUSTER-->  10 TOTAL IMAGES-->  4
CLUSTER-->  11 TOTAL IMAGES-->  6
CLUSTER-->  12 TOTAL IMAGES-->  1
CLUSTER-->  13 TOTAL IMAGES-->  6
CLUSTER-->  14 TOTAL IMAGES-->  1
CLUSTER-->  15 TOTAL IMAGES-->  3
CLUSTER-->  16 TOTAL IMAGES-->  1
CLUSTER-->  17 TOTAL IMAGES-->  2
CLUSTER-->  18 TOTAL IMAGES-->  1
CLUSTER-->  19 TOTAL IMAGES-->  15
CLUSTER-->  20 TOTAL IMAGES-->  9
CLUSTER-->  21 TOTAL IMAGES-->  3
CLUSTER-->  22 TOTAL IMAGES-->  1
CLUSTER-->  23 TOTAL IMAGES-->  7
CLUSTER-->  24 TOTAL IMAGES-->  2
CLUSTER-->  25 TOTAL IMAGES-->  4
CLUSTER-->  26 TOTAL IMAGES-->  13
CLUSTER-->  27 TOTAL IMAGES-->  14
CLUSTER-->  28 TOTAL IMAGES-->  3
CLUSTER-->  29 TOTAL IMAGES-->  2
CLUSTER-->  30 TOTAL IMAGES-->  4
CLUSTER-->  31 TOTAL IMAGES-->  7
CLUSTER-->  32 TOTAL IMAGES-->  1
CLUSTER-->  33 TOTAL IMAGES-->  1
CLUSTER-->  34 TOTAL IMAGES-->  4
CLUSTER-->  35 TOTAL IMAGES-->  1
CLUSTER-->  36 TOTAL IMAGES-->  2
CLUSTER-->  37 TOTAL IMAGES-->  1
CLUSTER-->  38 TOTAL IMAGES-->  10
CLUSTER-->  39 TOTAL IMAGES-->  2
CLUSTER-->  40 TOTAL IMAGES-->  1
CLUSTER-->  41 TOTAL IMAGES-->  1
CLUSTER-->  42 TOTAL IMAGES-->  7
CLUSTER-->  43 TOTAL IMAGES-->  3
CLUSTER-->  44 TOTAL IMAGES-->  4
CLUSTER-->  45 TOTAL IMAGES-->  3
CLUSTER-->  46 TOTAL IMAGES-->  1
CLUSTER-->  47 TOTAL IMAGES-->  19
CLUSTER-->  48 TOTAL IMAGES-->  11
CLUSTER-->  49 TOTAL IMAGES-->  6
CLUSTER-->  50 TOTAL IMAGES-->  6
CLUSTER-->  51 TOTAL IMAGES-->  39
CLUSTER-->  52 TOTAL IMAGES-->  3
CLUSTER-->  53 TOTAL IMAGES-->  12
CLUSTER-->  54 TOTAL IMAGES-->  6
CLUSTER-->  55 TOTAL IMAGES-->  4
CLUSTER-->  56 TOTAL IMAGES-->  28
CLUSTER-->  57 TOTAL IMAGES-->  21
CLUSTER-->  58 TOTAL IMAGES-->  4
CLUSTER-->  59 TOTAL IMAGES-->  7
CLUSTER-->  60 TOTAL IMAGES-->  1
CLUSTER-->  61 TOTAL IMAGES-->  7
CLUSTER-->  62 TOTAL IMAGES-->  2
CLUSTER-->  63 TOTAL IMAGES-->  8
CLUSTER-->  64 TOTAL IMAGES-->  4
CLUSTER-->  65 TOTAL IMAGES-->  153
CLUSTER-->  66 TOTAL IMAGES-->  9
CLUSTER-->  67 TOTAL IMAGES-->  3
CLUSTER-->  68 TOTAL IMAGES-->  49
CLUSTER-->  69 TOTAL IMAGES-->  14
CLUSTER-->  70 TOTAL IMAGES-->  14
CLUSTER-->  71 TOTAL IMAGES-->  96
CLUSTER-->  72 TOTAL IMAGES-->  2
CLUSTER-->  73 TOTAL IMAGES-->  6
CLUSTER-->  74 TOTAL IMAGES-->  12
CLUSTER-->  75 TOTAL IMAGES-->  1
CLUSTER-->  76 TOTAL IMAGES-->  2
CLUSTER-->  77 TOTAL IMAGES-->  1081
CLUSTER-->  78 TOTAL IMAGES-->  1045
CLUSTER-->  83 TOTAL IMAGES-->  1
CLUSTER-->  84 TOTAL IMAGES-->  1
CLUSTER-->  85 TOTAL IMAGES-->  1
CLUSTER-->  86 TOTAL IMAGES-->  1
CLUSTER-->  87 TOTAL IMAGES-->  1
CLUSTER-->  88 TOTAL IMAGES-->  1

Inception v3

Results for Inception v3 are given below; every algorithm finds very few clusters.

In [9]:
# Load the saved clustering summary for the Inception v3 PCA features (one row
# per algorithm, as shown in the cell output) and display it.
results_df=pd.read_csv('image-results-pca/ideology_ideology_model_inceptionv3_pca.npy.csv')
results_df
Out[9]:
Unnamed: 0 n_clusters silhouette calinski davies
0 Agglomerative clustering 5 0.446428 7.988402 0.345338
1 DBSCAN 1 0.344462 1756.575069 1.174964
2 Mean-shift 5 0.411145 366.167223 1.584272
3 Optics 1 0.296131 74.464018 0.818838
4 Agglomerative clustering-scipy 1 -1.000000 -1.000000 -1.000000
5 HDBSCAN 4 0.025580 460.236241 1.373870