In [22]:
import pandas as pd
import numpy as np
from collections import Counter
from matplotlib import pyplot as plt
import os
import cv2
%matplotlib inline
In [24]:
%%javascript
// Set the output area's auto-scroll threshold to 9999 -- presumably so the long
// per-cluster output further down is rendered in full rather than inside a
// scroll box. NOTE(review): this relies on the `IPython` JavaScript global of
// the classic Notebook frontend; confirm it is available in the frontend in use.
IPython.OutputArea.auto_scroll_threshold = 9999;
In [2]:
def load_filepaths(imdir_ideology='ideology_image_dataset/'):
    """Return the path of every entry found in the image dataset directory.

    Parameters
    ----------
    imdir_ideology : str, optional
        Directory to list. Defaults to ``'ideology_image_dataset/'``, the
        location this function previously had hard-coded.

    Returns
    -------
    list of str
        ``imdir_ideology`` joined with each name returned by ``os.listdir``.

    Raises
    ------
    OSError
        If the directory cannot be listed (e.g. it does not exist).
    """
    # The entries are kept in the order os.listdir returns them (which Python
    # documents as arbitrary). They are intentionally NOT sorted here: the
    # cluster labels loaded elsewhere in this notebook are matched to these
    # paths by position. NOTE(review): confirm the features were extracted
    # from a listing made the same way, otherwise images and labels misalign.
    return [os.path.join(imdir_ideology, name) for name in os.listdir(imdir_ideology)]
In [3]:
# Gather the dataset file paths once; the cell's last expression displays how
# many were found.
ideology_files_path = load_filepaths()
len(ideology_files_path)
Out[3]:
2942
In [21]:
def _figure_height(image_count):
    """Return the figure height (in inches) used for a cluster of ``image_count`` images."""
    # Descending thresholds leave no gaps: previously a cluster of exactly
    # 100 images matched no branch and the figure was never created.
    if image_count >= 500:
        return 400
    if image_count >= 100:
        return 70
    if image_count >= 50:
        return 30
    if image_count >= 20:
        return 20
    return 8


def showClustering(predicted_labels, label, file_paths=None, columns=4):
    """Display every image assigned to one cluster in a grid of subplots.

    Prints the cluster label and its size, then draws the member images
    ``columns`` per row in a single matplotlib figure whose height depends on
    the cluster size.

    Parameters
    ----------
    predicted_labels : array-like
        Cluster label of each image; the image at position ``i`` is read from
        ``file_paths[i]``.
    label : int
        The cluster to display.
    file_paths : sequence of str, optional
        Image paths indexed like ``predicted_labels``. Defaults to the
        notebook-level ``ideology_files_path``.
    columns : int, optional
        Number of images per row (default 4).
    """
    if file_paths is None:
        file_paths = ideology_files_path

    # np.asarray makes the comparison element-wise even for plain lists.
    label_indexs = np.where(np.asarray(predicted_labels) == label)[0]
    total = len(label_indexs)
    print("CLUSTER--> ",label,"TOTAL IMAGES--> ",total)

    fig = plt.figure(figsize=(10, _figure_height(total)))

    # Integer ceiling division: add_subplot requires an integer row count,
    # and the grid shape does not change inside the loop.
    rows = (total + columns - 1) // columns

    for position, index in enumerate(label_indexs, start=1):
        path = file_paths[index]
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None; leave
            # that grid slot empty instead of crashing in cvtColor.
            print("WARNING: could not read image:", path)
            continue

        # OpenCV loads images as BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        ax = fig.add_subplot(rows, columns, position)
        ax.imshow(image)

    plt.show()
    # Release the figure: this function is called once per cluster in a loop.
    plt.close(fig)
    
    

IMAGE CLUSTERING RESULTS WITH GRIDSEARCH

VGG-16

The VGG-16 results on the dataset are shown below. We will not visualize the clustering results, because the algorithms produced only a few clusters.

In [6]:
# Read the stored per-algorithm results table for the VGG-16 model and display it.
results_df = pd.read_csv(
    'image-results/ideology_model_vgg_16.npy.csv'
)
results_df
Out[6]:
Unnamed: 0 n_clusters silhouette calinski davies
0 Agglomerative clustering 3 0.060619 201.476814 3.509592
1 DBSCAN 1 0.125873 135.000139 4.317671
2 Mean-shift 8 0.310763 8.671081 0.786358
3 Optics 1 0.064625 30.831560 1.613224
4 Agglomerative clustering-scipy 30 0.109012 6.717517 0.991022
5 HDBSCAN 2 0.117461 67.867594 3.463518

VGG-19

The VGG-19 results on the dataset are shown below. We will not visualize the clustering results, because the algorithms produced only a few clusters.

In [12]:
# Read the stored per-algorithm results table for the VGG-19 model and display it.
results_df = pd.read_csv(
    'image-results/ideology_model_vgg_19.npy.csv'
)
results_df
Out[12]:
Unnamed: 0 n_clusters silhouette calinski davies
0 Agglomerative clustering 3 0.086125 288.004901 3.041691
1 DBSCAN 1 0.135645 144.731200 3.944478
2 Mean-shift 4 0.318129 6.334227 0.698924
3 Optics 1 -0.105464 17.878038 1.783931
4 Agglomerative clustering-scipy 17 0.173857 5.670576 0.762570
5 HDBSCAN 3 -0.054606 75.892894 3.302271

Resnet-50

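The results for ResNet-50 are shown below. Agglomerative clustering with SciPy produced a large number of clusters (211), so let's visualize those. Note from the output that many of these clusters contain a single image, while cluster 174 alone holds 1873 of the 2942 images.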

In [15]:
# Read the stored per-algorithm results table for the ResNet-50 model and display it.
results_df = pd.read_csv(
    'image-results/ideology_model_resnet50.npy.csv'
)
results_df
Out[15]:
Unnamed: 0 n_clusters silhouette calinski davies
0 Agglomerative clustering 1 -1.000000 -1.000000 -1.000000
1 DBSCAN 1 0.088976 96.115231 5.169620
2 Mean-shift 6 0.257239 3.265291 0.559079
3 Optics 1 -0.055711 40.784068 1.747502
4 Agglomerative clustering-scipy 211 0.025470 5.737732 1.150174
5 HDBSCAN 12 -0.128384 18.965519 2.631275
In [25]:
# Load the saved cluster labels (one per image) for the ResNet-50 /
# agglomerative-scipy run and show the images of each cluster.
predicted_Labels = np.load('image-results/ideology_model_resnet50.npy_agg-scipy_labels.npy')
unique_labels = set(predicted_Labels)

# Iterate in ascending label order explicitly: a set has no guaranteed
# iteration order, so the order of the displayed clusters would otherwise
# depend on hashing details.
for label in sorted(unique_labels):
    # -1 is skipped -- presumably the "noise" label emitted by density-based
    # algorithms; NOTE(review): confirm it can occur in these label files.
    if label != -1:
        showClustering(predicted_Labels, label)
CLUSTER-->  1 TOTAL IMAGES-->  1
CLUSTER-->  2 TOTAL IMAGES-->  1
CLUSTER-->  3 TOTAL IMAGES-->  1
CLUSTER-->  4 TOTAL IMAGES-->  1
CLUSTER-->  5 TOTAL IMAGES-->  1
CLUSTER-->  6 TOTAL IMAGES-->  1
CLUSTER-->  7 TOTAL IMAGES-->  1
CLUSTER-->  8 TOTAL IMAGES-->  1
CLUSTER-->  9 TOTAL IMAGES-->  1
CLUSTER-->  10 TOTAL IMAGES-->  2
CLUSTER-->  11 TOTAL IMAGES-->  1
CLUSTER-->  12 TOTAL IMAGES-->  1
CLUSTER-->  13 TOTAL IMAGES-->  1
CLUSTER-->  14 TOTAL IMAGES-->  1
CLUSTER-->  15 TOTAL IMAGES-->  1
CLUSTER-->  16 TOTAL IMAGES-->  1
CLUSTER-->  17 TOTAL IMAGES-->  1
CLUSTER-->  18 TOTAL IMAGES-->  3
CLUSTER-->  19 TOTAL IMAGES-->  1
CLUSTER-->  20 TOTAL IMAGES-->  1
CLUSTER-->  21 TOTAL IMAGES-->  1
CLUSTER-->  22 TOTAL IMAGES-->  1
CLUSTER-->  23 TOTAL IMAGES-->  1
CLUSTER-->  24 TOTAL IMAGES-->  1
CLUSTER-->  25 TOTAL IMAGES-->  4
CLUSTER-->  26 TOTAL IMAGES-->  5
CLUSTER-->  27 TOTAL IMAGES-->  1
CLUSTER-->  28 TOTAL IMAGES-->  1
CLUSTER-->  29 TOTAL IMAGES-->  1
CLUSTER-->  30 TOTAL IMAGES-->  2
CLUSTER-->  31 TOTAL IMAGES-->  1
CLUSTER-->  32 TOTAL IMAGES-->  7
CLUSTER-->  33 TOTAL IMAGES-->  1
CLUSTER-->  34 TOTAL IMAGES-->  1
CLUSTER-->  35 TOTAL IMAGES-->  1
CLUSTER-->  36 TOTAL IMAGES-->  2
CLUSTER-->  37 TOTAL IMAGES-->  4
CLUSTER-->  38 TOTAL IMAGES-->  5
CLUSTER-->  39 TOTAL IMAGES-->  1
CLUSTER-->  40 TOTAL IMAGES-->  1
CLUSTER-->  41 TOTAL IMAGES-->  2
CLUSTER-->  42 TOTAL IMAGES-->  5
CLUSTER-->  43 TOTAL IMAGES-->  1
CLUSTER-->  44 TOTAL IMAGES-->  5
CLUSTER-->  45 TOTAL IMAGES-->  1
CLUSTER-->  46 TOTAL IMAGES-->  1
CLUSTER-->  47 TOTAL IMAGES-->  2
CLUSTER-->  48 TOTAL IMAGES-->  2
CLUSTER-->  49 TOTAL IMAGES-->  2
CLUSTER-->  50 TOTAL IMAGES-->  1
CLUSTER-->  51 TOTAL IMAGES-->  1
CLUSTER-->  52 TOTAL IMAGES-->  1
CLUSTER-->  53 TOTAL IMAGES-->  2
CLUSTER-->  54 TOTAL IMAGES-->  1
CLUSTER-->  55 TOTAL IMAGES-->  1
CLUSTER-->  56 TOTAL IMAGES-->  1
CLUSTER-->  57 TOTAL IMAGES-->  2
CLUSTER-->  58 TOTAL IMAGES-->  1
CLUSTER-->  59 TOTAL IMAGES-->  6
CLUSTER-->  60 TOTAL IMAGES-->  1
CLUSTER-->  61 TOTAL IMAGES-->  1
CLUSTER-->  62 TOTAL IMAGES-->  1
CLUSTER-->  63 TOTAL IMAGES-->  1
CLUSTER-->  64 TOTAL IMAGES-->  3
CLUSTER-->  65 TOTAL IMAGES-->  2
CLUSTER-->  66 TOTAL IMAGES-->  4
CLUSTER-->  67 TOTAL IMAGES-->  2
CLUSTER-->  68 TOTAL IMAGES-->  1
CLUSTER-->  69 TOTAL IMAGES-->  1
CLUSTER-->  70 TOTAL IMAGES-->  1
CLUSTER-->  71 TOTAL IMAGES-->  1
CLUSTER-->  72 TOTAL IMAGES-->  1
CLUSTER-->  73 TOTAL IMAGES-->  1
CLUSTER-->  74 TOTAL IMAGES-->  4
CLUSTER-->  75 TOTAL IMAGES-->  4
CLUSTER-->  76 TOTAL IMAGES-->  1
CLUSTER-->  77 TOTAL IMAGES-->  1
CLUSTER-->  78 TOTAL IMAGES-->  1
CLUSTER-->  79 TOTAL IMAGES-->  1
CLUSTER-->  80 TOTAL IMAGES-->  2
CLUSTER-->  81 TOTAL IMAGES-->  1
CLUSTER-->  82 TOTAL IMAGES-->  1
CLUSTER-->  83 TOTAL IMAGES-->  2
CLUSTER-->  84 TOTAL IMAGES-->  1
CLUSTER-->  85 TOTAL IMAGES-->  1
CLUSTER-->  86 TOTAL IMAGES-->  2
CLUSTER-->  87 TOTAL IMAGES-->  7
CLUSTER-->  88 TOTAL IMAGES-->  2
CLUSTER-->  89 TOTAL IMAGES-->  4
CLUSTER-->  90 TOTAL IMAGES-->  1
CLUSTER-->  91 TOTAL IMAGES-->  3
CLUSTER-->  92 TOTAL IMAGES-->  1
CLUSTER-->  93 TOTAL IMAGES-->  1
CLUSTER-->  94 TOTAL IMAGES-->  1
CLUSTER-->  95 TOTAL IMAGES-->  3
CLUSTER-->  96 TOTAL IMAGES-->  2
CLUSTER-->  97 TOTAL IMAGES-->  2
CLUSTER-->  98 TOTAL IMAGES-->  7
CLUSTER-->  99 TOTAL IMAGES-->  1
CLUSTER-->  100 TOTAL IMAGES-->  2
CLUSTER-->  101 TOTAL IMAGES-->  2
CLUSTER-->  102 TOTAL IMAGES-->  3
CLUSTER-->  103 TOTAL IMAGES-->  3
CLUSTER-->  104 TOTAL IMAGES-->  5
CLUSTER-->  105 TOTAL IMAGES-->  1
CLUSTER-->  106 TOTAL IMAGES-->  2
CLUSTER-->  107 TOTAL IMAGES-->  8
CLUSTER-->  108 TOTAL IMAGES-->  2
CLUSTER-->  109 TOTAL IMAGES-->  7
CLUSTER-->  110 TOTAL IMAGES-->  4
CLUSTER-->  111 TOTAL IMAGES-->  3
CLUSTER-->  112 TOTAL IMAGES-->  21
CLUSTER-->  113 TOTAL IMAGES-->  1
CLUSTER-->  114 TOTAL IMAGES-->  1
CLUSTER-->  115 TOTAL IMAGES-->  1
CLUSTER-->  116 TOTAL IMAGES-->  2
CLUSTER-->  117 TOTAL IMAGES-->  11
CLUSTER-->  118 TOTAL IMAGES-->  1
CLUSTER-->  119 TOTAL IMAGES-->  3
CLUSTER-->  120 TOTAL IMAGES-->  11
CLUSTER-->  121 TOTAL IMAGES-->  5
CLUSTER-->  122 TOTAL IMAGES-->  1
CLUSTER-->  123 TOTAL IMAGES-->  1
CLUSTER-->  124 TOTAL IMAGES-->  2
CLUSTER-->  125 TOTAL IMAGES-->  2
CLUSTER-->  126 TOTAL IMAGES-->  2
CLUSTER-->  127 TOTAL IMAGES-->  3
CLUSTER-->  128 TOTAL IMAGES-->  81
CLUSTER-->  129 TOTAL IMAGES-->  1
CLUSTER-->  130 TOTAL IMAGES-->  2
CLUSTER-->  131 TOTAL IMAGES-->  2
CLUSTER-->  132 TOTAL IMAGES-->  1
CLUSTER-->  133 TOTAL IMAGES-->  10
CLUSTER-->  134 TOTAL IMAGES-->  2
CLUSTER-->  135 TOTAL IMAGES-->  2
CLUSTER-->  136 TOTAL IMAGES-->  2
CLUSTER-->  137 TOTAL IMAGES-->  27
CLUSTER-->  138 TOTAL IMAGES-->  19
CLUSTER-->  139 TOTAL IMAGES-->  3
CLUSTER-->  140 TOTAL IMAGES-->  4
CLUSTER-->  141 TOTAL IMAGES-->  1
CLUSTER-->  142 TOTAL IMAGES-->  11
CLUSTER-->  143 TOTAL IMAGES-->  1
CLUSTER-->  144 TOTAL IMAGES-->  1
CLUSTER-->  145 TOTAL IMAGES-->  2
CLUSTER-->  146 TOTAL IMAGES-->  1
CLUSTER-->  147 TOTAL IMAGES-->  11
CLUSTER-->  148 TOTAL IMAGES-->  5
CLUSTER-->  149 TOTAL IMAGES-->  3
CLUSTER-->  150 TOTAL IMAGES-->  3
CLUSTER-->  151 TOTAL IMAGES-->  1
CLUSTER-->  152 TOTAL IMAGES-->  3
CLUSTER-->  153 TOTAL IMAGES-->  5
CLUSTER-->  154 TOTAL IMAGES-->  9
CLUSTER-->  155 TOTAL IMAGES-->  16
CLUSTER-->  156 TOTAL IMAGES-->  5
CLUSTER-->  157 TOTAL IMAGES-->  18
CLUSTER-->  158 TOTAL IMAGES-->  154
CLUSTER-->  159 TOTAL IMAGES-->  2
CLUSTER-->  160 TOTAL IMAGES-->  2
CLUSTER-->  161 TOTAL IMAGES-->  7
CLUSTER-->  162 TOTAL IMAGES-->  6
CLUSTER-->  163 TOTAL IMAGES-->  1
CLUSTER-->  164 TOTAL IMAGES-->  10
CLUSTER-->  165 TOTAL IMAGES-->  5
CLUSTER-->  166 TOTAL IMAGES-->  1
CLUSTER-->  167 TOTAL IMAGES-->  4
CLUSTER-->  168 TOTAL IMAGES-->  8
CLUSTER-->  169 TOTAL IMAGES-->  47
CLUSTER-->  170 TOTAL IMAGES-->  5
CLUSTER-->  171 TOTAL IMAGES-->  3
CLUSTER-->  172 TOTAL IMAGES-->  87
CLUSTER-->  173 TOTAL IMAGES-->  2
CLUSTER-->  174 TOTAL IMAGES-->  1873
CLUSTER-->  175 TOTAL IMAGES-->  13
CLUSTER-->  176 TOTAL IMAGES-->  1
CLUSTER-->  177 TOTAL IMAGES-->  9
CLUSTER-->  178 TOTAL IMAGES-->  1
CLUSTER-->  179 TOTAL IMAGES-->  6
CLUSTER-->  180 TOTAL IMAGES-->  51
CLUSTER-->  181 TOTAL IMAGES-->  2
CLUSTER-->  182 TOTAL IMAGES-->  5
CLUSTER-->  183 TOTAL IMAGES-->  1
CLUSTER-->  184 TOTAL IMAGES-->  1
CLUSTER-->  185 TOTAL IMAGES-->  2
CLUSTER-->  186 TOTAL IMAGES-->  3
CLUSTER-->  187 TOTAL IMAGES-->  6
CLUSTER-->  188 TOTAL IMAGES-->  6
CLUSTER-->  189 TOTAL IMAGES-->  48
CLUSTER-->  190 TOTAL IMAGES-->  1
CLUSTER-->  191 TOTAL IMAGES-->  1
CLUSTER-->  192 TOTAL IMAGES-->  1
CLUSTER-->  193 TOTAL IMAGES-->  3
CLUSTER-->  194 TOTAL IMAGES-->  2
CLUSTER-->  195 TOTAL IMAGES-->  1
CLUSTER-->  196 TOTAL IMAGES-->  1
CLUSTER-->  197 TOTAL IMAGES-->  1
CLUSTER-->  198 TOTAL IMAGES-->  1
CLUSTER-->  199 TOTAL IMAGES-->  1
CLUSTER-->  200 TOTAL IMAGES-->  1
CLUSTER-->  201 TOTAL IMAGES-->  1
CLUSTER-->  202 TOTAL IMAGES-->  3
CLUSTER-->  203 TOTAL IMAGES-->  1
CLUSTER-->  204 TOTAL IMAGES-->  1
CLUSTER-->  205 TOTAL IMAGES-->  1
CLUSTER-->  206 TOTAL IMAGES-->  1
CLUSTER-->  207 TOTAL IMAGES-->  1
CLUSTER-->  208 TOTAL IMAGES-->  1
CLUSTER-->  209 TOTAL IMAGES-->  1
CLUSTER-->  210 TOTAL IMAGES-->  1
CLUSTER-->  211 TOTAL IMAGES-->  1

InceptionV3

The number of clusters returned is very small.

In [16]:
# Read the stored per-algorithm results table for the InceptionV3 model and display it.
results_df = pd.read_csv(
    'image-results/ideology_model_inceptionv3.npy.csv'
)
results_df
Out[16]:
Unnamed: 0 n_clusters silhouette calinski davies
0 Agglomerative clustering 2 0.522594 749.840801 0.830656
1 DBSCAN 1 0.295573 1465.284051 1.247779
2 Mean-shift 8 0.373736 268.109170 2.562576
3 Optics 1 0.277729 66.339734 0.857964
4 Agglomerative clustering-scipy 2 0.522594 749.840801 0.830656
5 HDBSCAN 3 0.356685 649.991763 1.457040