In [1]:
import pandas as pd
import numpy as np
from collections import Counter
from matplotlib import pyplot as plt
import os
import cv2
%matplotlib inline
In [2]:
%%javascript
// Set the output area's auto-scroll threshold to 9999 so that long cell
// outputs are shown in full (presumably to keep the tall image grids plotted
// further down from being collapsed into a scroll box).
// NOTE(review): `IPython.OutputArea` looks like the classic Notebook
// front-end API; confirm this cell still works in the front end being used.
IPython.OutputArea.auto_scroll_threshold = 9999;
In [3]:
def load_filepaths(imdir_ideology='ideology_image_dataset/'):
    """Return the path of every entry found in the ideology image directory.

    Parameters
    ----------
    imdir_ideology : str, optional
        Directory to list. Defaults to ``'ideology_image_dataset/'`` (relative
        to the current working directory), which is what the function used
        before the directory became a parameter.

    Returns
    -------
    list of str
        ``os.path.join(imdir_ideology, name)`` for each name returned by
        ``os.listdir(imdir_ideology)``, in the order ``os.listdir`` yields them.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` (e.g. the directory does not exist).

    Notes
    -----
    The list is intentionally NOT sorted: ``showClustering`` uses positions in
    this list as image indices, so the order must stay aligned with whatever
    order was used when the cluster labels were produced.
    NOTE(review): that order is presumably plain ``os.listdir`` order as well;
    confirm against the feature-extraction code.
    """
    # Use the directory argument for both the listing and the join so the two
    # can never drift apart (the literal used to be duplicated).
    return [os.path.join(imdir_ideology, name) for name in os.listdir(imdir_ideology)]

# Module-level list of image paths; showClustering() reads this global and
# indexes it by position, so it must be defined before that function is called.
ideology_files_path=load_filepaths()
# Last expression of the cell: displays how many paths were found.
len(ideology_files_path)
Out[3]:
2942
In [4]:
def _cluster_figure_height(n_images):
    """Return the figure height (in inches) used for a cluster of ``n_images`` images."""
    # Every cluster size maps to a bucket. The previous if/elif chain had a gap
    # at exactly 100 images (``> 100`` vs ``< 100``), leaving the figure unset.
    if n_images >= 500:
        return 400
    if n_images >= 100:
        return 70
    if n_images >= 50:
        return 30
    if n_images >= 20:
        return 20
    return 8


def showClustering(predicted_labels, label, files_path=None, columns=4):
    """Plot every image assigned to one cluster in a grid.

    Parameters
    ----------
    predicted_labels : array-like
        One cluster label per image; position ``i`` refers to ``files_path[i]``.
    label : scalar
        The cluster label whose images should be shown.
    files_path : sequence of str, optional
        Image paths indexed by position. Defaults to the notebook-level
        ``ideology_files_path`` list, which is what the function always used
        before this parameter existed.
    columns : int, optional
        Number of images per grid row (default 4, the previous fixed value).

    Returns
    -------
    None
        Prints the cluster label and size, then draws the figure via
        ``plt.show()``.

    Notes
    -----
    Images that ``cv2.imread`` cannot load are reported with a printed message
    and skipped (their grid slot stays empty) instead of crashing the plot.
    """
    if files_path is None:
        files_path = ideology_files_path

    # np.asarray lets plain Python lists be compared element-wise as well.
    label_indexs = np.where(np.asarray(predicted_labels) == label)[0]
    n_images = len(label_indexs)
    print("CLUSTER--> ",label,"TOTAL IMAGES--> ",n_images)

    fig = plt.figure(figsize=(10, _cluster_figure_height(n_images)))

    # Grid shape is loop-invariant, so compute it once. add_subplot needs an
    # integer row count (np.ceil returns a float); keep at least one row so the
    # value is valid even for an empty cluster.
    rows = max(1, int(np.ceil(n_images / float(columns))))

    for i, index in enumerate(label_indexs):
        image = cv2.imread(files_path[index])
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            print("Could not read image: ", files_path[index])
            continue
        # OpenCV loads BGR; matplotlib expects RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        ax = fig.add_subplot(rows, columns, i + 1)
        ax.imshow(image)

    plt.show()
In [5]:
# Clustering evaluation table (per the file name: ResNet50 features + PCA).
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere unless it is made relative/configurable.
# NOTE(review): the displayed "Unnamed: 0" column suggests the CSV was written
# with its index; reading with index_col=0 would likely tidy it up. Confirm
# nothing relies on that column name first.
ideology_results_csv = 'D://Himani-work/gsoc2020/code/image-results-hier/ideology_model_resnet50.npy-pca.csv'
results_df = pd.read_csv(ideology_results_csv)
results_df
Out[5]:
Unnamed: 0 n_clusters silhouette calinski davies
0 Birch 1 -1.000000 -1.000000 -1.000000
1 Agglomerative clustering 2 0.381774 4.117515 0.484349
In [ ]: