In [2]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler,StandardScaler,normalize
import matplotlib.pyplot as plt 
#importing the header files 
from collections import Counter

from sklearn.manifold import TSNE
import time
import sklearn
from sklearn.decomposition import PCA 

from sklearn import mixture


from sklearn.preprocessing import MinMaxScaler,StandardScaler,LabelEncoder
from sklearn.model_selection import GridSearchCV

import plotly.express as px
from collections import Counter 
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import umap
from audio_results_util import load_dataset,plotClusters,encodedLabels,plotData3d,plotData2d,plotAllClusterModels,actualDistribution
%matplotlib inline
In [3]:
# Load the chroma vowel feature CSV. The helper hands back three objects, unpacked here as
# the feature table, the raw labels and their encoded form (naming per this unpacking;
# the helper itself lives in audio_results_util and is not shown in this notebook).
train_data,true_labels,true_encoded_labels=load_dataset('Audio_features_vowel/chroma_muslim_features_vowel.csv')
In [4]:
# Sanity check: dimensions of the loaded feature table as (rows, columns).
train_data.shape
Out[4]:
(240, 12)
In [5]:
# Read the saved clustering-metric table for the features as loaded (no dimensionality reduction
# suffix in the file name). No index column is specified, so the model names show up in an
# 'Unnamed: 0' column; passing index_col=0 would use them as the row index instead.
results_df=pd.read_csv('audio-results4/chroma_muslim_features_vowel.csv')
# Bare last expression so the notebook renders the table.
results_df
Out[5]:
Unnamed: 0 n_clusters ARI AMI H C V FM A R P silhouette calinski davies
0 K-Means 5 0.002115 -4.089786e-03 0.031572 0.020312 0.024720 0.316719 0.266667 0.242723 0.218602 0.205329 52.488682 1.465689
1 Agglomerative clustering 5 0.008927 -6.422961e-03 0.028640 0.018408 0.022412 0.320990 0.245833 0.188949 0.212859 0.187200 44.802480 1.609729
2 Birch 4 0.029409 5.673560e-03 0.033493 0.025566 0.028997 0.382217 0.304167 0.233684 0.179754 0.209103 53.402907 1.567515
3 DBSCAN 3 -0.027833 3.830049e-03 0.030512 0.025751 0.027930 0.373499 0.183333 0.104983 0.131799 0.084136 23.326012 2.451226
4 Mean-shift 2 -0.002326 -7.700861e-03 0.002239 0.034038 0.004202 0.684781 0.654167 0.197484 0.132489 0.188509 4.682048 1.327661
5 Optics 1 0.000000 -5.797594e-17 0.000000 1.000000 0.000000 0.694318 0.662500 0.200000 0.132500 -1.000000 -1.000000 -1.000000
6 Gaussian-mixture 5 0.005021 -4.587573e-03 0.031150 0.020608 0.024806 0.333282 0.304167 0.276925 0.222496 0.191251 50.008209 1.426081
In [6]:
# Show the ground-truth label distribution for the loaded features.
actualDistribution(train_data,true_labels,true_encoded_labels)
# Plot every clustering model's predictions over the loaded features. The second argument is a
# path prefix; the helper presumably resolves per-model label files from it (e.g. the
# '<prefix>_kmeans_labels.npy' name echoed in the output) — confirm in audio_results_util.
plotAllClusterModels(train_data,'audio-results4/chroma_muslim_features_vowel.csv')
Actual Distribution of the labels in the dataset: --> 
Counter({'a': 159, 'u': 47, 'cantdecide': 13, 'misaligned': 13, 'other': 8})
Counter({0: 159, 4: 47, 1: 13, 2: 13, 3: 8})
audio-results4/chroma_muslim_features_vowel.csv_kmeans_labels.npy
K-Means
predicted_labels--> Counter({0: 67, 4: 47, 2: 47, 3: 43, 1: 36})
 2D representation
 3D representation
Agglomerative
predicted_labels--> Counter({0: 62, 4: 49, 3: 49, 1: 48, 2: 32})
 2D representation
 3D representation
Birch
predicted_labels--> Counter({0: 90, 1: 63, 2: 44, 3: 43})
 2D representation
 3D representation
DBSCAN
predicted_labels--> Counter({1: 91, -1: 85, 0: 53, 2: 11})
 2D representation
 3D representation
OPTICS
predicted_labels--> Counter({0: 240})
 2D representation
 3D representation
MEAN-SHIFT
predicted_labels--> Counter({0: 237, 1: 3})
 2D representation
 3D representation
Gaussian-Mixture
predicted_labels--> Counter({0: 82, 1: 47, 3: 42, 2: 41, 4: 28})
 2D representation
 3D representation
In [ ]:
 

PCA transformed data

In [ ]:
 
In [7]:
# Read the saved clustering-metric table from the '-pca' results file (rebinds results_df).
# No index column is specified, so the model names show up in an 'Unnamed: 0' column.
results_df=pd.read_csv('audio-results4/chroma_muslim_features_vowel.csv-pca.csv')
# Bare last expression so the notebook renders the table.
results_df
Out[7]:
Unnamed: 0 n_clusters ARI AMI H C V FM A R P silhouette calinski davies
0 K-Means 5 0.015101 -0.003368 0.032581 0.021468 0.025882 0.337756 0.175000 0.205434 0.198251 0.361657 146.687968 0.943290
1 Agglomerative clustering 4 -0.000016 -0.000162 0.026793 0.020060 0.022942 0.352362 0.225000 0.233527 0.157179 0.315915 122.420994 0.984559
2 Birch 4 0.011873 -0.001344 0.025420 0.018943 0.021709 0.359015 0.270833 0.251875 0.178349 0.377455 154.283575 0.867953
3 DBSCAN 5 -0.003090 -0.000281 0.043030 0.029035 0.034674 0.343265 0.170833 0.092564 0.187379 0.153362 49.779987 1.409313
4 Mean-shift 2 -0.002725 0.000179 0.008688 0.012948 0.010399 0.492394 0.400000 0.205515 0.144762 0.289283 95.926483 1.465855
5 Optics 3 -0.001027 -0.003856 0.024195 0.026532 0.025309 0.500022 0.104167 0.049750 0.148773 0.095176 42.263204 1.104442
6 Gaussian-mixture 5 0.016742 -0.007428 0.027785 0.018582 0.022270 0.346685 0.250000 0.253955 0.233910 0.357246 143.932885 0.943624
In [8]:
# Reduce the features to three principal components and wrap the resulting array in a
# DataFrame before handing it to the distribution/plotting helpers.
pca_transformed = pd.DataFrame(
    PCA(n_components=3).fit_transform(train_data)
)

# Ground-truth label distribution over the PCA projection.
actualDistribution(pca_transformed, true_labels, true_encoded_labels)

# Every clustering model's predictions, looked up via the '-pca' path prefix.
plotAllClusterModels(
    pca_transformed,
    'audio-results4/chroma_muslim_features_vowel.csv-pca',
)
Actual Distribution of the labels in the dataset: --> 
Counter({'a': 159, 'u': 47, 'cantdecide': 13, 'misaligned': 13, 'other': 8})
Counter({0: 159, 4: 47, 1: 13, 2: 13, 3: 8})
audio-results4/chroma_muslim_features_vowel.csv-pca_kmeans_labels.npy
K-Means
predicted_labels--> Counter({2: 75, 3: 58, 0: 43, 4: 36, 1: 28})
 2D representation
 3D representation
Agglomerative
predicted_labels--> Counter({3: 72, 0: 71, 1: 55, 2: 42})
 2D representation
 3D representation
Birch
predicted_labels--> Counter({0: 78, 1: 58, 2: 55, 3: 49})
 2D representation
 3D representation
DBSCAN
predicted_labels--> Counter({-1: 80, 2: 72, 0: 46, 1: 20, 3: 18, 4: 4})
 2D representation
 3D representation
OPTICS
predicted_labels--> Counter({-1: 168, 0: 29, 2: 27, 1: 16})
 2D representation
 3D representation
MEAN-SHIFT
predicted_labels--> Counter({0: 135, 1: 105})
 2D representation
 3D representation
Gaussian-Mixture
predicted_labels--> Counter({2: 83, 0: 58, 3: 36, 1: 34, 4: 29})
 2D representation
 3D representation
In [ ]:
 

t-SNE transformed data

In [9]:
# Read the saved clustering-metric table from the '-tsne' results file (rebinds results_df).
# No index column is specified, so the model names show up in an 'Unnamed: 0' column.
results_df=pd.read_csv('audio-results4/chroma_muslim_features_vowel.csv-tsne.csv')
# Bare last expression so the notebook renders the table.
results_df
Out[9]:
Unnamed: 0 n_clusters ARI AMI H C V FM A R P silhouette calinski davies
0 K-Means 5 -0.004768 4.217798e-03 0.041856 0.026686 0.032592 0.306369 0.212500 0.201145 0.205989 0.264252 97.069545 1.069569
1 Agglomerative clustering 240 0.000000 -6.363593e-14 1.000000 0.186350 0.314157 0.000000 0.012500 0.000435 0.012500 -1.000000 -1.000000 -1.000000
2 Birch 240 0.000000 -6.363593e-14 1.000000 0.186350 0.314157 0.000000 0.000000 0.000000 0.000000 -1.000000 -1.000000 -1.000000
3 DBSCAN 6 0.009585 5.066973e-03 0.054366 0.036650 0.043783 0.355266 0.116667 0.079892 0.157204 -0.028699 21.959235 1.964273
4 Mean-shift 2 0.026327 6.032368e-03 0.010125 0.121993 0.018698 0.692758 0.658333 0.198742 0.133898 0.226123 9.833873 0.940394
5 Optics 1 0.000000 -5.797594e-17 0.000000 1.000000 0.000000 0.694318 0.662500 0.200000 0.132500 -1.000000 -1.000000 -1.000000
6 Gaussian-mixture 5 -0.007719 -6.592418e-03 0.028447 0.018286 0.022262 0.308292 0.200000 0.143861 0.181979 0.253632 90.660133 1.076181
In [10]:
# Embed the features in three dimensions with t-SNE. t-SNE is stochastic, so the seed is
# fixed to make Restart & Run All reproduce the same embedding (the UMAP cell in this
# notebook is seeded the same way).
tsne_transformed=TSNE(n_components=3, n_jobs=-1, random_state=42).fit_transform(train_data)
tsne_transformed=pd.DataFrame(tsne_transformed)

# Ground-truth label distribution over the t-SNE embedding.
actualDistribution(tsne_transformed,true_labels,true_encoded_labels)

# Plot the stored '-tsne' cluster assignments over the t-SNE embedding itself. The original
# cell passed the untransformed train_data here, so the embedding computed above was never
# plotted, unlike the PCA and UMAP sections which pass their transformed frames.
plotAllClusterModels(tsne_transformed,'audio-results4/chroma_muslim_features_vowel.csv-tsne')
Actual Distribution of the labels in the dataset: --> 
Counter({'a': 159, 'u': 47, 'cantdecide': 13, 'misaligned': 13, 'other': 8})
Counter({0: 159, 4: 47, 1: 13, 2: 13, 3: 8})
audio-results4/chroma_muslim_features_vowel.csv-tsne_kmeans_labels.npy
K-Means
predicted_labels--> Counter({2: 53, 1: 52, 0: 49, 4: 49, 3: 37})
 2D representation
 3D representation
Agglomerative
Too many labels to show
Birch
Too many labels to show
DBSCAN
predicted_labels--> Counter({-1: 84, 1: 70, 2: 39, 0: 34, 4: 5, 5: 5, 3: 3})
 2D representation
 3D representation
OPTICS
predicted_labels--> Counter({0: 240})
 2D representation
 3D representation
MEAN-SHIFT
predicted_labels--> Counter({0: 236, 1: 4})
 2D representation
 3D representation
Gaussian-Mixture
predicted_labels--> Counter({1: 62, 0: 56, 2: 45, 4: 42, 3: 35})
 2D representation
 3D representation
In [ ]:
 

UMAP transformed data

In [11]:
# Read the saved clustering-metric table from the '-umap' results file (rebinds results_df).
# No index column is specified, so the model names show up in an 'Unnamed: 0' column.
results_df=pd.read_csv('audio-results4/chroma_muslim_features_vowel.csv-umap.csv')
# Bare last expression so the notebook renders the table.
results_df
Out[11]:
Unnamed: 0 n_clusters ARI AMI H C V FM A R P silhouette calinski davies
0 K-Means 5 -0.002381 -0.004808 0.030594 0.019486 0.023808 0.307870 0.220833 0.223752 0.200745 0.447447 275.316680 0.820122
1 Agglomerative clustering 6 -0.002773 -0.008113 0.035485 0.020994 0.026381 0.294754 0.212500 0.153325 0.162451 0.461510 273.469692 0.674990
2 Birch 11 0.006132 -0.010724 0.076015 0.033169 0.046185 0.223844 0.070833 0.044254 0.065301 0.490371 370.792562 0.643981
3 DBSCAN 14 -0.002623 -0.011042 0.102730 0.043100 0.060724 0.226853 0.179167 0.040357 0.079343 0.236779 66.147180 1.663446
4 Mean-shift 3 0.005000 0.003733 0.021308 0.019965 0.020615 0.405794 0.312500 0.221480 0.163833 0.438101 238.575207 0.785456
5 Optics 10 0.024508 0.010311 0.108190 0.047902 0.066404 0.252916 0.045833 0.025944 0.062591 0.330220 93.933790 1.321515
6 Gaussian-mixture 5 0.005404 -0.003263 0.032653 0.021242 0.025739 0.324523 0.187500 0.215826 0.205625 0.373655 191.235696 0.833896
In [12]:
# Build a three-component UMAP embedding (seeded for repeatability) and wrap the resulting
# array in a DataFrame before handing it to the distribution/plotting helpers.
umap_transformed = pd.DataFrame(
    umap.UMAP(random_state=42, n_components=3).fit_transform(train_data)
)

# Ground-truth label distribution over the UMAP embedding.
actualDistribution(umap_transformed, true_labels, true_encoded_labels)

# Every clustering model's predictions, looked up via the '-umap' path prefix.
plotAllClusterModels(
    umap_transformed,
    'audio-results4/chroma_muslim_features_vowel.csv-umap',
)
Actual Distribution of the labels in the dataset: --> 
Counter({'a': 159, 'u': 47, 'cantdecide': 13, 'misaligned': 13, 'other': 8})
Counter({0: 159, 4: 47, 1: 13, 2: 13, 3: 8})
audio-results4/chroma_muslim_features_vowel.csv-umap_kmeans_labels.npy
K-Means
predicted_labels--> Counter({0: 55, 4: 53, 2: 46, 3: 45, 1: 41})
 2D representation
 3D representation
Agglomerative
predicted_labels--> Counter({0: 60, 1: 48, 3: 44, 4: 41, 2: 31, 5: 16})
 2D representation
 3D representation
Birch
predicted_labels--> Counter({4: 34, 1: 32, 7: 31, 3: 25, 8: 23, 6: 20, 0: 17, 2: 17, 5: 16, 9: 13, 10: 12})
 2D representation
 3D representation
DBSCAN
predicted_labels--> Counter({0: 53, -1: 43, 6: 17, 1: 16, 3: 16, 4: 15, 2: 14, 8: 14, 9: 13, 10: 9, 5: 8, 12: 6, 7: 6, 11: 6, 13: 4})
 2D representation
 3D representation
OPTICS
predicted_labels--> Counter({-1: 49, 8: 30, 3: 26, 5: 24, 4: 19, 7: 18, 2: 17, 9: 16, 6: 16, 0: 13, 1: 12})
 2D representation
 3D representation
MEAN-SHIFT
predicted_labels--> Counter({0: 95, 1: 73, 2: 72})
 2D representation
 3D representation
Gaussian-Mixture
predicted_labels--> Counter({3: 71, 1: 54, 0: 44, 4: 40, 2: 31})
 2D representation
 3D representation
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: