import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler,StandardScaler,normalize
import matplotlib.pyplot as plt
# Import the libraries used throughout this notebook
from collections import Counter
from sklearn.manifold import TSNE
import time
import sklearn
from sklearn.decomposition import PCA
from sklearn import mixture
from sklearn.preprocessing import MinMaxScaler,StandardScaler,LabelEncoder
from sklearn.model_selection import GridSearchCV
import plotly.express as px
from collections import Counter
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import umap
from audio_results_util import load_dataset,plotClusters,encodedLabels,plotData3d,plotData2d,plotAllClusterModels,actualDistribution
# IPython/Jupyter line magic (not plain Python): selects matplotlib's inline
# backend so figures are drawn in the notebook output. This line only works
# when the file is executed by IPython/Jupyter.
%matplotlib inline
# --- Raw MFCC features ------------------------------------------------------
# Load the feature table; the helper hands back the data plus two label
# objects (presumably raw and integer-encoded labels -- confirm in
# audio_results_util.load_dataset).
raw_features_csv = 'Audio_features_vowel/mfcc_ideology_features_vowel.csv'
raw_results_csv = 'audio-results4/mfcc_ideology_features_vowel.csv'
loaded_dataset = load_dataset(raw_features_csv)
train_data, true_labels, true_encoded_labels = loaded_dataset

# Previously saved results for the untransformed features; the bare
# expression displays the table when run as a notebook cell.
results_df = pd.read_csv(raw_results_csv)
results_df

# Show the ground-truth label layout, then every clustering model's output
# (helper semantics assumed from their names -- verify against the util module).
actualDistribution(train_data, true_labels, true_encoded_labels)
plotAllClusterModels(train_data, 'audio-results4/mfcc_ideology_features_vowel.csv')
# --- PCA projection ---------------------------------------------------------
# Previously saved results for the PCA-reduced features; the bare expression
# displays the table when run as a notebook cell.
pca_results_csv = 'audio-results4/mfcc_ideology_features_vowel.csv-pca.csv'
results_df = pd.read_csv(pca_results_csv)
results_df

# Project the features onto their first three principal components and wrap
# the resulting array in a DataFrame for the plotting helpers.
pca_model = PCA(n_components=3)
pca_embedding = pca_model.fit_transform(train_data)
pca_transformed = pd.DataFrame(pca_embedding)

# Plot the true labels and the cluster models on the PCA embedding
# (helper semantics assumed from their names -- verify against the util module).
actualDistribution(pca_transformed, true_labels, true_encoded_labels)
plotAllClusterModels(pca_transformed, 'audio-results4/mfcc_ideology_features_vowel.csv-pca')
# --- t-SNE projection -------------------------------------------------------
# Previously saved results for the t-SNE-reduced features; the bare
# expression displays the table when run as a notebook cell.
results_df = pd.read_csv('audio-results4/mfcc_ideology_features_vowel.csv-tsne.csv')
results_df

# Embed the features into three dimensions with t-SNE, using all CPU cores.
# NOTE(review): no random_state is set, so the embedding may differ between
# runs (the UMAP step pins random_state=42) -- confirm whether that is intended.
tsne_transformed = TSNE(n_components=3, n_jobs=-1).fit_transform(train_data)
tsne_transformed = pd.DataFrame(tsne_transformed)

# Plot the true labels and the cluster models on the t-SNE embedding.
# Fix: the cluster plots previously received the raw `train_data` instead of
# `tsne_transformed`, unlike the PCA and UMAP steps, so the "-tsne" results
# were drawn on the untransformed features.
actualDistribution(tsne_transformed, true_labels, true_encoded_labels)
plotAllClusterModels(tsne_transformed, 'audio-results4/mfcc_ideology_features_vowel.csv-tsne')
# --- UMAP projection --------------------------------------------------------
# Previously saved results for the UMAP-reduced features; the bare expression
# displays the table when run as a notebook cell.
umap_results_csv = 'audio-results4/mfcc_ideology_features_vowel.csv-umap.csv'
results_df = pd.read_csv(umap_results_csv)
results_df

# Reduce the features to three UMAP components with a fixed seed, then wrap
# the resulting array in a DataFrame for the plotting helpers.
umap_reducer = umap.UMAP(random_state=42, n_components=3)
umap_embedding = umap_reducer.fit_transform(train_data)
umap_transformed = pd.DataFrame(umap_embedding)

# Plot the true labels and the cluster models on the UMAP embedding
# (helper semantics assumed from their names -- verify against the util module).
actualDistribution(umap_transformed, true_labels, true_encoded_labels)
plotAllClusterModels(umap_transformed, 'audio-results4/mfcc_ideology_features_vowel.csv-umap')