Classification of bird songs with Machine Learning and visualization

Import and load data

In [1]:
from lib.File import File
from matplotlib import pyplot as plt
from data.loader import get_kasios_obs, get_obs, map_path
import numpy as np

from math import sqrt
import pandas as pd
In [2]:
# Load the wav files, converting them if necessary
df_all = get_obs(songs = True)
df_kasios = get_kasios_obs(songs = True)
Loading wav files...
Done.
Loading wav files...
Done.
In [3]:
# X = get_spectres(df_all) # Can take a long time
# X.to_pickle("data/all_birds_freq.pickle") # Save to cache
X = pd.read_pickle("data/all_birds_freq.pickle") # Faster: load from cache

X.head()
Out[3]:
1900.0 1910.0 1920.0 1930.0 1940.0 1950.0 1960.0 1970.0 1980.0 1990.0 ... 5900.0 5910.0 5920.0 5930.0 5940.0 5950.0 5960.0 5970.0 5980.0 5990.0
File ID
402254 -0.777891 -0.398350 -0.014216 -0.855008 2.075899 -0.221931 -0.655440 -1.191778 0.585549 0.236256 ... 0.433391 0.306798 -1.075585 -0.452546 -0.197668 -0.996424 -0.379281 -0.244899 -0.400091 -0.611824
406171 -4.162661 -4.180349 -4.229848 -4.230835 -4.269890 -4.245544 -4.282711 -4.257561 -4.243178 -4.282526 ... -1.510778 -1.968467 -1.824303 -1.519034 -1.865900 -1.872201 -1.824038 -1.760304 -1.728515 -1.927553
405901 -0.580728 -0.633019 -0.707576 -0.645468 -0.603866 -0.725716 -0.672938 -0.657233 -0.662175 -0.732691 ... -0.300444 -0.458147 -0.586822 -0.149122 -0.337683 -0.258401 -0.292191 -0.543218 -0.267194 -0.134089
405548 -3.228091 -3.251748 -3.226915 -3.256686 -3.309930 -3.233574 -3.255813 -3.283560 -3.262860 -3.241711 ... 7.121533 6.845443 6.915710 6.386549 5.541339 4.538464 2.270188 0.207980 1.365019 3.283181
401782 -3.740562 -3.741388 -3.765818 -3.748055 -3.741544 -3.737901 -3.745398 -3.741317 -3.749377 -3.750319 ... -2.814041 -2.563670 -2.758136 -2.482101 -2.802750 -2.417855 -2.802082 -2.927259 -2.785790 -2.790771

5 rows × 410 columns
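Each row is one recording, indexed by file ID; each of the 410 columns holds a spectral value for one 10 Hz-wide frequency bin between 1900 Hz and 5990 Hz.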

In [4]:
# X_kasios = get_spectres(df_kasios) # Can take a long time
# X_kasios.to_pickle("data/kasios_birds_freq.pickle") # Save to cache
X_kasios = pd.read_pickle("data/kasios_birds_freq.pickle") # Faster: load from cache

X_kasios.head()
Out[4]:
1900.0 1910.0 1920.0 1930.0 1940.0 1950.0 1960.0 1970.0 1980.0 1990.0 ... 5900.0 5910.0 5920.0 5930.0 5940.0 5950.0 5960.0 5970.0 5980.0 5990.0
ID
1 -3.529268 -3.329053 -3.513426 -3.523270 -3.362683 -3.481535 -3.410325 -3.471149 -3.358526 -3.415705 ... -3.231955 -3.377782 -3.150853 -3.322742 -3.141581 -3.317984 -3.318084 -3.201280 -3.373770 -3.249895
2 -7.770975 -7.789180 -7.807659 -7.812153 -7.826096 -7.787444 -7.798715 -7.757632 -7.766777 -7.717729 ... 2.070107 1.240989 0.342733 1.253810 -0.095410 0.345641 -0.313179 -0.376685 -0.871546 -0.830625
3 -10.823887 -10.997442 -10.907166 -10.946253 -10.891168 -10.957196 -11.014657 -10.992509 -10.977279 -11.082591 ... -7.309365 -7.188007 -8.339784 -6.470934 -6.471953 -7.001865 -8.281127 -7.377273 -7.894899 -7.307045
4 -7.564831 -7.604928 -7.853263 -7.667681 -7.446895 -7.664611 -7.868092 -7.727359 -7.579639 -7.770928 ... -6.860031 -6.743433 -6.865163 -6.472720 -6.746985 -6.491604 -6.479721 -6.478345 -6.351524 -6.770690
5 13.974970 13.250154 12.210977 12.134556 11.516458 11.235900 10.998921 10.493838 10.029321 9.747640 ... -3.994871 -3.862566 -3.970654 -4.009346 -3.824077 -4.031627 -3.842823 -3.883048 -4.091320 -3.786353

5 rows × 410 columns

In [5]:
Y = df_all.English_name
Y_bin = df_all.English_name == "Rose-crested Blue Pipit"
In [6]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, recall_score, confusion_matrix

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.33)
X_bin_train, X_bin_test, y_bin_train, y_bin_test = train_test_split(X, Y_bin, test_size = 0.33)
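Since the Blue Pipit class appears to be a small minority of the recordings, a stratified split would keep its proportion stable across train and test. A minimal variant of the split above (the random_state value is an arbitrary choice for reproducibility):

# Stratified variant (sketch): preserve the class balance in both splits
X_bin_train, X_bin_test, y_bin_train, y_bin_test = train_test_split(
    X, Y_bin, test_size=0.33, stratify=Y_bin, random_state=0)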

I - Binary classification

1) k-Nearest Neighbours (k-NN)

Fitting

In [7]:
from sklearn.neighbors import KNeighborsClassifier
In [8]:
# odd values of k to test (even k can tie in binary majority voting)
neighbors = list(range(1, 50, 2))

# empty list that will hold cv scores
cv_scores = []

# perform 10-fold cross validation
for k in neighbors:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X_bin_train, y_bin_train, cv=10, scoring='accuracy')
    cv_scores.append(scores.mean())
# convert accuracy to misclassification error (1 - accuracy)
misclass_error = [1 - x for x in cv_scores]

# determine the best k
optimal_k = neighbors[misclass_error.index(min(misclass_error))]
print("The optimal number of neighbors is %d" % optimal_k)

# plot misclassification error vs k
plt.plot(neighbors, misclass_error)
plt.xlabel('Number of Neighbors K')
plt.ylabel('Misclassification Error')
plt.show()
The optimal number of neighbors is 7
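The same selection can be written more compactly with scikit-learn's GridSearchCV; a minimal sketch over the same grid:

from sklearn.model_selection import GridSearchCV

# cross-validated grid search over the same odd values of k (sketch)
grid = GridSearchCV(KNeighborsClassifier(),
                    {'n_neighbors': list(range(1, 50, 2))},
                    cv=10, scoring='accuracy')
grid.fit(X_bin_train, y_bin_train)
print(grid.best_params_)  # should agree with the manual loop above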
In [9]:
clf = KNeighborsClassifier(n_neighbors=optimal_k)
clf.fit(X_bin_train, y_bin_train)
Out[9]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=7, p=2,
           weights='uniform')
In [10]:
# Accuracy of the model
accuracy = clf.score(X_bin_test, y_bin_test)
print("Accuracy of the model: %s" % accuracy)
Accuracy of the model: 0.9297218155197657
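For context, an accuracy near 0.93 should be compared with the trivial baseline of always predicting the majority class; a quick check on the same split:

# majority-class baseline accuracy on the test split (sketch)
baseline = max(y_bin_test.mean(), 1 - y_bin_test.mean())
print("Majority-class baseline: %.4f" % baseline)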
In [11]:
import itertools

# confusion-matrix plotting helper (adapted from the scikit-learn examples)
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
In [12]:
# Compute confusion matrix
y_bin_predict = clf.predict(X_bin_test)
cnf_matrix = confusion_matrix(y_bin_test, y_bin_predict)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=[False,True],
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=[False,True], normalize=True,
                      title='Normalized confusion matrix')

plt.show()
The classifier fails to recognize most of the Rose-crested Blue Pipit recordings; the high overall accuracy is mostly due to the dominant negative class, and the model may be overfitting.
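To quantify this, per-class precision and recall make the miss rate on the positive class explicit; a quick check on the same test split:

from sklearn.metrics import classification_report

# per-class precision/recall for the fitted kNN (sketch)
print(classification_report(y_bin_test, clf.predict(X_bin_test),
                            target_names=['other', 'Blue Pipit']))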

Predicting

In [13]:
print("Binary classification with kNN \n")
y_kasios = clf.predict(X_kasios)
for i in range(len(y_kasios)):
    print("Kasios song n°%s : %s" % (i+1, y_kasios[i]))
Binary classification with kNN 

Kasios song n°1 : False
Kasios song n°2 : False
Kasios song n°3 : False
Kasios song n°4 : False
Kasios song n°5 : False
Kasios song n°6 : False
Kasios song n°7 : False
Kasios song n°8 : False
Kasios song n°9 : True
Kasios song n°10 : False
Kasios song n°11 : False
Kasios song n°12 : False
Kasios song n°13 : False
Kasios song n°14 : False
Kasios song n°15 : False
The kNN classifier flags only Kasios song n°9 as a Blue Pipit.

2) Support Vector Machine (SVM)

Fitting

In [14]:
from sklearn import svm
In [15]:
x_list = np.linspace(2**-5, 2**5, 30)
accuracy_C = []
for x in x_list:
    model = svm.SVC(kernel='linear', C=x)
    model.fit(X_bin_train, y_bin_train)
    accuracy_C.append(model.score(X_bin_test, y_bin_test))

# select the C with the best accuracy
max_index = accuracy_C.index(max(accuracy_C))
C_chosen = x_list[max_index]
In [16]:
# same search for gamma (note: gamma is ignored by the linear kernel,
# so this loop does not change the model)
accuracy_gamma = []
for x in x_list:
    model = svm.SVC(kernel='linear', C=C_chosen, gamma=x)
    model.fit(X_bin_train, y_bin_train)
    accuracy_gamma.append(model.score(X_bin_test, y_bin_test))

# select the gamma with the best accuracy
max_index = accuracy_gamma.index(max(accuracy_gamma))
gamma_chosen = x_list[max_index]
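Note that picking C and gamma by test-set accuracy leaks information from the test set into the model choice. Selecting them by cross-validation on the training split avoids this; a minimal sketch:

from sklearn.model_selection import GridSearchCV

# choose C by cross-validation on the training data only (sketch)
search = GridSearchCV(svm.SVC(kernel='linear'),
                      {'C': np.linspace(2**-5, 2**5, 30)}, cv=5)
search.fit(X_bin_train, y_bin_train)
print(search.best_params_)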
In [17]:
# We can now define our SVM model
clf = svm.SVC(kernel='linear', C=C_chosen, gamma=gamma_chosen)
clf.fit(X_bin_train, y_bin_train)
Out[17]:
SVC(C=0.03125, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.03125, kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
In [18]:
# Accuracy of the model
accuracy = clf.score(X_bin_test, y_bin_test)
print("Accuracy of the model: %s" % accuracy)
Accuracy of the model: 0.8638360175695461
In [19]:
# Compute confusion matrix
y_bin_predict = clf.predict(X_bin_test)
cnf_matrix = confusion_matrix(y_bin_test, y_bin_predict)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=[False,True],
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=[False,True], normalize=True,
                      title='Normalized confusion matrix')

plt.show()

Predicting

In [20]:
y_kasios = clf.predict(X_kasios)
for i in range(len(y_kasios)):
    print("Kasios song n°%s : %s" % (i+1, y_kasios[i]))
Kasios song n°1 : False
Kasios song n°2 : True
Kasios song n°3 : False
Kasios song n°4 : False
Kasios song n°5 : False
Kasios song n°6 : False
Kasios song n°7 : False
Kasios song n°8 : False
Kasios song n°9 : True
Kasios song n°10 : True
Kasios song n°11 : False
Kasios song n°12 : False
Kasios song n°13 : False
Kasios song n°14 : False
Kasios song n°15 : False
The SVM classifier flags Kasios songs n°2, 9 and 10 as Blue Pipit.

3) Random Forest

Fitting

In [21]:
from sklearn.ensemble import RandomForestClassifier
In [22]:
clf = RandomForestClassifier(n_estimators=100, max_depth=7)
In [23]:
clf.fit(X_bin_train, y_bin_train)
Out[23]:
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=7, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)
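Since each feature is a frequency bin, the forest's feature importances hint at which parts of the spectrum drive the classification; a quick look, assuming the fitted clf above:

# top-10 most informative frequency bins (sketch)
importances = pd.Series(clf.feature_importances_, index=X.columns)
print(importances.sort_values(ascending=False).head(10))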

Predicting

In [24]:
y_pred_train = clf.predict(X_bin_train)
y_pred_test = clf.predict(X_bin_test)
In [25]:
print("Train accuracy: %s" % accuracy_score(y_bin_train, y_pred_train))
print("Test accuracy: %s" % accuracy_score(y_bin_test, y_pred_test))
Train accuracy: 0.9848265895953757
Test accuracy: 0.9341142020497804
In [26]:
# Compute confusion matrix
cnf_matrix = confusion_matrix(y_bin_test, y_pred_test)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=[False,True],
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=[False,True], normalize=True,
                      title='Normalized confusion matrix')

plt.show()
In [27]:
y_kasios = clf.predict(X_kasios)
In [28]:
print("Classification with Random Forest \n")
for i in range(len(y_kasios)):
    print("Kasios song n°%s : %s"% (i+1, y_kasios[i]))
Classification with Random Forest 

Kasios song n°1 : False
Kasios song n°2 : False
Kasios song n°3 : False
Kasios song n°4 : False
Kasios song n°5 : False
Kasios song n°6 : False
Kasios song n°7 : False
Kasios song n°8 : False
Kasios song n°9 : False
Kasios song n°10 : False
Kasios song n°11 : False
Kasios song n°12 : False
Kasios song n°13 : False
Kasios song n°14 : False
Kasios song n°15 : False
The Random Forest classifier flags none of the Kasios songs as Blue Pipit.
Conclusion: from the binary classifiers, Kasios song n°9 is the most likely to be a Rose-crested Blue Pipit, since both kNN and SVM flag it. Let's check whether multi-class classification gives a clearer result.
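To rank the candidates rather than threshold them, kNN also exposes class probabilities; a sketch reusing the binary split and the optimal_k found above:

# probability of being a Blue Pipit for each Kasios song (sketch)
knn = KNeighborsClassifier(n_neighbors=optimal_k).fit(X_bin_train, y_bin_train)
proba = knn.predict_proba(X_kasios)
for i, p in enumerate(proba[:, 1], start=1):  # column 1 = class True
    print("Kasios song n°%d : P(Blue Pipit) = %.2f" % (i, p))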

II - Multi-class classification

1) k-Nearest Neighbours (k-NN)

Fitting

In [48]:
# odd values of k to test
neighbors = list(range(1,50,2))
# empty list that will hold cv scores
cv_scores = []

# perform 10-fold cross validation
for k in neighbors:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X_train, y_train, cv=10, scoring='accuracy')
    cv_scores.append(scores.mean())
# convert accuracy to misclassification error (1 - accuracy)
misclass_error = [1 - x for x in cv_scores]

# determine the best k
optimal_k = neighbors[misclass_error.index(min(misclass_error))]
print("The optimal number of neighbors is %d" % optimal_k)

# plot misclassification error vs k
plt.plot(neighbors, misclass_error)
plt.xlabel('Number of Neighbors K')
plt.ylabel('Misclassification Error')
plt.show()
The optimal number of neighbors is 5
In [30]:
clf = KNeighborsClassifier(n_neighbors=optimal_k)
clf.fit(X_train, y_train)

# Accuracy of the model
accuracy = clf.score(X_test, y_test)
print("Accuracy of the model: %s" % accuracy)
Accuracy of the model: 0.42020497803806733
In [31]:
# Compute confusion matrix
y_predict = clf.predict(X_test)
cnf_matrix = confusion_matrix(y_test, y_predict)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
# (confusion_matrix sorts its labels, so the tick labels must be sorted too)
plt.figure(figsize=(20,20))
plot_confusion_matrix(cnf_matrix, classes=np.unique(Y),
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure(figsize=(20,20))
plot_confusion_matrix(cnf_matrix, classes=np.unique(Y), normalize=True,
                      title='Normalized confusion matrix')

plt.show()
The normalized confusion matrix shows which species are confused with one another, i.e. how similar their songs are.
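One way to read these similarities programmatically is to pull the largest off-diagonal entry out of the normalized matrix; a minimal sketch using the cnf_matrix computed above:

# most-confused species pair from the normalized matrix (sketch)
labels = np.unique(Y)  # confusion_matrix sorts the labels
cm = cnf_matrix.astype('float') / cnf_matrix.sum(axis=1)[:, np.newaxis]
np.fill_diagonal(cm, 0)  # ignore correct predictions
i, j = np.unravel_index(cm.argmax(), cm.shape)
print("%s is most often mistaken for %s (%.0f%%)" % (labels[i], labels[j], 100 * cm[i, j]))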

Predicting

In [32]:
y_kasios = clf.predict(X_kasios)
for i in range(len(y_kasios)):
    print("Kasios song n°%s : %s" % (i+1, y_kasios[i]))
Kasios song n°1 : Broad-winged Jojo
Kasios song n°2 : Rose-crested Blue Pipit
Kasios song n°3 : Ordinary Snape
Kasios song n°4 : Darkwing Sparrow
Kasios song n°5 : Bombadil
Kasios song n°6 : Bent-beak Riffraff
Kasios song n°7 : Scrawny Jay
Kasios song n°8 : Lesser Birchbeere
Kasios song n°9 : Rose-crested Blue Pipit
Kasios song n°10 : Orange Pine Plover
Kasios song n°11 : Canadian Cootamum
Kasios song n°12 : Orange Pine Plover
Kasios song n°13 : Rose-crested Blue Pipit
Kasios song n°14 : Darkwing Sparrow
Kasios song n°15 : Broad-winged Jojo
The kNN classifier labels Kasios songs n°2, 9 and 13 as Blue Pipit.

2) Support Vector Machine (SVM)

Fitting

In [33]:
x_list = np.linspace(2**-5, 2**5, 30)
accuracy_C = []
for x in x_list:
    model = svm.SVC(kernel='linear', C=x)
    model.fit(X_train, y_train)
    accuracy_C.append(model.score(X_test, y_test))

# select the C with the best accuracy
max_index = accuracy_C.index(max(accuracy_C))
C_chosen = x_list[max_index]
In [34]:
# same search for gamma (note: gamma is ignored by the linear kernel,
# so this loop does not change the model)
accuracy_gamma = []
for x in x_list:
    model = svm.SVC(kernel='linear', C=C_chosen, gamma=x)
    model.fit(X_train, y_train)
    accuracy_gamma.append(model.score(X_test, y_test))

# select the gamma with the best accuracy
max_index = accuracy_gamma.index(max(accuracy_gamma))
gamma_chosen = x_list[max_index]
In [35]:
# We can now define our SVM model
clf = svm.SVC(kernel='linear', C=C_chosen, gamma=gamma_chosen)
clf.fit(X_train, y_train)
Out[35]:
SVC(C=0.03125, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.03125, kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
In [36]:
# Accuracy of the model
accuracy = clf.score(X_test, y_test)
print("Accuracy of the model: %s" % accuracy)
Accuracy of the model: 0.37920937042459735
In [37]:
# Compute confusion matrix
y_predict = clf.predict(X_test)
cnf_matrix = confusion_matrix(y_test, y_predict)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure(figsize=(20,20))
plot_confusion_matrix(cnf_matrix, classes=np.unique(Y),
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure(figsize=(20,20))
plot_confusion_matrix(cnf_matrix, classes=np.unique(Y), normalize=True,
                      title='Normalized confusion matrix')

plt.show()

Predicting

In [38]:
y_kasios = clf.predict(X_kasios)
for i in range(len(y_kasios)):
    print("Kasios song n°%s : %s" % (i+1, y_kasios[i]))
Kasios song n°1 : Bent-beak Riffraff
Kasios song n°2 : Rose-crested Blue Pipit
Kasios song n°3 : Ordinary Snape
Kasios song n°4 : Darkwing Sparrow
Kasios song n°5 : Pinkfinch
Kasios song n°6 : Green-tipped Scarlet Pipit
Kasios song n°7 : Carries Champagne Pipit
Kasios song n°8 : Lesser Birchbeere
Kasios song n°9 : Darkwing Sparrow
Kasios song n°10 : Orange Pine Plover
Kasios song n°11 : Blue-collared Zipper
Kasios song n°12 : Orange Pine Plover
Kasios song n°13 : Rose-crested Blue Pipit
Kasios song n°14 : Darkwing Sparrow
Kasios song n°15 : Orange Pine Plover
The SVM classifier labels Kasios songs n°2 and 13 as Blue Pipit.

3) Random Forest

Fitting

In [39]:
clf = RandomForestClassifier(n_estimators=100, max_depth=7)
clf.fit(X_train, y_train)
Out[39]:
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=7, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

Predicting

In [40]:
# Compute confusion matrix
y_pred_test = clf.predict(X_test)
cnf_matrix = confusion_matrix(y_test, y_pred_test)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure(figsize=(20,20))
plot_confusion_matrix(cnf_matrix, classes=np.unique(Y),
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure(figsize=(20,20))
plot_confusion_matrix(cnf_matrix, classes=np.unique(Y), normalize=True,
                      title='Normalized confusion matrix')

plt.show()
In [41]:
print("Accuracy: %s" % accuracy_score(y_test, y_pred_test))
Accuracy: 0.5036603221083455
In [42]:
y_kasios = clf.predict(X_kasios)
In [43]:
print("Classification with Random Forest \n")
for i in range(len(y_kasios)):
    print("Kasios song n°%s : %s"% (i+1, y_kasios[i]))
Classification with Random Forest 

Kasios song n°1 : Bent-beak Riffraff
Kasios song n°2 : Rose-crested Blue Pipit
Kasios song n°3 : Darkwing Sparrow
Kasios song n°4 : Darkwing Sparrow
Kasios song n°5 : Orange Pine Plover
Kasios song n°6 : Green-tipped Scarlet Pipit
Kasios song n°7 : Scrawny Jay
Kasios song n°8 : Lesser Birchbeere
Kasios song n°9 : Rose-crested Blue Pipit
Kasios song n°10 : Orange Pine Plover
Kasios song n°11 : Queenscoat
Kasios song n°12 : Orange Pine Plover
Kasios song n°13 : Rose-crested Blue Pipit
Kasios song n°14 : Darkwing Sparrow
Kasios song n°15 : Orange Pine Plover
The Random Forest labels Kasios songs n°2, 9 and 13 as Blue Pipit. Conclusion: the multi-class classification gives a more consistent result than the binary one. It confirms that Kasios song n°9 is probably a Rose-crested Blue Pipit song, and adds n°2 and n°13 as strong candidates.
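A simple way to summarize the three verdicts is a per-song tally; a sketch assuming the three prediction arrays were saved under the hypothetical names knn_pred, svm_pred and rf_pred (in the notebook above, y_kasios is overwritten each time):

# votes for 'Rose-crested Blue Pipit' per Kasios song (sketch; the three
# arrays are hypothetical saved copies of the y_kasios predictions)
votes = pd.DataFrame({'kNN': knn_pred, 'SVM': svm_pred, 'RF': rf_pred},
                     index=range(1, 16))
print((votes == 'Rose-crested Blue Pipit').sum(axis=1))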

III - UMAP (visualization)

In [7]:
import umap

embedding = umap.UMAP(n_neighbors=17).fit_transform(pd.concat([X, X_kasios]))
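UMAP is stochastic, so the layout changes between runs; pinning the seed makes the figure reproducible (sketch; the seed value is arbitrary):

# reproducible variant of the embedding above (sketch)
embedding = umap.UMAP(n_neighbors=17, random_state=42).fit_transform(
    pd.concat([X, X_kasios]))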
In [8]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper
from bokeh.palettes import Category20
from bokeh.models import Legend

output_notebook()
Loading BokehJS ...
In [9]:
df_umap = pd.DataFrame(embedding, columns=('x', 'y'))
df_umap['specie'] = list(Y) + ['Kasios Test Bird'] * 15
df_umap['kasios_index'] = [None] * len(Y) + list(range(1, 16))


plot_figure = figure(
    title='UMAP projection of the birds songs',
    plot_width=800,
    plot_height=600,
    tools=('pan, wheel_zoom, reset'), 
    toolbar_location='above'
)

color_mapping = CategoricalColorMapper(factors=df_umap['specie'].unique(),
                                       palette=Category20[20])

plot_figure.add_tools(HoverTool(tooltips="""
<div>
    <div>
        <span style='font-size: 13px; color: #224499'>Species:</span>
        <span style='font-size: 13px'>@specie</span>
    </div>
</div>
"""))

legend_items = []

for specie in df_umap['specie'].unique():
    datasource = ColumnDataSource(df_umap.loc[df_umap['specie'] == specie])
    if specie == 'Kasios Test Bird':
        c = plot_figure.circle('x', 'y', source=datasource, color='black',
                               line_alpha=0.7, fill_alpha=0.7, size=20)

        plot_figure.text('x', 'y', text='kasios_index', x_offset=-5, y_offset=8,
                         source=datasource, text_font_size='10pt', text_color='white')
    else:
        c = plot_figure.circle('x', 'y', source=datasource,
                               color=dict(field='specie', transform=color_mapping),
                               line_alpha=0.6, fill_alpha=0.6, size=5)
        
    legend_items.append((specie, [c]))
    

legend = Legend(items=legend_items, location=(30, 50))
legend.click_policy = 'hide'
plot_figure.add_layout(legend, 'right')

show(plot_figure)
In [10]:
plot_figure = figure(
    title='UMAP projection of the birds songs',
    plot_width=800,
    plot_height=600,
    tools=('pan, wheel_zoom, reset'), 
    toolbar_location='above'
)

color_mapping = CategoricalColorMapper(factors=df_umap['specie'].unique(),
                                       palette=Category20[20])

plot_figure.add_tools(HoverTool(tooltips="""
<div>
    <div>
        <span style='font-size: 13px; color: #224499'>Species:</span>
        <span style='font-size: 13px'>@specie</span>
    </div>
</div>
"""))

legend_items = []

for specie in df_umap['specie'].unique():

    datasource = ColumnDataSource(df_umap.loc[df_umap['specie'] == specie])
    if specie == 'Kasios Test Bird':
        c = plot_figure.circle('x', 'y', source=datasource, color='black',
                               line_alpha=0.7, fill_alpha=0.7, size=20)

        plot_figure.text('x', 'y', text='kasios_index', x_offset=-5, y_offset=8,
                         source=datasource, text_font_size='10pt', text_color='white')
    else:
        c = plot_figure.circle('x', 'y', source=datasource,
                               color=dict(field='specie', transform=color_mapping),
                               muted_color=dict(field='specie', transform=color_mapping),
                               line_alpha=0.6, fill_alpha=0.6, size=5, muted_alpha=0.1)
        
    legend_items.append((specie, [c]))
    if specie != 'Rose-crested Blue Pipit': 
        c.muted = True
    
legend = Legend(items=legend_items, location=(30, 50))
legend.click_policy = 'hide'
plot_figure.add_layout(legend, 'right')

show(plot_figure)
Using the UMAP projection, several Kasios sounds land in the same region as the Rose-crested Blue Pipit recordings (n°2, 9, 11, 13 and 14).
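This visual reading can be checked numerically: for each Kasios point, count how many of its nearest neighbours in the embedding are Blue Pipit recordings. A minimal sketch, assuming X and Y are row-aligned as above:

from sklearn.neighbors import NearestNeighbors

# fraction of Blue Pipits among each Kasios point's 10 nearest
# neighbours in the 2-D embedding (sketch)
nn = NearestNeighbors(n_neighbors=10).fit(embedding[:len(Y)])
_, idx = nn.kneighbors(embedding[len(Y):])
for i, row in enumerate(idx, start=1):
    frac = np.mean(Y.values[row] == 'Rose-crested Blue Pipit')
    print("Kasios n°%d : %.0f%% Blue Pipit neighbours" % (i, 100 * frac))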
Conclusion: combining the ML classifiers and the UMAP visualization, we conclude that Kasios sounds n°2, 9 and 13 most likely come from a Rose-crested Blue Pipit.