Skip to content

Instantly share code, notes, and snippets.

View hucodelab's full-sized avatar

Hugo R V Angulo hucodelab

View GitHub Profile
# Report the test accuracy recorded for each n_neighbors setting.
# test_accuracy maps n_neighbors -> accuracy as a fraction, so it is scaled
# to a percentage and rounded to three decimals for display.
for k, v in test_accuracy.items():
    print("test accuracy for {} n_neighbors is {} %".format(k, round(v * 100, 3)))
test accuracy for 1 n_neighbors is 94.406 %
test accuracy for 5 n_neighbors is 95.804 %
test accuracy for 10 n_neighbors is 95.105 %
test accuracy for 20 n_neighbors is 96.503 %
test accuracy for 50 n_neighbors is 93.007 %
test accuracy for 100 n_neighbors is 93.706 %
# Try several neighbourhood sizes and record the accuracy obtained with each.
n_neighbors = [1, 5, 10, 20, 50, 100]
# Both dicts are keyed by the n_neighbors value used for that fit.
test_accuracy = {}
train_accuracy = {}
for n in n_neighbors:
    # A fresh classifier per setting; clf ends up bound to the last one fitted.
    clf = KNeighborsClassifier(n_neighbors=n)
    clf.fit(X_train, y_train)
    # Score on the training split and on the held-out split.
    train_accuracy[n] = clf.score(X_train, y_train)
    test_acc = clf.score(X_test, y_test)
    # Store the result; without this the dicts stay empty and nothing can be
    # reported after the loop.
    test_accuracy[n] = test_acc
# Inspect the parameters of the classifier `clf`.
# The bare expression relies on notebook-style display: its value (a dict of
# parameter name -> value) is shown as the cell output, not printed.
clf.get_params()
{'algorithm': 'auto',
'leaf_size': 30,
'metric': 'minkowski',
'metric_params': None,
'n_jobs': None,
'n_neighbors': 5,
'p': 2,
# Score the classifier on the split it was trained on and on the held-out split.
training_acc = clf.score(X_train, y_train)
test_acc = clf.score(X_test, y_test)
# Show both accuracies together as a (train, test) pair.
training_acc, test_acc
(0.9436619718309859, 0.958041958041958)
# Partition features and labels into train/test splits, stratified on the
# target labels.
# NOTE: no random_state is passed, so the partition is not fixed between runs.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
)
# Build a k-nearest-neighbours classifier with k=5 and fit it on the
# training split.
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train, y_train)
# List the column labels of the feature DataFrame (notebook-style display of
# a bare expression).
df_cancer.columns
Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
'mean smoothness', 'mean compactness', 'mean concavity',
'mean concave points', 'mean symmetry', 'mean fractal dimension',
'radius error', 'texture error', 'perimeter error', 'area error',
'smoothness error', 'compactness error', 'concavity error',
'concave points error', 'symmetry error', 'fractal dimension error',
'worst radius', 'worst texture', 'worst perimeter', 'worst area',
'worst smoothness', 'worst compactness', 'worst concavity',
# Load the breast-cancer toy dataset
# (see https://scikit-learn.org/stable/datasets/toy_dataset.html).
cancer = load_breast_cancer()
# Materialise the feature matrix as a NumPy array.
cancer_data = np.array(cancer.data)
# Wrap the features in a DataFrame whose columns carry the dataset's
# feature names.
df_cancer = pd.DataFrame(data=cancer_data, columns=cancer.feature_names)
# Preview the first rows.
df_cancer.head()
# importing libraries
import sklearn
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score