Hugo R V Angulo hucodelab

## results
# Report the test-set accuracy, as a percentage rounded to three decimals,
# for every n_neighbors value stored in test_accuracy.
for neighbors, accuracy in test_accuracy.items():
    print(
        "test accuracy for {} n_neighbors is {} %".format(
            neighbors, round(accuracy * 100, 3)
        )
    )

test accuracy for 1 n_neighbors is 94.406 %
test accuracy for 5 n_neighbors is 95.804 %
test accuracy for 10 n_neighbors is 95.105 %
test accuracy for 20 n_neighbors is 96.503 %
test accuracy for 50 n_neighbors is 93.007 %
test accuracy for 100 n_neighbors is 93.706 %

## testing_parameters
# Compare several neighbourhood sizes for the k-NN classifier.
n_neighbors = [1, 5, 10, 20, 50, 100]

# Accuracy of each fitted model, keyed by the n_neighbors value it was built with.
test_accuracy = {}
train_accuracy = {}

for n in n_neighbors:
    # Fit a fresh classifier for this candidate on the training split.
    clf = KNeighborsClassifier(n_neighbors=n)
    clf.fit(X_train, y_train)
    test_acc = clf.score(X_test, y_test)

    # Record both scores. Without this the dictionaries stay empty and any
    # later loop over test_accuracy.items() has nothing to print.
    train_accuracy[n] = clf.score(X_train, y_train)
    test_accuracy[n] = test_acc

## parameters
# Inspect the hyperparameters the current classifier was configured with;
# get_params() returns them as a dict mapping parameter name to value.
clf.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

## evaluating
# Score the fitted classifier on both splits: first on the data it was
# trained on, then on the held-out test data.
training_acc = clf.score(X_train, y_train)
test_acc = clf.score(X_test, y_test)

# Show the two accuracies side by side as a (train, test) pair
(training_acc, test_acc)
(0.9436619718309859, 0.958041958041958)

## model
# Partition the samples into train and test subsets, stratifying on the
# target so both subsets keep the same class proportions. No random_state
# is passed, so the partition (and the resulting scores) can differ
# between runs.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
)

# Build a 5-nearest-neighbours classifier and fit it on the training subset
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train, y_train)

## df_cancer.columns
# List the column labels of the dataframe (taken from cancer.feature_names)
df_cancer.columns

Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error', 'fractal dimension error',
       'worst radius', 'worst texture', 'worst perimeter', 'worst area',
       'worst smoothness', 'worst compactness', 'worst concavity',
       'worst concave points', 'worst symmetry', 'worst fractal dimension'],
      dtype='object')

## loading
# Load the breast-cancer toy dataset that ships with scikit-learn
# (see https://scikit-learn.org/stable/datasets/toy_dataset.html)
cancer = load_breast_cancer()

# Copy the feature matrix into a standalone numpy array
cancer_data = np.array(cancer.data)

# Wrap the features in a pandas DataFrame, labelling each column with its
# feature name
df_cancer = pd.DataFrame(data=cancer_data, columns=cancer.feature_names)

# Preview the first rows
df_cancer.head()

## libraries
# importing libraries
import sklearn
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
	# Report the test-set accuracy, as a percentage rounded to three decimals,
	# for every n_neighbors value stored in test_accuracy.
	for k, v in test_accuracy.items():
		# The print call must be nested under the for statement; at the same
		# indentation level the loop has no body and the code does not parse.
		print("test accuracy for {} n_neighbors is {} %".format(k, round(v * 100, 3)))

	test accuracy for 1 n_neighbors is 94.406 %
	test accuracy for 5 n_neighbors is 95.804 %
	test accuracy for 10 n_neighbors is 95.105 %
	test accuracy for 20 n_neighbors is 96.503 %
	test accuracy for 50 n_neighbors is 93.007 %
	test accuracy for 100 n_neighbors is 93.706 %
	# Compare several neighbourhood sizes for the k-NN classifier.
	n_neighbors = [1, 5, 10, 20, 50, 100]

	# Accuracy of each fitted model, keyed by the n_neighbors value it was built with.
	test_accuracy = {}
	train_accuracy = {}

	for n in n_neighbors:
		# Loop body nested under the for statement (it had lost its indentation).
		# Fit a fresh classifier for this candidate on the training split.
		clf = KNeighborsClassifier(n_neighbors=n)
		clf.fit(X_train, y_train)
		test_acc = clf.score(X_test, y_test)

		# Record both scores. Without this the dictionaries stay empty and any
		# later loop over test_accuracy.items() has nothing to print.
		train_accuracy[n] = clf.score(X_train, y_train)
		test_accuracy[n] = test_acc
	# Inspect the hyperparameters the current classifier was configured with;
	# get_params() returns them as a dict mapping parameter name to value.
	clf.get_params()

	{'algorithm': 'auto',
	'leaf_size': 30,
	'metric': 'minkowski',
	'metric_params': None,
	'n_jobs': None,
	'n_neighbors': 5,
	'p': 2,
	'weights': 'uniform'}
	# Score the fitted classifier on both splits: first on the data it was
	# trained on, then on the held-out test data.
	training_acc = clf.score(X_train, y_train)
	test_acc = clf.score(X_test, y_test)

	# Show the two accuracies side by side as a (train, test) pair
	(training_acc, test_acc)
	(0.9436619718309859, 0.958041958041958)
	# Partition the samples into train and test subsets, stratifying on the
	# target so both subsets keep the same class proportions. No random_state
	# is passed, so the partition (and the resulting scores) can differ
	# between runs.
	X_train, X_test, y_train, y_test = train_test_split(
		cancer.data,
		cancer.target,
		stratify=cancer.target,
	)

	# Build a 5-nearest-neighbours classifier and fit it on the training subset
	clf = KNeighborsClassifier(n_neighbors=5)
	clf.fit(X_train, y_train)
	# List the column labels of the dataframe (taken from cancer.feature_names)
	df_cancer.columns

	Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
	'mean smoothness', 'mean compactness', 'mean concavity',
	'mean concave points', 'mean symmetry', 'mean fractal dimension',
	'radius error', 'texture error', 'perimeter error', 'area error',
	'smoothness error', 'compactness error', 'concavity error',
	'concave points error', 'symmetry error', 'fractal dimension error',
	'worst radius', 'worst texture', 'worst perimeter', 'worst area',
	'worst smoothness', 'worst compactness', 'worst concavity',
	'worst concave points', 'worst symmetry', 'worst fractal dimension'],
	dtype='object')
	# Load the breast-cancer toy dataset that ships with scikit-learn
	# (see https://scikit-learn.org/stable/datasets/toy_dataset.html)
	cancer = load_breast_cancer()

	# Copy the feature matrix into a standalone numpy array
	cancer_data = np.array(cancer.data)

	# Wrap the features in a pandas DataFrame, labelling each column with its
	# feature name
	df_cancer = pd.DataFrame(data=cancer_data, columns=cancer.feature_names)

	# Preview the first rows
	df_cancer.head()
	# importing libraries
	import sklearn
	import pandas as pd
	import numpy as np
	from sklearn.datasets import load_breast_cancer

	from sklearn.model_selection import train_test_split
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.metrics import accuracy_score