# KNN鸢尾花

1138-魏同学

## 热门标签

KNN分类

# --- Data loading and 2-D visualization ---
import matplotlib.pyplot as pyb  # pyplot is the supported interface; pylab is discouraged
# %matplotlib inline  (IPython magic -- invalid syntax in a plain .py file)
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets

# return_X_y=True returns only the (data, target) pair.
# Passing True positionally was deprecated in sklearn 0.23 and removed in 1.0.
X, y = datasets.load_iris(return_X_y=True)
# 4 features -> a 4-dimensional feature space; 150 samples.
X.shape


# Reduce to the first two features (slicing) so the data can be drawn in 2-D.
X = X[:, :2]
X.shape


# Scatter plot: feature 0 on the x-axis, feature 1 on the y-axis;
# c=y colors every point by its class label.
pyb.scatter(X[:, 0], X[:, 1], c=y)


# Data is ready above -- now the algorithm.
# KNN classifier voting over the 5 nearest neighbors.
knn = KNeighborsClassifier(n_neighbors=5)
# Train on all 150 samples (2 features each).
knn.fit(X, y)


import numpy as np
# Build a dense background grid of test points covering the data range:
# x in [4, 8], y in [2, 4.5].
x1 = np.linspace(4, 8, 100)
y1 = np.linspace(2, 4.5, 80)
# meshgrid expands the two 1-D axes into (80, 100) coordinate matrices.
X1, Y1 = np.meshgrid(x1, y1)
# display() only exists inside IPython; print() works in plain Python too.
print(X1.shape, Y1.shape)
pyb.scatter(X1, Y1)


# The training data is (150, 2), so the test points must be (n, 2) as well:
# one (x, y) coordinate pair per background grid cell. Flatten both
# coordinate matrices and stack them side by side as two columns.
# (Equivalent to reshaping each to (-1, 1) and concatenating on axis=1.)
X_test = np.column_stack((X1.ravel(), Y1.ravel()))
X_test.shape


# Predict a class label for every background grid point.
y_ = knn.predict(X_test)
y_


from matplotlib.colors import ListedColormap

# Pale colors for the predicted background, saturated colors for the
# training points (one color per iris class).
lc = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
lc2 = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

# Color every test-grid point by its predicted class, then overlay the
# training samples colored by their true class.
pyb.scatter(X_test[:, 0], X_test[:, 1], c=y_, cmap=lc)
pyb.scatter(X[:, 0], X[:, 1], c=y, cmap=lc2)


# Alternative rendering: filled contours of the decision regions.
# X1 and Y1 are (80, 100), so the flat predictions are reshaped to match.
pyb.contourf(X1, Y1, y_.reshape(80, 100), cmap=lc)
pyb.scatter(X[:, 0], X[:, 1], c=y, cmap=lc2)


## KNN调参、参数的筛选

import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
# model_selection: model-evaluation utilities.
# cross_val_score -- cross: crossed folds; validation: held-out testing.
from sklearn.model_selection import cross_val_score

# return_X_y must be passed by keyword -- the positional bool was
# deprecated in sklearn 0.23 and removed in 1.0.
X, y = datasets.load_iris(return_X_y=True)
X.shape


# Baseline: default KNN (n_neighbors=5), mean accuracy over 10 folds.
knn = KNeighborsClassifier()
score = cross_val_score(knn, X, y, scoring='accuracy', cv=10)
score.mean()


# Plot the cross-validation error for each candidate k.
# NOTE: the loop body lost its indentation in the original paste; restored here.
errors = []
for k in range(1, 14):
    knn = KNeighborsClassifier(n_neighbors=k)
    score = cross_val_score(knn, X, y, scoring='accuracy', cv=6).mean()
    # The smaller the error, the better that k fits.
    errors.append(1 - score)

# Plot error against k.
import matplotlib.pyplot as plt
# %matplotlib inline  (IPython magic -- invalid syntax in a plain .py file)
plt.plot(np.arange(1, 14), errors)



# Effect of the 'weights' parameter on the cross-validation score.
# NOTE: the loop body lost its indentation in the original paste; restored here.
weights = ['uniform', 'distance']
for w in weights:
    knn = KNeighborsClassifier(n_neighbors=11, weights=w)
    score = cross_val_score(knn, X, y, scoring='accuracy', cv=6).mean()
    # Higher mean accuracy -> better weighting scheme.
    print(score)


# Screen both parameters in one pass: two nested loops over k and weights.
# NOTE: both loop bodies lost their indentation in the original paste; restored.
result = {}
for k in range(1, 14):
    for w in weights:
        knn = KNeighborsClassifier(n_neighbors=k, weights=w)
        score = cross_val_score(knn, X, y, scoring='accuracy', cv=6).mean()
        # Key encodes both settings, e.g. 'uniform5' or 'distance11'.
        result[w + str(k)] = score
result


# Best (maximum) cross-validation score over all parameter combinations.
max(result.values())


# Scores as a NumPy array so argmax can locate the best entry.
np.array(list(result.values()))


# Index of the best-scoring parameter combination.
np.array(list(result.values())).argmax()


# Key at that index (list(result) yields the dict keys in insertion order).
# NOTE(review): 20 is presumably the argmax value observed in the notebook
# run above -- it is hard-coded, not computed; verify before reuse.
list(result)[20]


*(Page footer from the original blog — Vieu 3.3 theme · QQ login — not part of the tutorial content.)*