# 极限森林

1138-魏同学

## 极限森林

from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
# %matplotlib inline  # IPython magic; commented out so the file is valid plain Python

# Decision tree: splits on the condition with the largest information gain.
# Extra-trees ("extremely randomized" forest): random samples AND random split
# thresholds (not necessarily the best split), which lowers variance further.

# load_wine's return_X_y argument became keyword-only in scikit-learn 1.0;
# the old positional call load_wine(True) raises TypeError on modern versions.
X, y = datasets.load_wine(return_X_y=True)

# Baseline: a single decision tree, 6-fold cross-validated mean accuracy.
# print() so results are visible when run as a script (a notebook echoed them).
clf = DecisionTreeClassifier()
print(cross_val_score(clf, X, y, cv=6, scoring='accuracy').mean())

# Random forest: bootstrap samples, best split within a random feature subset.
forest = RandomForestClassifier(n_estimators=100)
print(cross_val_score(forest, X, y, cv=6, scoring='accuracy').mean())

# Extra-trees: both the samples and the split thresholds are randomized.
extra = ExtraTreesClassifier(n_estimators=100)
print(cross_val_score(extra, X, y, cv=6, scoring='accuracy').mean())


## 梯度提升的原理

import numpy as np
# Gradient boosting builds on regression: regression is the limiting case of
# classification — with enough classes, classification behaves like regression.
from sklearn import datasets
# Was missing in the original: GradientBoostingClassifier was used below
# without ever being imported, raising NameError.
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

# load_iris's return_X_y argument is keyword-only since scikit-learn 1.0.
X, y = datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# 10 boosting stages; print the held-out accuracy (a notebook echoed it).
gbdt = GradientBoostingClassifier(n_estimators=10)
gbdt.fit(X_train, y_train)
print(gbdt.score(X_test, y_test))

# How gradient boosting works (toy regression example)

import numpy as np
import matplotlib.pyplot as plt
# %matplotlib inline  # IPython magic; commented out so the file is valid plain Python
from sklearn import tree
# Was missing in the original: this section reused the *classifier* named
# 'gbdt' from the previous section, but the targets below are continuous,
# so a regressor is the correct model ("use regression to learn").
from sklearn.ensemble import GradientBoostingRegressor

# X: features — per the original note, internet time and shopping amount.
# y: continuous targets (ages), i.e. a regression problem.
X = np.array([[800, 3], [1200, 1], [1800, 4], [2500, 2]])
y = np.array([14, 16, 24, 26])

# Learn with regression; print predictions (a notebook echoed them).
gbdt = GradientBoostingRegressor(n_estimators=10)
gbdt.fit(X, y)
print(gbdt.predict(X))

plt.figure(figsize=(9, 6))
# gbdt[0, 0] is the regression tree of the first boosting stage.
# NOTE(review): the feature_names order ['消费','上网'] contradicts the comment
# above (column 0 described as internet time) — confirm which column is which.
tree.plot_tree(gbdt[0, 0], filled=True, feature_names=['消费', '上网'])


## 梯度下降

import numpy as np
import matplotlib.pyplot as plt
# %matplotlib inline  # IPython magic; commented out so the file is valid plain Python

# Objective to minimize: f(x) = (x-3)^2 + 2.5x - 7.5.
# Its derivative 2(x-3) + 2.5 vanishes at x = 1.75, the global minimum.
f = lambda x: (x - 3) ** 2 + 2.5 * x - 7.5

# Visualize the curve on [-2, 5] (100 sample points).
x = np.linspace(-2, 5, 100)
y = f(x)
plt.plot(x, y)

# Derivative of f: f'(x) = 2*(x - 3) + 2.5, zero at x = 1.75.
d = lambda x: 2 * (x - 3) + 2.5

# Step size for each gradient update.
learning_rate = 0.1

# Random integer starting point in [-3, 5).
min_value = np.random.randint(-3, 5, size=1)[0]
print(min_value)

# Previous iterate, seeded 0.1 away so the stop test fails on loop entry.
min_value_last = min_value + 0.1

# Tolerance: stop once successive iterates differ by less than 1e-4.
tol = 0.0001

# NOTE: the loop body had lost its indentation in the original (SyntaxError);
# restored here per the inline comments' intent.
while True:
    # Converged when one update barely moves the iterate.
    if np.abs(min_value - min_value_last) < tol:
        break
    min_value_last = min_value
    # Gradient-descent update: step opposite the derivative's direction.
    min_value = min_value - learning_rate * d(min_value)
    print(min_value)
print(min_value)