# 1.4 案例：广告预测、房价预测

1147-柳同学

## 热门标签

,

### 一、广告预测

# 广告预测——岭回归、LASSO回归
import numpy as np
import pandas as pd
import warnings
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error

# Seed NumPy's global random generator
np.random.seed(1)

# Load the data: split the raw array into features (every column except the
# last) and target (the last column).
# NOTE(review): `data` is read before any visible assignment and `return`
# appears at top level -- the enclosing `def` line and the statement that
# loads `data` appear to be missing from this copy; restore them before running.
data = data.values
X = data[:, :-1]
y = data[:, -1]
return X, y

# Plot predicted sales against the true sales
def plotSalePred(name, y_pred, y_true=None):
    """Draw predicted and true sales as two line plots on one figure.

    Args:
        name: Model name interpolated into the figure title.
        y_pred: Predicted values, one per test sample.
        y_true: True values to compare against. When omitted, the
            module-level ``y_test`` is used, which is what the original
            two-argument call relied on.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    if y_true is None:
        # Backward-compatible default: fall back to the script's global.
        y_true = y_test
    x = np.arange(len(y_true))  # sample index on the x axis

    # Use a font with CJK glyphs so the Chinese labels render, and keep the
    # minus sign displayable with that font.
    mpl.rcParams['font.sans-serif'] = [u'simHei']
    mpl.rcParams['axes.unicode_minus'] = False

    fig = plt.figure(facecolor='w')
    fig.subplots()
    plt.plot(x, y_pred, 'r-', lw=2, label=u'预测数据')
    plt.plot(x, y_true, 'b-', lw=2, label=u'真实数据')
    plt.title(u'%s回归预测销量' % (name), fontsize=18)
    plt.legend(loc='best')
    plt.grid()
    plt.show()

def build_param_grid(estimator):
    """Build the hyper-parameter grid searched for a linear estimator.

    Args:
        estimator: Object exposing ``get_params()`` (e.g. ``Ridge()``).

    Returns:
        dict: ``'alpha'`` mapped to 10 log-spaced values from 1e-3 to 1e2,
        plus ``'normalize': [True, False]`` only when the estimator lists
        ``normalize`` among its parameters.
    """
    grid = {'alpha': np.logspace(-3, 2, 10)}
    # scikit-learn removed the `normalize` argument from Ridge/Lasso in 1.2;
    # searching over it there makes GridSearchCV fail with an invalid-parameter
    # error, so only include it when the estimator still accepts it.
    if 'normalize' in estimator.get_params():
        grid['normalize'] = [True, False]
    return grid


if __name__ == '__main__':
    # Silence library warnings
    warnings.filterwarnings(action='ignore')
    # Print floats in fixed-point rather than scientific notation
    np.set_printoptions(suppress=True)
    # Load the data
    # NOTE(review): no statement binding X and y is present at this point in
    # this copy of the script -- restore the loader call before running.
    # Split the dataset: 70% for training, the rest for testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)

    # Ridge regression model
    rg_model = Ridge()
    param_test = build_param_grid(rg_model)
    # 5-fold cross-validated grid search
    rg_gv_model = GridSearchCV(estimator=rg_model, param_grid=param_test, cv=5)
    # Fit
    rg_gv_model.fit(X_train, y_train)
    print('最佳的参数模型为', rg_gv_model.best_params_)
    print('最好模型的评分为', rg_gv_model.best_score_)
    # Predict. The test set is reordered by ascending target first, so the
    # plotted true-value curve is monotone; the LASSO section below reuses
    # this ordering.
    order = y_test.argsort(axis=0)
    y_test = y_test[order]
    X_test = X_test[order, :]
    y_rg_pred = rg_gv_model.predict(X_test)
    # Evaluate
    print('Ridge mse =', mean_squared_error(y_test, y_rg_pred))
    # Plot
    plotSalePred('ridge', y_rg_pred)

    # LASSO model
    la_model = Lasso()
    param_test = build_param_grid(la_model)
    la_gv_model = GridSearchCV(estimator=la_model, param_grid=param_test, cv=5)
    la_gv_model.fit(X_train, y_train)
    print('最佳的模型参数:', la_gv_model.best_params_)
    print('最好模型的评分为', la_gv_model.best_score_)

    # Predict
    y_la_pred = la_gv_model.predict(X_test)
    print('LASSO mse = ', mean_squared_error(y_test, y_la_pred))
    # Plot
    plotSalePred('LASSO', y_la_pred)

Ridge mse = 1.9403340921989263

LASSO mse =  2.0055878106274507


### 二、房价预测

# 波士顿房价预测
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import warnings
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import ElasticNetCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score

# Seed NumPy's global random generator
np.random.seed(1)

# Load the data
# NOTE(review): `data` is read before any visible assignment and `return`
# appears at top level -- the enclosing `def` line and the statement that
# loads `data` appear to be missing from this copy; restore them before running.
X = np.array(data.data)  # feature values
y = np.array(data.target).reshape(-1, 1)  # target values, reshaped to a single column
return X, y

if __name__ == '__main__':
    # Silence library warnings
    warnings.filterwarnings(action='ignore')
    # Print floats in fixed-point rather than scientific notation
    np.set_printoptions(suppress=True)
    # Load the data
    # NOTE(review): no statement binding X and y is present at this point in
    # this copy of the script -- restore the loader call before running.
    # Split the dataset: 70% for training. A fixed integer random_state
    # (0 here) makes the split identical on every run.
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=0)

    # Model: standardize -> degree-3 polynomial features -> ElasticNet with
    # cross-validated alpha / l1_ratio. The polynomial step already emits a
    # bias column (include_bias=True), so the linear step fits no intercept.
    model = Pipeline([
        ('ss', StandardScaler()),
        ('poly', PolynomialFeatures(degree=3, include_bias=True)),
        ('linear', ElasticNetCV(alphas=np.logspace(-3, 2, 10),
                                l1_ratio=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
                                fit_intercept=False, cv=3)),
    ])
    # Fit the model
    model.fit(X_train, y_train)
    # Fetch the fitted ElasticNetCV step to read the selected hyper-parameters
    linear = model.named_steps['linear']
    print('超参数 =', linear.alpha_)
    print('l1_ratio_ = ', linear.l1_ratio_)

    # Predict. The test set is reordered by ascending target so the plotted
    # true-value curve is monotone. The reshape calls restore 2-D arrays
    # after indexing with `order` (assumes y_test is a column vector, as the
    # reshape(-1, 1) in the loader suggests -- confirm).
    order = y_test.argsort(axis=0)
    y_test = y_test[order].reshape(len(y_test), -1)
    X_test = X_test[order, :].reshape(len(y_test), -1)
    y_ev_pred = model.predict(X_test)
    # Evaluate
    print('ElasticNet MSE=', mean_squared_error(y_test, y_ev_pred.reshape(-1, 1)))
    print('ElasticNet R2=', r2_score(y_test, y_ev_pred))

    # Plot. Use a font with CJK glyphs so the Chinese labels render.
    mpl.rcParams['font.sans-serif'] = [u'simHei']
    mpl.rcParams['axes.unicode_minus'] = False

    t = np.arange(len(y_test))  # sample index
    fig = plt.figure(facecolor='w')
    fig.subplots()
    plt.plot(t, y_test, 'r-', lw=2, label=u'真实值')
    # Drawn in green: the original used 'r-' for both curves, which made the
    # estimate indistinguishable from the true values.
    plt.plot(t, y_ev_pred, 'g-', lw=2, label=u'估计值')
    plt.legend(loc='best')
    plt.title('波士顿房价预测', fontsize=18)
    plt.xlabel('样本编号', fontsize=15)
    plt.ylabel('房屋价格', fontsize=15)
    plt.grid()
    plt.show()

l1_ratio_ =  1.0
ElasticNet MSE= 15.945312578376676
ElasticNet R2= 0.8085003721420014


Vieu3.3主题

QQ 登录