1.4 案例:广告预测、房价预测

1147-柳同学

发表文章数:589

首页 » 算法 » 正文

一、广告预测

# 广告预测——岭回归、LASSO回归
import numpy as np
import pandas as pd
import warnings
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error

# Seed NumPy's global random number generator
np.random.seed(1)


# Load the advertising data set
def loaddata(path='../data/Advertising.csv'):
    """Read the advertising CSV and split it into features and target.

    The first row of the file is used as the header and the first column as
    the row index, so neither of them ends up in the returned arrays.

    Args:
        path: Location of the CSV file. Defaults to the path the script has
            always used, so existing ``loaddata()`` calls keep working.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` holds every column except
        the last one and ``y`` holds the last column.
    """
    table = pd.read_csv(path, header=0, index_col=0).values
    # Everything but the last column is a feature; the last column is the target.
    return table[:, :-1], table[:, -1]


# Plot predicted sales against the actual sales
def plotSalePred(name, y_pred, y_true=None):
    """Draw predicted and actual sales as two curves and show the figure.

    Args:
        name: Model name inserted into the figure title.
        y_pred: Predicted values, one per test sample.
        y_true: Actual values, one per test sample. When omitted, the
            module-level ``y_test`` is used, which is what this function
            always read before the parameter existed.
    """
    if y_true is None:
        # Backward-compatible fallback: ``y_test`` is only defined when the
        # file runs as a script, so other callers should pass ``y_true``.
        y_true = y_test
    x = np.arange(len(y_true))
    # Use a font that contains CJK glyphs and keep the minus sign renderable.
    mpl.rcParams['font.sans-serif'] = [u'simHei']
    mpl.rcParams['axes.unicode_minus'] = False

    fig = plt.figure(facecolor='w')
    fig.subplots()
    plt.plot(x, y_pred, 'r-', lw=2, label=u'预测数据')
    plt.plot(x, y_true, 'b-', lw=2, label=u'真实数据')
    plt.title(u'%s回归预测销量' % (name), fontsize=18)
    plt.legend(loc='best')
    plt.grid()
    plt.show()


if __name__ == '__main__':
    # Silence library warnings and print floats without scientific notation.
    warnings.filterwarnings(action='ignore')
    np.set_printoptions(suppress=True)

    # Load the data and keep 70% of it for training.
    X, y = loaddata()
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)

    # Ridge and LASSO are tuned over the same regularization strengths.
    alphas = np.logspace(-3, 2, 10)

    # ---- Ridge regression, tuned with 5-fold cross-validation ----
    # NOTE(review): the `normalize` estimator option was deprecated in
    # scikit-learn 1.0 and removed in 1.2; on those versions this grid search
    # is expected to fail. Confirm the scikit-learn version in use.
    rg_gv_model = GridSearchCV(
        estimator=Ridge(),
        param_grid={'alpha': alphas, 'normalize': [True, False]},
        cv=5,
    )
    rg_gv_model.fit(X_train, y_train)
    print('最佳的参数模型为', rg_gv_model.best_params_)
    print('最好模型的评分为', rg_gv_model.best_score_)

    # Reorder the test set by ascending target value so that the plotted
    # "actual" curve rises monotonically. Both models below reuse this order.
    order = np.argsort(y_test, axis=0)
    y_test, X_test = y_test[order], X_test[order, :]

    y_rg_pred = rg_gv_model.predict(X_test)
    print('Ridge mse =', mean_squared_error(y_test, y_rg_pred))
    plotSalePred('ridge', y_rg_pred)

    # ---- LASSO regression, same grid and cross-validation setup ----
    la_gv_model = GridSearchCV(
        estimator=Lasso(),
        param_grid={'alpha': alphas, 'normalize': [True, False]},
        cv=5,
    )
    la_gv_model.fit(X_train, y_train)
    print('最佳的模型参数:', la_gv_model.best_params_)
    print('最好模型的评分为', la_gv_model.best_score_)

    y_la_pred = la_gv_model.predict(X_test)
    print('LASSO mse = ', mean_squared_error(y_test, y_la_pred))
    plotSalePred('LASSO', y_la_pred)


最佳的参数模型为 {'alpha': 0.01291549665014884, 'normalize': True}
最好模型的评分为 0.8636586250535878
Ridge mse = 1.9403340921989263
最佳的模型参数: {'alpha': 0.01291549665014884, 'normalize': True}
最好模型的评分为 0.865719326575077
LASSO mse =  2.0055878106274507

1.4 案例:广告预测、房价预测
1.4 案例:广告预测、房价预测

二、房价预测

# 波士顿房价预测
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import warnings
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import ElasticNetCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score

# Seed NumPy's global random number generator
np.random.seed(1)


# Load the Boston housing data set
def loaddata():
    """Return the Boston housing features and target as NumPy arrays.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a copy of the data set's feature
        matrix and ``y`` is a copy of its target reshaped into a single
        column.
    """
    # NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and
    # removed in 1.2. Confirm the scikit-learn version in use.
    boston = load_boston()
    features = np.array(boston.data)
    target = np.array(boston.target)
    return features, target.reshape(-1, 1)


if __name__ == '__main__':
    # Silence library warnings so the console output stays readable.
    warnings.filterwarnings(action='ignore')
    # Print floats in fixed-point notation instead of scientific notation.
    np.set_printoptions(suppress=True)
    # Load the data.
    X, y = loaddata()
    # Keep 70% for training. A fixed random_state makes the split
    # reproducible: the same value yields the same partition on every run.
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=0)

    # Standardize -> degree-3 polynomial expansion -> cross-validated elastic net.
    # The constant column added by PolynomialFeatures (include_bias=True)
    # stands in for the intercept, hence fit_intercept=False on the estimator.
    model = Pipeline([
        ('ss', StandardScaler()),
        ('poly', PolynomialFeatures(degree=3, include_bias=True)),
        ('linear', ElasticNetCV(alphas=np.logspace(-3, 2, 10),
                                l1_ratio=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
                                fit_intercept=False, cv=3))
    ])
    # loaddata() returns the target as a column vector; flatten it to the
    # 1-D shape a single-output regressor expects.
    model.fit(X_train, y_train.ravel())
    # Fetch the fitted estimator by its step name to read the selected
    # hyper-parameters.
    linear = model.named_steps['linear']
    print('超参数 =', linear.alpha_)
    print('l1_ratio_ = ', linear.l1_ratio_)

    # Sort the test set by ascending true price so the plotted curves are
    # easy to compare. A 1-D permutation avoids the 3-D intermediates that
    # indexing with a column-shaped argsort result would create.
    order = y_test.ravel().argsort()
    y_test = y_test.ravel()[order]
    X_test = X_test[order]
    y_ev_pred = model.predict(X_test)
    # Evaluate on the held-out data.
    print('ElasticNet MSE=', mean_squared_error(y_test, y_ev_pred))
    print('ElasticNet R2=', r2_score(y_test, y_ev_pred))

    # Plot: use a font with CJK glyphs and keep the minus sign renderable.
    mpl.rcParams['font.sans-serif'] = [u'simHei']
    mpl.rcParams['axes.unicode_minus'] = False

    t = np.arange(len(y_test))  # sample index
    fig = plt.figure(facecolor='w')
    fig.subplots()
    # True values in red, estimates in green, so the two curves can be told
    # apart (both were previously drawn in red).
    plt.plot(t, y_test, 'r-', lw=2, label=u'真实值')
    plt.plot(t, y_ev_pred, 'g-', lw=2, label=u'估计值')
    plt.legend(loc='best')
    plt.title('波士顿房价预测', fontsize=18)
    plt.xlabel('样本编号', fontsize=15)
    plt.ylabel('房屋价格', fontsize=15)
    plt.grid()
    plt.show()


超参数 = 0.046415888336127795
l1_ratio_ =  1.0
ElasticNet MSE= 15.945312578376676
ElasticNet R2= 0.8085003721420014

1.4 案例:广告预测、房价预测

未经允许不得转载。作者:1147-柳同学,转载或复制请以超链接形式注明出处:拜师资源博客
原文地址:《1.4 案例:广告预测、房价预测》 发布于2021-01-07

分享到:
赞(0) 打赏

评论 抢沙发

评论前必须登录!

  注册



长按图片转发给朋友

觉得文章有用就打赏一下文章作者

支付宝扫一扫打赏

微信扫一扫打赏

Vieu3.3主题
专业打造轻量级个人企业风格博客主题!专注于前端开发,全站响应式布局自适应模板。

登录

忘记密码 ?

您也可以使用第三方帐号快捷登录

Q Q 登 录
微 博 登 录