# 1.5 案例：多项式曲线拟合的比较

1147-柳同学

## 热门标签

,

# 线性回归、Ridge、LASSO、ElasticNet回归
import numpy as np
import pandas as pd
import warnings
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, ElasticNetCV

# 设置随机数种子
np.random.seed(0)

# 构造数据
def makedata(n=9):
    """Generate a small noisy quadratic data set.

    The x values are ``n`` evenly spaced points on [0, 6], each perturbed by
    standard-normal noise and then sorted in ascending order. The y values
    follow ``x**2 - 4*x - 3`` plus independent standard-normal noise.

    Random draws come from NumPy's global random state, so call
    ``np.random.seed`` beforehand for reproducible output.

    Args:
        n: Number of samples to generate. Defaults to 9.

    Returns:
        A tuple ``(x, y)`` of column vectors, each with shape ``(n, 1)``.
    """
    # Noise for x is drawn first, then noise for y; keep this order so a
    # fixed seed keeps producing the same data.
    x = np.sort(np.linspace(0, 6, n) + np.random.randn(n))
    y = x ** 2 - 4 * x - 3 + np.random.randn(n)
    # Return column vectors, the layout the estimators are fitted with.
    return x.reshape(-1, 1), y.reshape(-1, 1)

# 计算xss
def xss(y, y_hat):
    """Compute R^2 and the Pearson correlation of predictions vs. targets.

    Side effect: the total sum of squares (TSS) and explained sum of squares
    (ESS) of this call are appended to the module-level lists ``tss_list``
    and ``ess_list``. Both lists must exist before this function is called.

    Args:
        y: Observed target values; flattened to 1-D internally.
        y_hat: Predicted values with the same number of elements as ``y``;
            flattened to 1-D internally.

    Returns:
        A tuple ``(R2, corr_coef)`` where ``R2 = (TSS - RSS) / TSS`` and
        ``corr_coef`` is the off-diagonal entry of ``np.corrcoef(y, y_hat)``.
    """
    # Flatten so column vectors and 1-D arrays are treated alike.
    y = np.asarray(y).ravel()
    y_hat = np.asarray(y_hat).ravel()

    # The mean of the observations is shared by TSS and ESS.
    y_mean = np.average(y, axis=0)
    tss = np.sum(np.power(y - y_mean, 2))      # total sum of squares
    rss = np.sum(np.power(y_hat - y, 2))       # residual sum of squares
    ess = np.sum(np.power(y_hat - y_mean, 2))  # explained sum of squares
    R2 = (tss - rss) / tss

    # Record this fit's sums of squares in the module-level lists.
    tss_list.append(tss)
    ess_list.append(ess)

    # Pearson correlation coefficient between targets and predictions.
    corr_coef = np.corrcoef(y, y_hat)[0, 1]
    return R2, corr_coef

if __name__ == '__main__':
    # Fit polynomial models of degree 1..8 with four linear estimators and
    # draw the fitted curves for each estimator in its own subplot.

    # Suppress all warnings.
    warnings.filterwarnings(action='ignore')
    # Print floats without scientific notation and avoid wrapping long rows.
    np.set_printoptions(suppress=True, linewidth=1000)

    # Get the data.
    X, y = makedata()
    N = X.shape[0]  # number of samples; also used as the top z-order below

    # One pipeline per estimator: polynomial expansion, then a linear model.
    # `normalize` is intentionally not passed to RidgeCV / LassoCV: it takes
    # a single bool (a list such as [True, False] is merely truthy, not a
    # search grid) and the parameter was removed in scikit-learn 1.2, so the
    # estimators run with their default of no normalization.
    alphas = np.logspace(-3, 2, 10)
    models = [
        Pipeline([
            ('poly', PolynomialFeatures()),
            ('linear', LinearRegression(fit_intercept=False)),
        ]),
        Pipeline([
            ('poly', PolynomialFeatures()),
            ('linear', RidgeCV(alphas=alphas)),
        ]),
        Pipeline([
            ('poly', PolynomialFeatures()),
            ('linear', LassoCV(alphas=alphas)),
        ]),
        Pipeline([
            ('poly', PolynomialFeatures()),
            ('linear', ElasticNetCV(alphas=alphas,
                                    l1_ratio=np.linspace(0.1, 1, 10))),
        ]),
    ]

    # Plot settings: a font with CJK glyphs and a minus sign that renders.
    mpl.rcParams['font.sans-serif'] = [u'simHei']
    mpl.rcParams['axes.unicode_minus'] = False

    plt.figure(figsize=(18, 12), facecolor='w')
    # Polynomial degrees to try.
    d_pool = np.arange(1, 9, 1)
    m = d_pool.size
    # One colour per degree: integers interpolated linearly from 0xff0000
    # down to 0x0000ff and formatted as hex colour codes.
    clrs = ['#%06x' % int(c) for c in np.linspace(16711680, 255, m)]
    # One line width per degree, from thick (5) to thin (2).
    line_width = np.linspace(5, 2, m)
    # Subplot titles, in the same order as `models`.
    titles = u'线性回归', u'Ridge回归', u'LASSO回归', u'ElasticNet回归'

    # Filled by xss() with the TSS / ESS of every fit.
    tss_list = []
    ess_list = []

    # Evaluation grid for drawing the curves; it does not depend on the model.
    x_hat = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)

    # One subplot per estimator.
    for t, (model, title) in enumerate(zip(models, titles)):
        plt.subplot(2, 2, t + 1)
        plt.plot(X, y, 'ro', ms=10, zorder=N)

        for i, d in enumerate(d_pool):
            # Set the polynomial degree and train.
            model.set_params(poly__degree=d)
            model.fit(X, y.ravel())
            # The fitted final estimator of the pipeline.
            linear = model.named_steps['linear']
            output = '%s : %d阶，系数为' % (title, d)

            # The CV estimators expose the selected hyper-parameters; insert
            # them into the message just before the coefficient label.
            if hasattr(linear, 'alpha_'):
                idx = output.find('系数')
                output = output[:idx] + ('alpha = %.6f' % linear.alpha_) + output[idx:]
            if hasattr(linear, 'l1_ratio_'):
                idx = output.find('系数')
                output = output[:idx] + ('l1_ratio = %.6f' % linear.l1_ratio_) + output[idx:]
            print(output, linear.coef_.ravel())

            # Predict on the grid and score the fit on the training points.
            y_hat = model.predict(x_hat)
            r2, corr_coef = xss(y, model.predict(X))

            # Draw the degree-2 curve above the other curves.
            z = N - 1 if d == 2 else 0
            label = '%d阶，$R^2$=%.3f' % (d, r2)
            if hasattr(linear, 'l1_ratio_'):
                label += 'l1_ratio = %.2f' % (linear.l1_ratio_)
            plt.plot(x_hat, y_hat, color=clrs[i], label=label,
                     lw=line_width[i], zorder=z)

        plt.legend(loc='best')
        plt.grid(True)
        plt.title(title, fontsize=18)
        plt.xlabel('X', fontsize=15)
        plt.ylabel('Y', fontsize=15)

    # Add the figure title first, then lay out the subplots inside the lower
    # 95% of the figure so the title does not overlap the top row.
    plt.suptitle('多项式曲线拟合比较', fontsize=22)
    plt.tight_layout(rect=(0, 0, 1, 0.95))
    plt.show()


### TSS >= RSS + ESS

if __name__ == '__main__':
    # Plot the TSS and ESS values accumulated in tss_list / ess_list, one
    # point per recorded fit. Guarded because those lists only exist after
    # the main script above has run.

    # Leave 5% headroom above the largest plotted value.
    y_max = max(max(tss_list), max(ess_list)) * 1.05
    plt.figure(facecolor='w', figsize=(9, 7))
    t = np.arange(len(tss_list))  # index of each recorded fit
    plt.plot(t, tss_list, 'ro-', lw=2, label='TSS')
    plt.plot(t, ess_list, 'mo-', lw=1, label='ESS')
    plt.ylim(0, y_max)
    plt.legend(loc='best')
    plt.xlabel('样本编号')
    plt.ylabel('XSS的值', fontsize=15)
    plt.grid(True)
    plt.show()


Vieu3.3主题

QQ登录