发布时间:2023-10-04 09:30
最小二乘法线性回归模型
# Import the sample generator
from sklearn.datasets import make_sparse_uncorrelated
# Import the ordinary least squares (OLS) linear regression model
from sklearn.linear_model import LinearRegression
# Import the ridge regression model
from sklearn.linear_model import Ridge
# Import the train/test split helper
from sklearn.model_selection import train_test_split as tsplit
# Import the mean squared error metric
from sklearn.metrics import mean_squared_error
# Import the R^2 (coefficient of determination) metric
from sklearn.metrics import r2_score
# Import the median absolute error metric
from sklearn.metrics import median_absolute_error
import matplotlib.pyplot as plt
import numpy as np

# Generate 100 samples with 4 feature dimensions
X, y = make_sparse_uncorrelated(n_samples=100, n_features=4)
# Hold out 10% of the samples (10 samples) for testing
X_train, X_test, y_train, y_test = tsplit(X, y, test_size=0.1)
# Instantiate the OLS linear regression model
reg = LinearRegression()
# Train the model
reg.fit(X_train, y_train)
# Predict on the held-out test set
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
# Bare expressions only echo values in a REPL; print() also works as a script
print('predicted:', y_pred)
print('actual:', y_test)
# Mean squared error
print('MSE:', mean_squared_error(y_test, y_pred))
# Coefficient of determination (R^2)
print('R2:', r2_score(y_test, y_pred))
# Median absolute error
print('MedAE:', median_absolute_error(y_test, y_pred))

# Use a CJK-capable font so the Chinese labels render correctly
plt.rcParams['font.sans-serif'] = ['FangSong']
plt.rcParams['axes.unicode_minus'] = False
plt.subplot(121)
plt.title('残差图')
plt.plot(y_pred - y_test, 'o')
# Zero-residual reference line spanning the 10 test-sample indices (0..9)
plt.plot(np.array([0, 9]), np.array([0, 0]))
plt.xlabel('测试样本序号')
plt.ylabel('残差:预测值-实际值')
plt.subplot(122)
plt.title('实际值-预测值')
plt.plot(y_test, y_pred, 'o')
# Identity line y = x as the ideal-prediction reference
y_range = np.linspace(y_test.min(), y_test.max(), 100)
plt.plot(y_range, y_range)
plt.xlabel('实际值')
plt.ylabel('预测值')
plt.show()
岭回归模型
from sklearn import linear_model

# Samples whose features are strongly correlated (nearly collinear)
X = np.array([[0, 0], [0, 0], [1, 1]])
y = np.array([0, .1, 1])
# Instantiate the ordinary least squares linear regression model
reg_linear = linear_model.LinearRegression()
# Instantiate the ridge regression model; alpha is the L2 penalty strength
reg_ridge = linear_model.Ridge(alpha=0.5)
# Fit both models on the same data
reg_linear.fit(X, y)
reg_ridge.fit(X, y)
# OLS coefficients are visibly distorted by the strong feature correlation
# (bare expressions only echo in a REPL; print() also works as a script)
print('OLS coef:', reg_linear.coef_)
print('OLS intercept:', reg_linear.intercept_)
# The alpha penalty shrinks the coefficients and keeps them well-behaved
print('Ridge coef:', reg_ridge.coef_)
print('Ridge intercept:', reg_ridge.intercept_)
正规方程
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import LinearRegression

# Load the data.  load_boston was deprecated in scikit-learn 1.0 and
# removed in 1.2; the California housing dataset is the recommended
# regression replacement.
data = fetch_california_housing()
# Split into train/test sets (fixed seed for reproducibility)
x_train, x_test, y_train, y_test = train_test_split(data.data, data.target, random_state=22)
# Feature engineering: standardize features (fit scaler on train data only)
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)
# Fit an OLS model (solved via the closed-form normal equation)
estimator = LinearRegression()
estimator.fit(x_train, y_train)
# Model evaluation (print() so the script shows results outside a REPL)
y_predict = estimator.predict(x_test)
print('predictions:', y_predict)
print('coefficients:', estimator.coef_)
print('intercept:', estimator.intercept_)
# Mean squared error on the test set
print('MSE:', mean_squared_error(y_test, y_predict))
梯度下降法
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import LinearRegression

# Load the data.  load_boston was deprecated in scikit-learn 1.0 and
# removed in 1.2; the California housing dataset is the recommended
# regression replacement.
data = fetch_california_housing()
# Split into train/test sets (fixed seed for reproducibility)
x_train, x_test, y_train, y_test = train_test_split(data.data, data.target, random_state=22)
# Feature engineering: standardize features (fit scaler on train data only)
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)
# Fit a linear model with stochastic gradient descent
estimator = SGDRegressor(max_iter=1000)
estimator.fit(x_train, y_train)
# Model evaluation (print() so the script shows results outside a REPL)
y_predict = estimator.predict(x_test)
print('predictions:', y_predict)
print('coefficients:', estimator.coef_)
print('intercept:', estimator.intercept_)
# Mean squared error on the test set
print('MSE:', mean_squared_error(y_test, y_predict))