# Published: 2023-04-07 19:00
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.optim as optim
import warnings
warnings.filterwarnings(\"ignore\")
%matplotlib inline
features = pd.read_csv(\"D:/DataSet/temps.csv\")
features.head()
print(\'数据维度:\',features.shape)
#数据维度: (348, 9)
# Build one datetime per row from the year / month / day columns.
import datetime

# Pull out the three calendar columns.
years = features['year']
months = features['month']
days = features['day']

# Construct the datetime objects directly from the integer components instead
# of formatting a 'Y-m-d' string and parsing it back.
dates = [
    datetime.datetime(int(year), int(month), int(day))
    for year, month, day in zip(years, months, days)
]
# One-hot encode the non-numeric columns.
features = pd.get_dummies(features)
features.head(5)

# Regression target: the 'actual' column.
labels = np.array(features['actual'])

# Remove the target from the feature table.
features = features.drop('actual', axis=1)

# Keep the column names so columns can still be looked up by name after the
# table is converted to a plain array.
feature_list = list(features.columns)

# Convert the feature table to a NumPy array.
features = np.array(features)
features.shape
from sklearn import preprocessing

# Standardise the feature columns: fit the scaler on the feature array, then
# apply it to that same array.
input_features = preprocessing.StandardScaler().fit(features).transform(features)

# Wrap the scaled features and the targets as double-precision tensors.
x = torch.tensor(input_features, dtype=torch.float64)
y = torch.tensor(labels, dtype=torch.float64)
# Parameters of a hand-written network: input -> 128 hidden units -> 1 output.
# The input width is read from x instead of being hard-coded, so it follows
# the number of feature columns.
weights = torch.randn((x.shape[1], 128), dtype=float, requires_grad=True)
bases = torch.randn(128, dtype=float, requires_grad=True)
weights2 = torch.randn((128, 1), dtype=float, requires_grad=True)
bases2 = torch.randn(1, dtype=float, requires_grad=True)

# Learning rate and loss history.
learning_rate = 0.001
losses = []

for i in range(1000):
    # ---- forward pass ----
    # Hidden layer pre-activation.
    hidden = x.mm(weights) + bases
    # Every weighted layer except the output layer is followed by an
    # activation function.
    hidden = torch.relu(hidden)
    # Network output, one column per sample.
    predictions = hidden.mm(weights2) + bases2

    # Mean squared error. predictions has one column, so the targets are
    # viewed as a column as well; subtracting a 1-D target vector would
    # broadcast to an N x N matrix and average the wrong quantity.
    loss = torch.mean((predictions - y.reshape(-1, 1)) ** 2)
    losses.append(loss.item())

    # Report the loss every 100 iterations.
    if i % 100 == 0:
        print('loss:', loss)

    # ---- backward pass ----
    loss.backward()

    # Plain gradient-descent step. The update runs under no_grad so it is not
    # recorded by autograd; each gradient is cleared right after use because
    # backward() accumulates into .grad.
    with torch.no_grad():
        for param in (weights, bases, weights2, bases2):
            param -= learning_rate * param.grad
            param.grad.zero_()
# Layer sizes and mini-batch size for the torch.nn version of the model.
input_size = input_features.shape[1]
hidden_size = 128
output_size = 1
batch_size = 16

# Network: linear layer -> sigmoid -> linear output layer.
my_nn = torch.nn.Sequential(
    torch.nn.Linear(input_size, hidden_size),
    torch.nn.Sigmoid(),
    torch.nn.Linear(hidden_size, output_size),
)
# Loss function: mean squared error averaged over the batch.
cost = torch.nn.MSELoss(reduction='mean')
# Optimizer: Adam is used here (the original note lists SGD as an alternative).
optimizer = torch.optim.Adam(my_nn.parameters(), lr=0.001)
# Train the network with mini-batches of batch_size samples.
losses = []

# Convert the data once, outside the loops. The targets are shaped as a column
# so they match the single-column output of my_nn; passing a 1-D target to
# MSELoss would broadcast against the predictions and distort the loss.
# Inputs and targets are not trainable, so they do not need requires_grad.
train_x = torch.tensor(input_features, dtype=torch.float)
train_y = torch.tensor(labels, dtype=torch.float).reshape(-1, 1)

for i in range(1000):
    batch_loss = []
    # Walk over the data in consecutive slices; the last slice may be shorter.
    for start in range(0, len(train_x), batch_size):
        end = min(start + batch_size, len(train_x))
        xx = train_x[start:end]
        yy = train_y[start:end]
        prediction = my_nn(xx)
        loss = cost(prediction, yy)
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()
        # A fresh graph is built for every batch, so it need not be retained.
        loss.backward()
        # Apply the parameter update.
        optimizer.step()
        batch_loss.append(loss.item())

    # Record and print the mean batch loss every 100 epochs.
    if i % 100 == 0:
        epoch_loss = np.mean(batch_loss)
        losses.append(epoch_loss)
        print(i, epoch_loss)
# Run the trained network over the whole data set.
x = torch.tensor(input_features, dtype=torch.float)
# Inference needs no gradients; under no_grad the output can be converted
# straight to a NumPy array for plotting.
with torch.no_grad():
    predict = my_nn(x).numpy()
def _to_datetimes(years, months, days):
    """Return one datetime per (year, month, day) triple.

    Each component is passed through int() first, so integer-valued floats
    are accepted.
    """
    return [
        datetime.datetime(int(year), int(month), int(day))
        for year, month, day in zip(years, months, days)
    ]


# Dates for the ground-truth rows.
dates = _to_datetimes(years, months, days)

# Table pairing each date with its true label.
true_data = pd.DataFrame(data={'date': dates, 'actual': labels})

# Likewise, a table pairing each date with the model prediction. The calendar
# columns are read back from the feature array by column name.
months = features[:, feature_list.index('month')]
days = features[:, feature_list.index('day')]
years = features[:, feature_list.index('year')]
test_dates = _to_datetimes(years, months, days)
predictions_data = pd.DataFrame(data={'date': test_dates, 'prediction': predict.reshape(-1)})
# Actual values as a blue line.
plt.plot(true_data['date'], true_data['actual'], 'b-', label='actual')
# Predicted values as red dots.
plt.plot(predictions_data['date'], predictions_data['prediction'], 'ro', label='prediction')
# Tick-label rotation is given as a number of degrees; the string '60' is not
# a valid rotation value.
plt.xticks(rotation=60)
plt.legend()
# Axis labels and title.
plt.xlabel('Date')
plt.ylabel('Maximum Temperature (F)')
# Trailing semicolon kept from the original; presumably there to suppress the
# echoed return value in a notebook cell.
plt.title('Actual and Predicted Values');