发布时间:2024-04-02 13:01
原创申明: 未经许可,禁止以任何形式转载,若要引用,请标注链接地址
全文共计33000字,阅读大概需要5分钟
欢迎关注我的个人公众号:不懂开发的程序猿
先检查电脑配置:NVIDIA RTX2060 + i7-10750H
我这里使用的是Windows操作系统 + 集成显卡:Intel® UHD Graphics + 独立显卡:NVIDIA RTX2060 ,以下实验基于此配置
安装的链接地址:Anaconda安装包官网地址https://repo.anaconda.com/archive/
这里我选择的是Anaconda3-5.2.0-Windows-x86_64,根据自己的操作系统选对应的版本,下载下来,右键管理员运行安装
Windows开始菜单管理员运行Anaconda Prompt控制台看到有base说明安装成功,输入conda --version
创建项目仓库,建议不要将项目的配置环境都装在base根目录仓库下,因为每个项目的配置环境不一样,如果项目运行是一样的环境就都用一样的解释器来运行,不一样的运行环境就建立不同的仓库,命令如下:
输入conda create -n pytorch python=3.6
创建pytorch仓库,选择y进行安装
输入:nvidia-smi
检查显卡配置
官网链接地址https://pytorch.org/
下拉网页,选择配置如下图,
运行命令:conda install pytorch torchvision torchaudio cudatoolkit=10.2 -c pytorch
选y确认安装,如下图。
安装过程大概10分钟
输入如下命令,没有报错,说明torch能正常调用显卡驱动
python
import torch
torch.cuda.is_available()
官网下载地址https://www.jetbrains.com/pycharm/download/#section=windows
傻瓜式安装即可,社区版和专业版都行,如果有edu邮箱的建议用一下专业版,白嫖它不香吗
新建项目如图:
Location:我选择的是E:\code\learn_pytorch
使用的虚拟环境是:E:\code\learn_pytorch\venv
Base 解释器:E:\Anaconda3-5.2.0\python.exe
测试pyCharm能否正确解析pytorch:在控制台输入
import torch
torch.cuda.is_available()
运行Anaconda Prompt控制台,输入
conda activate pytorch
conda install nb_conda
提示输入y,等待安装大概5分钟
输入:jupyter notebook
然后会跳转网页如图:
点击右边的new --> Python[conda env:pytorch],新建一个终端
如果出现如下报错,删掉提示中的路径下的pythoncom36.dll文件即可
在Terminal中输入:
pip install tensorboard
tensorboard --logdir=logs --port=6007
pip install opencv-python
蚂蚁蜜蜂/练手数据集:链接:https://pan.baidu.com/s/1jZoTmoFzaTLWh4lKBHVbEA
密码: 5suq
解压下载好的数据集,粘贴在项目里
新建项目,编写测试代码
# test_tensorboard.py
# Log one image and one scalar curve to TensorBoard ("logs" directory).
from torch.utils.tensorboard import SummaryWriter
import numpy as np
from PIL import Image

writer = SummaryWriter("logs")
image_path = "data/train/bees_image/16838648_415acd9e3f.jpg"

# add_image does not accept PIL images directly, so convert to a numpy array.
pil_image = Image.open(image_path)
img_array = np.array(pil_image)
print(type(img_array))
print(img_array.shape)

# The numpy array is height x width x channel, hence dataformats='HWC'.
writer.add_image("test", img_array, 2, dataformats='HWC')

# Scalar curve; tag says "y=2x" but the logged value is 3*i (kept as-is).
for i in range(100):
    writer.add_scalar("y=2x", 3*i, i)

writer.close()
在Terminal中输入:
如果不指定 --port=6007,就使用系统默认的 6006 端口
tensorboard --logdir=logs --port=6007
# read_data.py
from torch.utils.data import Dataset
#import cv2
from PIL import Image
import os
class MyData(Dataset):
    """Dataset yielding (PIL image, label) pairs from root_dir/label_dir.

    The directory name itself (label_dir) doubles as the class label for
    every image it contains.
    """

    def __init__(self, root_dir, label_dir):
        # Keep both path components so __getitem__ can rebuild full paths.
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(self.root_dir, self.label_dir)
        # Snapshot of the image file names inside that directory.
        self.img_path = os.listdir(self.path)

    def __getitem__(self, idx):
        # Resolve the idx-th file name to a full path and open it lazily.
        name = self.img_path[idx]
        item_path = os.path.join(self.root_dir, self.label_dir, name)
        return Image.open(item_path), self.label_dir

    def __len__(self):
        # One sample per file found at construction time.
        return len(self.img_path)
# Build the ant and bee datasets and concatenate them into one training set.
root_dir = "dataset/train"
ants_label_dir = "ants"
bees_label_dir = "bees"
ants_dataset = MyData(root_dir, ants_label_dir)
bees_dataset = MyData(root_dir, bees_label_dir)
# Dataset.__add__ produces a ConcatDataset covering both classes.
train_dataset = ants_dataset + bees_dataset
# rename_dataset.py
# For every image in ants_image/, write a <stem>.txt label file into
# ants_label/ containing the class name ("ants").
import os

root_dir = "dataset/train"
target_dir = "ants_image"
out_dir = "ants_label"

# The folder name encodes the class label before the underscore.
label = target_dir.split('_')[0]

for img_name in os.listdir(os.path.join(root_dir, target_dir)):
    stem = img_name.split('_')[0]
    with open(os.path.join(root_dir, out_dir, "{}.txt".format(stem)), 'w') as f:
        f.write(label)
# useful_Transforms.py
# Demonstrates ToTensor, Normalize, Resize and Compose; results go to TensorBoard.
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

writer = SummaryWriter("logs")
img = Image.open("images/fb93a.jpeg")
print(img)

# --- ToTensor: PIL image -> float tensor in [0, 1], CHW layout ---
to_tensor = transforms.ToTensor()
img_tensor = to_tensor(img)
writer.add_image("ToTensor", img_tensor)

# --- Normalize: output[channel] = (input[channel] - mean[channel]) / std[channel] ---
print(img_tensor[0][0][0])
normalize = transforms.Normalize([9, 6, 3], [1, 4, 7])
img_norma = normalize(img_tensor)
print(img_tensor[0][0][0])
writer.add_image("Normalize", img_norma, 2)

# --- Resize to a fixed 512x512, then convert for logging ---
print(img.size)
resize = transforms.Resize((512, 512))
img_resize = resize(img)            # PIL -> PIL
img_resize = to_tensor(img_resize)  # PIL -> tensor
writer.add_image("Resize", img_resize, 0)
print(img_resize)

# --- Compose: resize the shorter edge to 512, then ToTensor, as one pipeline ---
resize_short_edge = transforms.Resize(512)
pipeline = transforms.Compose([resize_short_edge, to_tensor])
img_resize_2 = pipeline(img)
writer.add_image("Resize", img_resize_2, 1)

writer.close()
# Transforms.py
# Minimal example: open an image, convert it to a tensor, log it to TensorBoard.
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

# Use a relative path: in a Windows absolute path such as
# E:\code\learn_pytorch\... the backslash is treated as an escape character.
img_path = "data/train/ants_image/0013035.jpg"
img = Image.open(img_path)

writer = SummaryWriter("logs")

# transforms.ToTensor turns a PIL image into the torch tensor datatype
# that the rest of the pipeline (and TensorBoard) expects.
to_tensor = transforms.ToTensor()
tensor_img = to_tensor(img)
writer.add_image("Tensor_image", tensor_img)

writer.close()
CIFAR-10是一个更接近普适物体的彩色图像数据集。CIFAR-10 是由Hinton 的学生Alex Krizhevsky 和Ilya Sutskever 整理的一个用于识别普适物体的小型数据集。一共包含10 个类别的RGB 彩色图片:飞机( airplane )、汽车( automobile )、鸟类( bird )、猫( cat )、鹿( deer )、狗( dog )、蛙类( frog )、马( horse )、船( ship )和卡车( truck )。
每个图片的尺寸为32 × 32 ,每个类别有6000个图像,数据集中一共有50000 张训练图片和10000 张测试图片。
# dataset_transforms.py
# Download CIFAR-10 with an on-the-fly ToTensor transform and log the
# first ten test images to TensorBoard.
import torchvision
from torch.utils.tensorboard import SummaryWriter

to_tensor_pipeline = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])

train_set = torchvision.datasets.CIFAR10(root="./dataset", train=True, transform=to_tensor_pipeline, download=True)
test_set = torchvision.datasets.CIFAR10(root="./dataset", train=False, transform=to_tensor_pipeline, download=True)

writer = SummaryWriter("logs")
for step in range(10):
    # Each sample is an (image tensor, class index) pair.
    img, target = test_set[step]
    writer.add_image("test_set", img, step)
writer.close()
# dataloader.py
# Iterate the CIFAR-10 test split through a DataLoader for two epochs,
# logging every batch to TensorBoard.
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

test_data = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor())
# drop_last=True discards the final batch when it is smaller than 64.
test_loader = DataLoader(dataset=test_data, batch_size=64, shuffle=True, num_workers=0, drop_last=True)

# Peek at the first sample: image shape and its class index.
img, target = test_data[0]
print(img.shape)
print(target)

writer = SummaryWriter("logs")
for epoch in range(2):
    step = 0
    # shuffle=True means the two epochs see batches in different orders.
    for imgs, targets in test_loader:
        writer.add_images("Epoch: {}".format(epoch), imgs, step)
        step += 1
writer.close()
shuffle=False 每次都是一样的取
shuffle=True 随机抽取
# nn_module.py
# @Time: 2022/1/13 21:42
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn
class NeuralNetwork(nn.Module):
    """Minimal nn.Module whose forward pass simply adds one to its input."""

    def __init__(self) -> None:
        super().__init__()

    def forward(self, input):
        # Demonstrates that calling the module invokes forward() via __call__.
        return input + 1
# Run a single scalar tensor through the module and show the result (2.0).
neuralnetwork = NeuralNetwork()
x = torch.tensor(1.0)
output = neuralnetwork(x)
print(output)
# nn_conv.py
# Hand-sized 2-D cross-correlation demo with F.conv2d: same input/kernel,
# three different stride/padding settings.
import torch
import torch.nn.functional as F

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]])

# conv2d expects (minibatch, channels, H, W) for both input and weight.
input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))
print(input.shape)
print(kernel.shape)

# stride 1 -> 3x3 output
output = F.conv2d(input, kernel, stride=1)
print(output)

# stride 2 -> 2x2 output
output2 = F.conv2d(input, kernel, stride=2)
print(output2)

# stride 1 with one ring of zero padding -> output keeps the 5x5 size
output3 = F.conv2d(input, kernel, stride=1, padding=1)
print(output3)
# src/nn_conv2d.py
# Run a single 3->6 channel convolution over CIFAR-10 batches and log
# inputs/outputs to TensorBoard.
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)


class NeuralNetwork(nn.Module):
    """One 3x3 convolution mapping 3 input channels to 6 output channels."""

    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        return self.conv1(x)


neuralnetwork = NeuralNetwork()

writer = SummaryWriter("../logs")
step = 0
for imgs, targets in dataloader:
    output = neuralnetwork(imgs)
    print(imgs.shape)    # torch.Size([64, 3, 32, 32])
    print(output.shape)  # torch.Size([64, 6, 30, 30])
    writer.add_images("input", imgs, step)
    # add_images cannot render 6 channels; fold them back into 3-channel
    # images (doubling the batch) purely for visualisation.
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)
    step += 1
writer.close()
# nn_maxpool.py
# MaxPool2d demo: a 3x3 window with ceil_mode=True keeps partial edge windows.
import torch
from torch import nn
from torch.nn import MaxPool2d

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
# Pooling needs an (N, C, H, W) tensor; -1 lets reshape infer the batch dim.
input = torch.reshape(input, (-1, 1, 5, 5))
print(input.shape)


class NueralNetwork(nn.Module):
    """Wraps a single 3x3 max-pool layer."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        # ceil_mode=True: windows hanging over the edge still produce output.
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        return self.maxpool1(input)


nueralnetwork = NueralNetwork()
output = nueralnetwork(input)
print(output)
tensor([[[[2., 3.],
[5., 1.]]]])
ceil_mode=False
tensor([[[[2.]]]])
# nn_maxpool (CIFAR-10 variant)
# Apply 3x3 max pooling to CIFAR-10 batches and log input/output images
# to TensorBoard; pooling visibly blurs the pictures.
import torch
import torchvision
from torch import nn
from torch.nn import MaxPool2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)


class NueralNetwork(nn.Module):
    """Single 3x3 max-pool layer (ceil_mode keeps partial edge windows)."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        return self.maxpool1(input)


nueralnetwork = NueralNetwork()

writer = SummaryWriter("../logs")
step = 0
for imgs, targets in dataloader:
    writer.add_images("input", imgs, step)
    # Pooling keeps the channel count, so the result can be logged directly.
    writer.add_images("output", nueralnetwork(imgs), step)
    step += 1
writer.close()
# ReLU demo on a small hand-made tensor.
# FIX: the pasted console output after print(input) was not commented out in
# the original, which made the script a syntax error; it is a comment now.
# (Unused torchvision/DataLoader imports of this self-contained snippet removed.)
import torch
from torch import nn
from torch.nn import ReLU

input = torch.tensor([[1, -0.5],
                      [-1, 3]])
input = torch.reshape(input, (-1, 1, 2, 2))
print(input.shape)
print(input)
# Expected:
# tensor([[[[ 1.0000, -0.5000],
#           [-1.0000,  3.0000]]]])


class NerualNetwork(nn.Module):
    """Single ReLU layer: clamps negatives to zero, keeps positives unchanged."""

    def __init__(self):
        super(NerualNetwork, self).__init__()
        self.relu1 = ReLU()

    def forward(self, input):
        output = self.relu1(input)
        return output


nerualnetwork = NerualNetwork()
output = nerualnetwork(input)
print(output)
tensor([[[[1., 0.],
[0., 3.]]]])
tensorboard里面 step 不从0 开始,是图片显示的问题, 用命令:
tensorboard --logdir=logs --samples_per_plugin images=1000
# Sigmoid demo: run CIFAR-10 batches through a Sigmoid layer and log
# input/output images to TensorBoard.
import torch
import torchvision
from torch import nn
from torch.nn import ReLU, Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

input = torch.tensor([[1, -0.5],
                      [-1, 3]])
input = torch.reshape(input, (-1, 1, 2, 2))
print(input.shape)
print(input)

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64, )


class NerualNetwork(nn.Module):
    """Holds a ReLU and a Sigmoid, but forward only applies the Sigmoid."""

    def __init__(self):
        super(NerualNetwork, self).__init__()
        self.relu1 = ReLU()
        self.sigmod1 = Sigmoid()

    def forward(self, input):
        return self.sigmod1(input)


nerualnetwork = NerualNetwork()

writer = SummaryWriter("../logs")
step = 0
for imgs, target in dataloader:
    writer.add_images("input", imgs, global_step=step)
    writer.add_images("output", nerualnetwork(imgs), global_step=step)
    step += 1
writer.close()
Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift .
A simple lookup table that stores embeddings of a fixed dictionary and size.
# nn_linear.py
# Flatten each CIFAR-10 batch into one long vector and project 196608 -> 10.
import torch
import torchvision
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)


class NerualNetwork(nn.Module):
    """Linear layer sized for one whole flattened batch (64*3*32*32 values)."""

    def __init__(self):
        super(NerualNetwork, self).__init__()
        # 196608 = 64 * 3 * 32 * 32
        self.linear1 = Linear(196608, 10)

    def forward(self, input):
        return self.linear1(input)


nerualnetwork = NerualNetwork()

# NOTE(review): the last test batch has fewer than 64 images, so its flattened
# size will not match 196608 — same behaviour as the original tutorial code.
for imgs, targets in dataloader:
    print(imgs.shape)       # torch.Size([64, 3, 32, 32])
    # flatten() collapses the whole batch into a 1-D vector.
    flattened = torch.flatten(imgs)
    print(flattened.shape)  # torch.Size([196608])
    logits = nerualnetwork(flattened)
    print(logits.shape)     # torch.Size([10])
# nn_seq.py
# @Time: 2022/1/14 12:57
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.tensorboard import SummaryWriter
class NueralNetwork(nn.Module):
    """CIFAR-10 classifier: three conv+pool stages, then two linear layers.

    Sequential replaces the nine individual layer attributes of the
    verbose, layer-by-layer formulation.
    """

    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            # Three 2x2 poolings shrink 32x32 to 4x4; 64 * 4 * 4 = 1024.
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10),
        )

    def forward(self, x):
        # 3x32x32 image batch -> 10 class logits per image.
        return self.model1(x)
# Sanity-check the architecture with a dummy batch and export the graph
# to TensorBoard.
nueralnetwork = NueralNetwork()
print(nueralnetwork)
input = torch.ones((64, 3, 32, 32))
output = nueralnetwork(input)
print(output.shape)

writer = SummaryWriter("../logs")
writer.add_graph(nueralnetwork, input)
writer.close()
输出结果是:
NueralNetwork(
(model1): Sequential(
(0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(6): Flatten(start_dim=1, end_dim=-1)
(7): Linear(in_features=1024, out_features=64, bias=True)
(8): Linear(in_features=64, out_features=10, bias=True)
)
)
torch.Size([64, 10])
# nn_loss_network.py
# Push CIFAR-10 samples (batch size 1) through the classifier and print the
# cross-entropy loss per sample.
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, download=True,
                                       transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=1)


class NueralNetwork(nn.Module):
    """CIFAR-10 classifier: three conv+pool stages, then two linear layers."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10),
        )

    def forward(self, x):
        return self.model1(x)


loss = nn.CrossEntropyLoss()
nueralnetwork = NueralNetwork()
for imgs, targets in dataloader:
    outputs = nueralnetwork(imgs)
    result_loss = loss(outputs, targets)
    print(result_loss)
    print(outputs)
    print(targets)
# nn_optim.py
# Train the CIFAR-10 classifier with SGD, printing the summed loss per epoch.
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, download=True,
                                       transform=torchvision.transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=1)


class NueralNetwork(nn.Module):
    """CIFAR-10 classifier: three conv+pool stages, then two linear layers."""

    def __init__(self):
        super(NueralNetwork, self).__init__()
        self.model1 = Sequential(
            Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2),
            MaxPool2d(kernel_size=2),
            Flatten(),
            Linear(in_features=1024, out_features=64),
            Linear(in_features=64, out_features=10),
        )

    def forward(self, x):
        return self.model1(x)


loss = nn.CrossEntropyLoss()
nueralnetwork = NueralNetwork()
# Plain SGD over all model parameters.
optim = torch.optim.SGD(nueralnetwork.parameters(), lr=0.01)

for epoch in range(20):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = nueralnetwork(imgs)
        result_loss = loss(outputs, targets)
        optim.zero_grad()       # clear gradients left over from the last step
        result_loss.backward()  # back-propagate
        optim.step()            # apply the parameter update
        # BUG FIX: accumulate the Python float, not the tensor — adding the
        # tensor keeps every step's autograd graph alive and leaks memory.
        running_loss += result_loss.item()
    print(running_loss)
# model_pretrained.py
# Two ways to adapt a pretrained VGG16 (1000 ImageNet classes) to CIFAR-10.
import torchvision
from torch import nn

# FIX: the ImageNet download error message was pasted as a bare line of text,
# which broke the script's syntax. ImageNet can no longer be downloaded
# automatically — torchvision raises:
#   RuntimeError: The dataset is no longer publicly accessible. You need to
#   download the archives externally and place them in the root directory.
# so the call stays commented out.
# train_data = torchvision.datasets.ImageNet("../data_image_net", split='train', download=True,
#                                            transform=torchvision.transforms.ToTensor())

vgg16_false = torchvision.models.vgg16(pretrained=False)
vgg16_true = torchvision.models.vgg16(pretrained=True)
print(vgg16_true)

train_data = torchvision.datasets.CIFAR10('../data', train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)

# Option 1: append an extra 1000 -> 10 linear layer to the pretrained classifier.
vgg16_true.classifier.add_module('add_linear', nn.Linear(1000, 10))
print(vgg16_true)

# Option 2: replace the final 4096 -> 1000 layer with a 4096 -> 10 one.
print(vgg16_false)
vgg16_false.classifier[6] = nn.Linear(4096, 10)
print(vgg16_false)
数据集下载的位置:Downloading:
https://download.pytorch.org/models/vgg16-397923af.pth
to C:\Users\15718/.cache\torch\hub\checkpoints\vgg16-397923af.pth
***如果遇到数据集较大,网络不通畅,建议:复制下载链接,用迅雷下 ***
# model_save.py
# The two ways of persisting a model, plus the classic pitfall with way one.
import torch
import torchvision
from torch import nn

vgg16 = torchvision.models.vgg16(pretrained=False)

# Saving option 1: the whole module (architecture + weights).
torch.save(vgg16, "vgg16_method1.pth")
# Saving option 2 (officially recommended): just the state dict.
torch.save(vgg16.state_dict(), "vgg16_method2.pth")


# Pitfall: a model saved with option 1 can only be loaded where this class
# definition is importable.
class NeuralNetwork(nn.Module):
    """Toy single-convolution model used to demonstrate the pitfall."""

    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        return self.conv1(x)


neuralnetwork = NeuralNetwork()
torch.save(neuralnetwork, "neuralnetwork_method1.pth")
# model_load.py
# Counterpart of model_save.py: the two loading styles and the pitfall.
import torch
from model_save import *

# Option 1: load the full module saved with torch.save(model, ...).
model = torch.load("vgg16_method1.pth")
# print(model)

# Option 2: this file holds only a state dict (an ordered dict of tensors)...
model = torch.load("vgg16_method2.pth")
print(model)

# ...so rebuild the architecture first and load the weights into it.
vgg16 = torchvision.models.vgg16(pretrained=False)
vgg16.load_state_dict(torch.load("vgg16_method2.pth"))
print(vgg16)

# Pitfall: loading a whole saved module needs its class definition in scope —
# here the star-import from model_save provides NeuralNetwork.
model = torch.load("neuralnetwork_method1.pth")
print(model)
# model.py
# @Time: 2022/1/15 9:03
# @Author: 金阳
# @Parameter:
# @Version: 1.0.1
import torch
from torch import nn
#搭建神经网络
class NurealNetwork(nn.Module):
    """CIFAR-10 classifier: three 5x5-conv + 2x2-max-pool stages followed by
    a 1024 -> 64 -> 10 linear head."""

    def __init__(self):
        super(NurealNetwork, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            # After three halvings a 32x32 input is 4x4 with 64 channels.
            nn.Linear(64*4*4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        return self.model(x)
if __name__ == '__main__':
    # Quick sanity check: a batch of 64 CIFAR-sized inputs must map to (64, 10).
    nurealnetwork = NurealNetwork()
    input = torch.ones((64, 3, 32, 32))
    output = nurealnetwork(input)
    print(output.shape)
# train.py
# Full CIFAR-10 training loop: data loading, SGD optimisation, per-epoch
# evaluation, TensorBoard logging and checkpointing.
import torch.optim
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import *

# --- datasets ---
train_data = torchvision.datasets.CIFAR10(root="../dataset", train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root="../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                         download=True)
train_data_size = len(train_data)
test_data_size = len(test_data)
# e.g. with train_data_size = 10 this prints "训练数据集的长度为:10"
print("训练数据集的长度为:{}".format(train_data_size))
print("测试数据集的长度为:{}".format(test_data_size))

# --- dataloaders ---
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# --- model / loss / optimiser ---
nurealnetwork = NurealNetwork()
loss_fn = nn.CrossEntropyLoss()
learning_rate = 1e-2  # i.e. 0.01
optimizer = torch.optim.SGD(nurealnetwork.parameters(), lr=learning_rate)

# --- bookkeeping ---
total_train_step = 0  # optimisation steps completed so far
total_test_step = 0   # evaluation passes completed so far
epoch = 10            # number of training epochs
writer = SummaryWriter("../logs_train")

for i in range(epoch):
    print("------第 {} 轮训练开始------".format(i + 1))

    # Training pass.
    nurealnetwork.train()
    for data in train_dataloader:
        imgs, targets = data
        outputs = nurealnetwork(imgs)
        loss = loss_fn(outputs, targets)
        optimizer.zero_grad()  # clear stale gradients
        loss.backward()        # back-propagate
        optimizer.step()       # update the parameters
        total_train_step += 1
        # Throttle console/TensorBoard output to every 100th step.
        if total_train_step % 100 == 0:
            print("训练次数:{} , Loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # Evaluation pass — no gradients needed.
    nurealnetwork.eval()
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            outputs = nurealnetwork(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            total_accuracy += (outputs.argmax(1) == targets).sum()
    print("整体测试集上的Loss:{}".format(total_test_loss))
    print("整体测试集上的正确率:{}".format(total_accuracy / test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accracy", total_accuracy / test_data_size, total_test_step)
    total_test_step += 1

    # Checkpoint after every epoch (whole module; the state-dict form is the
    # officially recommended alternative).
    torch.save(nurealnetwork, "nurealnetwork_{}.pth".format(i))
    # torch.save(nurealnetwork.state_dict(), "nurealnetwork_{}.pth".format(i))
    print("模型已保存")

writer.close()
CPU训练 vs GPU训练
# test.py
# Load a trained checkpoint and classify a single image with it.
import torch
import torchvision
from PIL import Image
from torch import nn

image_path = "../images/dog.png"
image = Image.open(image_path)
print(image)
# PNG files may carry an alpha channel; the network expects exactly 3 channels.
image = image.convert("RGB")
transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                            torchvision.transforms.ToTensor()])
image = transform(image)
print(image.shape)


class Tudui(nn.Module):
    """Same CIFAR-10 architecture the checkpoint was trained with; required
    so torch.load can reconstruct the pickled module."""

    def __init__(self):
        super(Tudui, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            nn.Linear(64*4*4, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.model(x)
        return x


# BUG FIX: the checkpoint was saved on a GPU; map it onto whatever device is
# available so the script also runs on CPU-only machines instead of crashing.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.load("tudui_29_gpu.pth", map_location=device)
print(model)
image = torch.reshape(image, (1, 3, 32, 32))
model.eval()
with torch.no_grad():
    # The input must live on the same device as the model parameters.
    image = image.to(device)
    output = model(image)
print(output)
print(output.argmax(1))
下载代码:https://github.com/ultralytics/yolov5/tree/v6.0
安装,配置环境
pip install -r requirements.txt
如果安装依赖库太慢了或者失败了,建议在Anaconda命令行里,进入项目的对应仓库里,安装依赖库,命令如下
pip install 依赖库 -i https://pypi.tuna.tsinghua.edu.cn/simple
def parse_opt():
    """Build and parse the YOLOv5 detect.py command-line options.

    Returns the parsed argparse.Namespace; a single --imgsz value is
    expanded to (h, w) before returning.
    """
    parser = argparse.ArgumentParser()
    # Model weights: yolov5s/m/l/x.pt grow in size and accuracy (and cost).
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
    # Where the inference inputs live (file, directory, URL, glob, or webcam 0).
    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
    # Inference image size; a single value is duplicated to h,w below.
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    # CPU vs GPU selection; several GPUs can be listed, e.g. 0,1,2,3.
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    # Preview images (or video frames) while detection runs.
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    opt = parser.parse_args()
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand a single size to h,w
    print_args(FILE.stem, opt)
    return opt
将batch_size = 32 调为4,不然显卡会带不动
测试集跑完整个视频,大概花了半个小时
本地跑不动的可以用Google colab 云端服务器跑
if __name__ == '__main__':
    # YOLOv5 (v5-era detect.py) entry point: parse the CLI options, then
    # either refresh all bundled models or run a single detection pass.
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
    # Path of the video to run on (file/folder, or 0 for the webcam).
    parser.add_argument('--source', type=str, default='data/video/movie.mp4', help='source') # file/folder, 0 for webcam
    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='display results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    opt = parser.parse_args()
    print(opt)
    check_requirements(exclude=('pycocotools', 'thop'))

    with torch.no_grad():
        if opt.update:  # update all models (to fix SourceChangeWarning)
            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
                detect()
                strip_optimizer(opt.weights)
        else:
            detect()
Annotations:包含了xml文件,描述了图片的各种信息,特别是标注出了目标的位置坐标
ImageSets:主要关注下main文件夹的内容,里面的文件包含了不同类别的训练/验证数据集图片名称
JPEGImages:原图片
SegmentationClass、SegmentationObject:语义分割
待更新
待更新
源码https://github.com/AIZOOTech/FaceMaskDetection
# -*- coding:utf-8 -*-
# Module-level setup for the face-mask detector: load the model once and
# precompute the SSD-style anchor boxes shared by every inference call.
import cv2
import time
import argparse
import numpy as np
from PIL import Image
from utils.anchor_generator import generate_anchors
from utils.anchor_decode import decode_bbox
from utils.nms import single_class_non_max_suppression
from load_model.pytorch_loader import load_pytorch_model, pytorch_inference
# model = load_pytorch_model('models/face_mask_detection.pth');
model = load_pytorch_model('models/model360.pth')
# anchor configuration
# NOTE(review): the uncommented sizes presumably match the 360-input model
# loaded above, the commented pair the smaller model — confirm upstream.
#feature_map_sizes = [[33, 33], [17, 17], [9, 9], [5, 5], [3, 3]]
feature_map_sizes = [[45, 45], [23, 23], [12, 12], [6, 6], [4, 4]]
anchor_sizes = [[0.04, 0.056], [0.08, 0.11], [0.16, 0.22], [0.32, 0.45], [0.64, 0.72]]
anchor_ratios = [[1, 0.62, 0.42]] * 5
# generate anchors
anchors = generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios)
# for inference , the batch size is 1, the model output shape is [1, N, 4],
# so we expand dim for anchors to [1, anchor_num, 4]
anchors_exp = np.expand_dims(anchors, axis=0)
# Class-id to human-readable label mapping used when drawing boxes.
id2class = {0: 'Mask', 1: 'NoMask'}
def inference(image,
              conf_thresh=0.5,
              iou_thresh=0.4,
              target_shape=(160, 160),
              draw_result=True,
              show_result=True
              ):
    '''
    Main function of detection inference.
    :param image: 3D numpy array of image (H, W, C)
    :param conf_thresh: the min threshold of classification probability.
    :param iou_thresh: the IOU threshold of NMS
    :param target_shape: the model input size.
    :param draw_result: whether to draw bounding box to the image.
    :param show_result: whether to display the image.
    :return: list of [class_id, conf, xmin, ymin, xmax, ymax] per kept box.
    '''
    # image = np.copy(image)
    output_info = []
    height, width, _ = image.shape
    # Resize to the model input size and scale pixels to [0, 1].
    image_resized = cv2.resize(image, target_shape)
    image_np = image_resized / 255.0
    # Add the batch dimension and go HWC -> NCHW for the PyTorch model.
    image_exp = np.expand_dims(image_np, axis=0)
    image_transposed = image_exp.transpose((0, 3, 1, 2))
    y_bboxes_output, y_cls_output = pytorch_inference(model, image_transposed)
    # remove the batch dimension, for batch is always 1 for inference.
    y_bboxes = decode_bbox(anchors_exp, y_bboxes_output)[0]
    y_cls = y_cls_output[0]
    # To speed up, do single class NMS, not multiple classes NMS.
    bbox_max_scores = np.max(y_cls, axis=1)
    bbox_max_score_classes = np.argmax(y_cls, axis=1)
    # keep_idx is the alive bounding box after nms.
    keep_idxs = single_class_non_max_suppression(y_bboxes,
                                                 bbox_max_scores,
                                                 conf_thresh=conf_thresh,
                                                 iou_thresh=iou_thresh,
                                                 )
    for idx in keep_idxs:
        conf = float(bbox_max_scores[idx])
        class_id = bbox_max_score_classes[idx]
        bbox = y_bboxes[idx]
        # clip the coordinate, avoid the value exceed the image boundary.
        xmin = max(0, int(bbox[0] * width))
        ymin = max(0, int(bbox[1] * height))
        xmax = min(int(bbox[2] * width), width)
        ymax = min(int(bbox[3] * height), height)
        if draw_result:
            # Green box for class 0 ('Mask'), red for 'NoMask'.
            if class_id == 0:
                color = (0, 255, 0)
            else:
                color = (255, 0, 0)
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(image, "%s: %.2f" % (id2class[class_id], conf), (xmin + 2, ymin - 2),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, color)
        output_info.append([class_id, conf, xmin, ymin, xmax, ymax])
    if show_result:
        Image.fromarray(image).show()
    return output_info
def run_on_video(video_path, output_video_name, conf_thresh):
    """Run mask detection on every frame of a video (or camera) stream.

    :param video_path: path of the video file, or 0 for the default camera.
    :param output_video_name: target file for the (currently disabled) writer.
    :param conf_thresh: minimum classification confidence to keep a box.
    :raises ValueError: if the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # writer = cv2.VideoWriter(output_video_name, fourcc, int(fps), (int(width), int(height)))
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    if not cap.isOpened():
        # FIX: the original had an unreachable `return` after this raise.
        raise ValueError("Video open failed.")
    status = True
    idx = 0
    while status:
        start_stamp = time.time()
        status, img_raw = cap.read()
        read_frame_stamp = time.time()
        if status:
            # FIX: convert only after a successful read — at end of stream
            # cap.read() returns (False, None) and cvtColor(None) crashed.
            img_raw = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
            inference(img_raw,
                      conf_thresh,
                      iou_thresh=0.5,
                      target_shape=(360, 360),
                      draw_result=True,
                      show_result=False)
            # imshow expects BGR, so flip the channel order back for display.
            cv2.imshow('image', img_raw[:, :, ::-1])
            cv2.waitKey(1)
            inference_stamp = time.time()
            # writer.write(img_raw)
            write_frame_stamp = time.time()
            idx += 1
            print("%d of %d" % (idx, total_frames))
            print("read_frame:%f, infer time:%f, write time:%f" % (read_frame_stamp - start_stamp,
                                                                   inference_stamp - read_frame_stamp,
                                                                   write_frame_stamp - inference_stamp))
    # writer.release()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Face Mask Detection")
parser.add_argument('--img-mode', type=int, default=1, help='set 1 to run on image, 0 to run on video.')
parser.add_argument('--img-path', type=str, default='img_test/test5.jpg', help='path to your image.')
parser.add_argument('--video-path', type=str, default='0', help='path to your video, `0` means to use camera.')
# parser.add_argument('--hdf5', type=str, help='keras hdf5 file')
args = parser.parse_args()
if args.img_mode:
imgPath = args.img_path
img = cv2.imread(imgPath)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
inference(img, show_result=True, target_shape=(360, 360))
else:
video_path = args.video_path
if args.video_path == '0':
video_path = 0
run_on_video(video_path, '', conf_thresh=0.5)
图片检测结果:
python pytorch_infer.py --img-path /path/to/your/img
python pytorch_infer.py --img-mode 0 --video-path /path/to/video
# 如果要打开本地摄像头, video_path填写0就可以了,如下
python pytorch_infer.py --img-mode 0 --video-path 0