发布时间:2022-09-20 12:00
本文基于 PaddlePaddle 2.0 版本搭建。最近飞桨 2.0 版本发布了,用起来也挺顺手,所以我参考了一些其他版本的代码,用 PaddlePaddle 2.0 重新实现了 DeepLabv3 + ResNet 网络。这篇文章介绍 DeepLabv3 中 ASPP 等模块的搭建,并整合前面的 ResNet 骨干网络;骨干网络部分可以参考上一篇文章《deeplabv3+系列之ResNet骨干网络》。
原论文地址:Rethinking Atrous Convolution for Semantic Image Segmentation
paddlepaddle2.0版本安装教程
## layers
## 一些子层
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
class ConvBNReLU(nn.Layer):
    """2-D convolution followed by batch normalization and ReLU.

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels of the convolution; also the
            number of features normalized by the batch-norm layer.
        kernel_size: Kernel size forwarded to ``nn.Conv2D``.
        padding: Padding forwarded to ``nn.Conv2D`` (defaults to ``'same'``).
        **kwargs: Any further keyword arguments are forwarded unchanged to
            ``nn.Conv2D`` (e.g. ``dilation``, ``groups``, ``bias_attr``).
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 padding='same',
                 **kwargs):
        super().__init__()
        self._conv = nn.Conv2D(
            in_channels, out_channels, kernel_size, padding=padding, **kwargs)
        # A bare ``SyncBatchNorm`` name is not defined or imported anywhere in
        # this file, so the normalization layer is resolved through ``nn``.
        # NOTE(review): PaddleSeg's layer library substitutes BatchNorm2D on
        # CPU because nn.SyncBatchNorm had no CPU kernel there -- confirm this
        # still applies to the Paddle version in use.
        on_cpu = paddle.get_device() == 'cpu'
        norm_layer = nn.BatchNorm2D if on_cpu else nn.SyncBatchNorm
        self._batch_norm = norm_layer(out_channels)

    def forward(self, x):
        """Apply convolution, batch normalization and ReLU to ``x``."""
        x = self._conv(x)
        x = self._batch_norm(x)
        return F.relu(x)
class ConvBN(nn.Layer):
    """2-D convolution followed by batch normalization (no activation).

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels of the convolution; also the
            number of features normalized by the batch-norm layer.
        kernel_size: Kernel size forwarded to ``nn.Conv2D``.
        padding: Padding forwarded to ``nn.Conv2D`` (defaults to ``'same'``).
        **kwargs: Any further keyword arguments are forwarded unchanged to
            ``nn.Conv2D`` (e.g. ``dilation``, ``groups``, ``bias_attr``).
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 padding='same',
                 **kwargs):
        super().__init__()
        self._conv = nn.Conv2D(
            in_channels, out_channels, kernel_size, padding=padding, **kwargs)
        # A bare ``SyncBatchNorm`` name is not defined or imported anywhere in
        # this file, so the normalization layer is resolved through ``nn``.
        # NOTE(review): PaddleSeg's layer library substitutes BatchNorm2D on
        # CPU because nn.SyncBatchNorm had no CPU kernel there -- confirm this
        # still applies to the Paddle version in use.
        on_cpu = paddle.get_device() == 'cpu'
        norm_layer = nn.BatchNorm2D if on_cpu else nn.SyncBatchNorm
        self._batch_norm = norm_layer(out_channels)

    def forward(self, x):
        """Apply convolution and batch normalization to ``x``."""
        x = self._conv(x)
        return self._batch_norm(x)
class SeparableConvBNReLU(nn.Layer):
    """Depthwise ``ConvBN`` followed by a 1x1 ``ConvBNReLU``.

    Args:
        in_channels: Channels of the input; the depthwise stage keeps this
            channel count and uses it as its ``groups`` value.
        out_channels: Channels produced by the 1x1 stage.
        kernel_size: Kernel size of the depthwise stage.
        padding: Padding of the depthwise stage (defaults to ``'same'``).
        **kwargs: Extra keyword arguments, forwarded to the depthwise
            ``ConvBN`` only.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 padding='same',
                 **kwargs):
        super().__init__()
        # One group per input channel: each channel is convolved separately.
        self.depthwise_conv = ConvBN(
            in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            padding=padding,
            groups=in_channels,
            **kwargs)
        # The attribute keeps its historical spelling ("piont") on purpose:
        # it determines the names of the parameters held by this sublayer.
        self.piontwise_conv = ConvBNReLU(
            in_channels, out_channels, kernel_size=1, groups=1)

    def forward(self, x):
        """Run the depthwise stage, then the 1x1 stage."""
        return self.piontwise_conv(self.depthwise_conv(x))
## ASPPModule
#from paddle import nn
#from paddleseg.models import layers
class ASPPModule(nn.Layer):
    """ASPP module: parallel convolution branches fused by a 1x1 projection.

    One branch is built per entry of ``aspp_ratios``. An optional
    image-pooling branch can be added. All branch outputs are resized to the
    input's spatial size, concatenated along the channel axis, projected by a
    1x1 ``ConvBNReLU`` and passed through dropout.

    Args:
        aspp_ratios: Iterable of dilation rates, e.g. ``(1, 6, 12, 18)``.
            A rate of 1 yields a 1x1 convolution without padding; any other
            rate yields a 3x3 convolution with ``dilation == padding == rate``.
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by every branch and by the module.
        align_corners: Forwarded to ``F.interpolate``.
        use_sep_conv: If true, branches with rate > 1 use
            ``SeparableConvBNReLU`` instead of ``ConvBNReLU``.
        image_pooling: If true, add a global-average-pooling branch.
    """

    def __init__(self,
                 aspp_ratios,
                 in_channels,
                 out_channels,
                 align_corners,
                 use_sep_conv=False,
                 image_pooling=False):
        super().__init__()

        self.align_corners = align_corners
        self.aspp_blocks = nn.LayerList()

        # Convolution branches, one per dilation rate.
        for rate in aspp_ratios:
            is_pointwise = rate == 1
            layer_cls = (SeparableConvBNReLU
                         if use_sep_conv and rate > 1 else ConvBNReLU)
            self.aspp_blocks.append(
                layer_cls(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=1 if is_pointwise else 3,
                    dilation=rate,
                    padding=0 if is_pointwise else rate))

        branch_count = len(self.aspp_blocks)

        # Optional image-pooling branch: pool to 1x1, then 1x1 ConvBNReLU.
        if image_pooling:
            pooled_projection = ConvBNReLU(
                in_channels, out_channels, kernel_size=1, bias_attr=False)
            self.global_avg_pool = nn.Sequential(
                nn.AdaptiveAvgPool2D(output_size=(1, 1)), pooled_projection)
            branch_count += 1
        self.image_pooling = image_pooling

        # 1x1 projection over the channel-wise concatenation of all branches.
        self.conv_bn_relu = ConvBNReLU(
            in_channels=out_channels * branch_count,
            out_channels=out_channels,
            kernel_size=1)

        self.dropout = nn.Dropout(p=0.1)  # drop rate

    def forward(self, x):
        """Run every branch on ``x`` and fuse the results."""
        target_hw = x.shape[2:]

        def _resize(feature):
            # Bring a branch output to the spatial size of the module input.
            return F.interpolate(
                feature,
                target_hw,
                mode='bilinear',
                align_corners=self.align_corners)

        branches = [_resize(block(x)) for block in self.aspp_blocks]

        if self.image_pooling:
            branches.append(_resize(self.global_avg_pool(x)))

        fused = paddle.concat(branches, axis=1)
        fused = self.conv_bn_relu(fused)
        return self.dropout(fused)
class DeepLabV3Head(nn.Layer):
    """DeepLabV3 head: an ``ASPPModule`` followed by a 1x1 classifier conv.

    Args:
        num_classes: Number of output channels of the classifier.
        backbone_indices: Stored on the instance; not read by ``forward``.
        backbone_channels: Sequence whose first entry is the channel count of
            the feature map fed to the ASPP module.
        aspp_ratios: Dilation rates passed to ``ASPPModule``.
        aspp_out_channels: Output channels of the ASPP module.
        align_corners: Forwarded to ``ASPPModule``.
    """

    def __init__(self, num_classes, backbone_indices, backbone_channels,
                 aspp_ratios, aspp_out_channels, align_corners):
        super().__init__()

        # The ASPP module is always built with plain convolutions and with
        # the image-pooling branch enabled.
        aspp_in_channels = backbone_channels[0]
        self.aspp = ASPPModule(
            aspp_ratios,
            aspp_in_channels,
            aspp_out_channels,
            align_corners,
            use_sep_conv=False,
            image_pooling=True)

        self.cls = nn.Conv2D(
            in_channels=aspp_out_channels,
            out_channels=num_classes,
            kernel_size=1)

        self.backbone_indices = backbone_indices

    def forward(self, feat_list):
        """Return a one-element list holding the class logits.

        Despite its name, ``feat_list`` is handed to the ASPP module as-is,
        i.e. it is used as a single feature map rather than indexed.
        """
        return [self.cls(self.aspp(feat_list))]
class DeepLabV3(nn.Layer):
    """DeepLabV3: ResNet backbone + ``DeepLabV3Head`` + bilinear upsampling.

    Args:
        num_classes: Number of output channels of the segmentation logits.
        backbone_indices: Passed through to ``DeepLabV3Head``.
        aspp_ratios: Dilation rates of the ASPP branches.
        aspp_out_channels: Output channels of the ASPP module.
        align_corners: Forwarded to the head and to the final
            ``F.interpolate`` call.
        pretrained: If not ``None``, handed to ``utils.load_entire_model``
            by ``init_weight``.
    """

    def __init__(self,
                 num_classes,
                 backbone_indices=(3, ),
                 aspp_ratios=(1, 6, 12, 18),
                 aspp_out_channels=256,
                 align_corners=False,
                 pretrained=None):
        super().__init__()

        # NOTE(review): ``ResNet`` is not defined in this file; it is
        # presumably the backbone from the previous article -- confirm it is
        # in scope. It is built once and reused for both the channel lookup
        # and the layers attached below.
        backbone = ResNet(layers=101, duplicate_blocks=True)
        backbone_channels = [backbone.out_channels[3] * 4]

        self.head = DeepLabV3Head(num_classes, backbone_indices,
                                  backbone_channels, aspp_ratios,
                                  aspp_out_channels, align_corners)
        self.align_corners = align_corners
        self.pretrained = pretrained

        self.layer1 = backbone.layer1
        self.layer2 = backbone.layer2
        self.layer3 = backbone.layer3
        self.layer4 = backbone.layer4
        self.layer5 = backbone.layer5
        self.layer6 = backbone.layer6
        self.layer7 = backbone.layer7
        self.conv = backbone.conv
        self.pool2d_max = backbone.pool2d_max

        # Load weights only after every sublayer has been attached; calling
        # this earlier would run the loader on a model that does not yet own
        # its backbone layers.
        self.init_weight()

    def forward(self, inputs):
        """Return a list of logits resized to the spatial size of ``inputs``."""
        x = self.conv(inputs)
        x = self.pool2d_max(x)

        stages = (self.layer1, self.layer2, self.layer3, self.layer4,
                  self.layer5, self.layer6, self.layer7)
        for stage in stages:
            x = stage(x)

        logit_list = self.head(x)
        output_size = [inputs.shape[2], inputs.shape[3]]
        return [
            F.interpolate(
                logit,
                output_size,
                mode='bilinear',
                align_corners=self.align_corners) for logit in logit_list
        ]

    def init_weight(self):
        """Load pretrained weights into this model when ``pretrained`` is set."""
        if self.pretrained is not None:
            # NOTE(review): ``utils`` is not imported in this file; this is
            # presumably ``paddleseg.utils`` -- confirm it is in scope before
            # passing a ``pretrained`` value.
            utils.load_entire_model(self, self.pretrained)
# Smoke test: push a random batch through the model and print the shape of
# the first output.
# ``paddle.rand`` draws float32 values uniformly from [0, 1); it is used here
# because ``np`` and ``to_variable`` are not imported in this file.
x = paddle.rand([2, 3, 224, 224])
model = DeepLabV3(59)
model.eval()
pred = model(x)
# NOTE(review): the label says "dilated resnet50", but the model built above
# is DeepLabV3 with ResNet(layers=101); the string is kept unchanged.
print('dilated resnet50: pred.shape = ', pred[0].shape)