最近发现了一份不错的源代码,作者使用 PyTorch 实现了如今主流的卷积神经网络 CNN 框架,包含了 12 种模型架构。所有代码使用的数据集是 CIFAR。
CNN 经典论文
该项目实现的是主流的 CNN 模型,涉及的论文包括:
1. CNN 模型(12 篇)
(lenet) LeNet-5, convolutional neural networks
(alexnet) ImageNet Classification with Deep Convolutional Neural Networks
(vgg) Very Deep Convolutional Networks for Large-Scale Image Recognition
(resnet) Deep Residual Learning for Image Recognition
(preresnet) Identity Mappings in Deep Residual Networks
(resnext) Aggregated Residual Transformations for Deep Neural Networks
(densenet) Densely Connected Convolutional Networks
(senet) Squeeze-and-Excitation Networks
(bam) BAM: Bottleneck Attention Module
(cbam) CBAM: Convolutional Block Attention Module
(genet) Gather-Excite: Exploiting Feature Context in Convolutional Neural Networks
(sknet) SKNet: Selective Kernel Networks
2. 正则化(3 篇)
(shake-shake) Shake-Shake regularization
(cutout) Improved Regularization of Convolutional Neural Networks with Cutout
(mixup) mixup: Beyond Empirical Risk Minimization
3. 学习速率调度器(2 篇)
(cos_lr) SGDR: Stochastic Gradient Descent with Warm Restarts
(htd_lr) Stochastic Gradient Descent with Hyperbolic-Tangent Decay on Classification
1. 需求
- Python >= 3.5
- PyTorch >= 0.4
- 其它依赖项 (pyyaml, easydict, tensorboardX)
pip install -r requirements.txt
2. 模型代码
作者将所有的模型都存放在 model 文件夹下,我们来看一下 PyTorch 实现的 ResNet 网络结构:
# -*-coding:utf-8-*-
"""ResNet models for CIFAR-sized images, assembled from PyTorch modules.

The network is a 3x3 convolution stem followed by three stages of residual
blocks (16, 32 and 64 base channels), a fixed 8x8 average pool and a linear
classifier. The ``resnetN`` factory functions build the ``BasicBlock``
variants listed in ``__all__``.
"""
import math

import torch
import torch.nn as nn
import torch.nn.functional as F

__all__ = ['resnet20', 'resnet32', 'resnet44', 'resnet56', 'resnet110',
           'resnet1202']


def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).
    """
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by BN.

    Args:
        inplanes: number of input channels.
        planes: number of output channels of both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input before it is added
            to the block output; ``None`` adds the input unchanged.
    """

    # Output channels = planes * expansion.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv_1 = conv3x3(inplanes, planes, stride)
        self.bn_1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv_2 = conv3x3(planes, planes)
        self.bn_2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        residual = x

        out = self.conv_1(x)
        out = self.bn_1(out)
        out = self.relu(out)

        out = self.conv_2(out)
        out = self.bn_2(out)

        # Project the shortcut when a downsample module was supplied.
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)
        return out


class Bottleneck(nn.Module):
    """Residual block made of 1x1 -> 3x3 -> 1x1 convolutions, each with BN.

    The last convolution widens the output to ``planes * expansion`` channels.

    Args:
        inplanes: number of input channels.
        planes: number of channels of the first two convolutions.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input before it is added
            to the block output; ``None`` adds the input unchanged.
    """

    # Output channels = planes * expansion.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        out_planes = planes * self.expansion
        self.conv_1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn_1 = nn.BatchNorm2d(planes)
        self.conv_2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                                padding=1, bias=False)
        self.bn_2 = nn.BatchNorm2d(planes)
        self.conv_3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn_3 = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv/BN stages, add the shortcut, apply ReLU."""
        residual = x

        out = self.conv_1(x)
        out = self.bn_1(out)
        out = self.relu(out)

        out = self.conv_2(out)
        out = self.bn_2(out)
        out = self.relu(out)

        out = self.conv_3(out)
        out = self.bn_3(out)

        # Project the shortcut when a downsample module was supplied.
        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)
        return out


class ResNet(nn.Module):
    """Three-stage ResNet whose depth selects the number of blocks per stage.

    Args:
        depth: total layer count; must be ``6n+2`` for ``'BasicBlock'`` or
            ``9n+2`` for ``'Bottleneck'``, giving ``n`` blocks per stage.
        num_classes: size of the final linear layer's output.
        block_name: ``'BasicBlock'`` (default) or ``'Bottleneck'``.

    Raises:
        AssertionError: if ``depth`` does not match the chosen block type.
        ValueError: if ``block_name`` is not one of the two supported names.
    """

    def __init__(self, depth, num_classes, block_name='BasicBlock'):
        super().__init__()
        # Model type specifies number of layers for CIFAR-10 model
        if block_name == 'BasicBlock':
            assert (depth - 2) % 6 == 0, \
                'depth should be 6n+2, e.g. 20, 32, 44, 56, 110, 1202'
            n = (depth - 2) // 6
            block = BasicBlock
        elif block_name == 'Bottleneck':
            assert (depth - 2) % 9 == 0, \
                'depth should be 9n+2, e.g. 20, 29, 47, 56, 110, 1199'
            n = (depth - 2) // 9
            block = Bottleneck
        else:
            raise ValueError('block_name should be BasicBlock or Bottleneck')

        # Channel count fed to the next block; updated by _make_layer.
        self.inplanes = 16
        self.conv_1 = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False)
        self.bn_1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.stage_1 = self._make_layer(block, 16, n)
        self.stage_2 = self._make_layer(block, 32, n, stride=2)
        self.stage_3 = self._make_layer(block, 64, n, stride=2)
        self.avgpool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64 * block.expansion, num_classes)

        # Kaiming-normal init for every convolution; BN starts as identity
        # scale (weight 1) with zero shift.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight.data)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks as ``nn.Sequential``.

        Only the first block receives ``stride`` and, when the stride or the
        channel count changes, a 1x1 conv + BN shortcut projection. Updates
        ``self.inplanes`` to the stage's output channel count.
        """
        out_planes = planes * block.expansion
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        layers.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class scores for ``x``.

        The spatial sizes noted inline are those of the original comments
        and correspond to a 32x32 input.
        """
        x = self.conv_1(x)
        x = self.bn_1(x)
        x = self.relu(x)  # 32x32

        x = self.stage_1(x)  # 32x32
        x = self.stage_2(x)  # 16x16
        x = self.stage_3(x)  # 8x8

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # flatten everything but the batch axis
        x = self.fc(x)
        return x


def resnet20(num_classes):
    """Build a depth-20 ``ResNet`` with the default ``BasicBlock``."""
    return ResNet(depth=20, num_classes=num_classes)


def resnet32(num_classes):
    """Build a depth-32 ``ResNet`` with the default ``BasicBlock``."""
    return ResNet(depth=32, num_classes=num_classes)


def resnet44(num_classes):
    """Build a depth-44 ``ResNet`` with the default ``BasicBlock``."""
    return ResNet(depth=44, num_classes=num_classes)


def resnet56(num_classes):
    """Build a depth-56 ``ResNet`` with the default ``BasicBlock``."""
    return ResNet(depth=56, num_classes=num_classes)


def resnet110(num_classes):
    """Build a depth-110 ``ResNet`` with the default ``BasicBlock``."""
    return ResNet(depth=110, num_classes=num_classes)


def resnet1202(num_classes):
    """Build a depth-1202 ``ResNet`` with the default ``BasicBlock``."""
    return ResNet(depth=1202, num_classes=num_classes)
3. 使用
## 1 GPU for lenet
CUDA_VISIBLE_DEVICES=0 python -u train.py --work-path ./experiments/cifar10/lenet

## resume from ckpt
CUDA_VISIBLE_DEVICES=0 python -u train.py --work-path ./experiments/cifar10/lenet --resume

## 2 GPUs for resnet1202
## NOTE(review): the work path below points at preresnet1202 - confirm which model is intended
CUDA_VISIBLE_DEVICES=0,1 python -u train.py --work-path ./experiments/cifar10/preresnet1202

## 4 GPUs for densenet190bc
CUDA_VISIBLE_DEVICES=0,1,2,3 python -u train.py --work-path ./experiments/cifar10/densenet190bc
我们使用 yaml 文件 config.yaml 保存参数,查看 ./experiments 中的任何文件以了解更多详细信息。您可以通过 tensorboard --logdir path-to-event --port your-port 在 tensorboard 中查看训练曲线。训练日志会通过 logging 写入文件,请查看您工作路径中的 log.txt。
模型在 CIFAR 数据集上的结果
1. 12 种 CNN 模型:
2. 正则化
默认的数据扩充方法是 RandomCrop+RandomHorizontalFlip+Normalize,而 √ 表示额外采用了哪种方法。
PS:Shake_Resnet26_2X64d 通过 Cutout 和 Mixup 达到 97.71% 的测试精度!很酷,对吧?
3. 不同的学习速率调度器
