A simplified ResNet implementation, following the reference code on the PyTorch website

Import packages

import torch
import torch.nn as nn
from torch.hub import load_state_dict_from_url
Copy the code

Download URLs for the pre-trained weights of each network

# Maps each architecture name to the URL of its pre-trained weights file.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
    'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
    'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}

The 3×3 convolution is used repeatedly below, so wrap it in a helper function up front

def conv3x3(in_planes, out_planes, stride=1, padding=1):
    """Build a 3x3 ``nn.Conv2d`` without a bias term.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).
        padding: padding applied on each side (default 1).

    Returns:
        An ``nn.Conv2d`` with ``kernel_size=3`` and ``bias=False``.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=padding, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
Copy the code

The 1×1 convolution is also used repeatedly below, so wrap it in a helper function up front

def conv1x1(in_planes, out_planes, stride=1):
    """Build a 1x1 ``nn.Conv2d`` without a bias term.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).

    Returns:
        An ``nn.Conv2d`` with ``kernel_size=1`` and ``bias=False``.
    """
    conv_options = dict(kernel_size=1, stride=stride, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)
Copy the code

Building BasicBlock(core)

  • Networks shallower than ResNet-50 use BasicBlock. Its structure is 3×3 convolution – BN – ReLU – 3×3 convolution – BN, followed by the residual addition and a final ReLU. Pay attention to downsample: because this is a residual network that computes F(x) + x, the original feature map and the convolved feature map must have the same spatial shape and the same number of channels before they can be added together.
class BasicBlock(nn.Module):
    """Residual block built from two 3x3 convolutions.

    Main path: 3x3 conv -> norm -> ReLU -> 3x3 conv -> norm. The (optionally
    downsampled) input is then added to the result and a final ReLU is applied.

    Args:
        inplanes: number of input channels.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution (default 1).
        downsample: optional module applied to the input so that it matches
            the main path's output before the addition; ``None`` means the
            input is added unchanged.
        norm_layer: callable that builds a normalization layer from a channel
            count; defaults to ``nn.BatchNorm2d``.
    """

    expansion = 1  # Ratio of the block's output channels to ``planes``

    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None):
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            # No custom normalization layer given: use standard batch norm.
            norm_layer = nn.BatchNorm2d
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(F(x) + shortcut(x))`` for the input tensor ``x``."""
        identity = x  # Keep the input for the shortcut connection

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            # Adjust x so that F(x) and x have matching dimensions.
            identity = self.downsample(x)

        # Residual addition, followed by the activation.
        out += identity
        out = self.relu(out)

        return out
Copy the code

Building Bottleneck (core)

  • ResNet-50 and deeper networks use Bottleneck blocks. The structure is 1×1 convolution – BN – ReLU – 3×3 convolution – BN – ReLU – 1×1 convolution – BN, followed by the residual addition and a final ReLU. Note the use of downsample (same as above).
class Bottleneck(nn.Module):
    """Residual block built from a 1x1, a 3x3 and a 1x1 convolution.

    Main path: 1x1 conv -> norm -> ReLU -> 3x3 conv -> norm -> ReLU ->
    1x1 conv -> norm. The (optionally downsampled) input is then added to the
    result and a final ReLU is applied.

    Args:
        inplanes: number of input channels.
        planes: number of channels inside the block; the block outputs
            ``planes * expansion`` channels.
        stride: stride of the 3x3 convolution (default 1).
        downsample: optional module applied to the input so that it matches
            the main path's output before the addition; ``None`` means the
            input is added unchanged.
        norm_layer: callable that builds a normalization layer from a channel
            count; defaults to ``nn.BatchNorm2d``.
    """

    expansion = 4  # Ratio of the block's output channels to ``planes``

    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = norm_layer(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = norm_layer(planes)
        # The last 1x1 convolution widens the output to planes * self.expansion channels.
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(F(x) + shortcut(x))`` for the input tensor ``x``."""
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            # Adjust x so that F(x) and x have matching dimensions.
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out
Copy the code

Building Resnet(Core)

class ResNet(nn.Module):
    """ResNet feature extractor with a fully connected classifier on top.

    Pipeline: 7x7 conv -> norm -> ReLU -> 3x3 max-pool -> layer1..layer4 ->
    adaptive average pool to 1x1 -> flatten -> fully connected layer.

    Args:
        block: residual block class (e.g. ``BasicBlock`` or ``Bottleneck``);
            it must expose an ``expansion`` class attribute.
        layers: sequence of four ints giving the number of blocks in
            layer1..layer4.
        num_class: number of outputs of the final fully connected layer
            (default 1000).
        norm_layer: callable that builds a normalization layer from a channel
            count; defaults to ``nn.BatchNorm2d``.
    """

    def __init__(self, block, layers, num_class=1000, norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        # Channel count fed to the next block; updated by _make_layer.
        self.inplanes = 64

        # Stem: the "conv1" row of the ResNet architecture table.
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # Output size (1, 1) == global average pooling
        self.fc = nn.Linear(512 * block.expansion, num_class)

        # self.modules() yields every sub-module; pick the initialization
        # that corresponds to each layer type.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage (layerN) as an ``nn.Sequential`` of residual blocks.

        Args:
            block: block class (basic block / bottleneck block).
            planes: base channel count of the stage; each block outputs
                ``planes * block.expansion`` channels.
            blocks: number of blocks in the stage.
            stride: stride of the first block (default 1); the remaining
                blocks use the block's default stride.

        Returns:
            ``nn.Sequential`` containing ``blocks`` block instances.

        Side effects:
            Updates ``self.inplanes`` to ``planes * block.expansion``.
        """
        norm_layer = self._norm_layer
        downsample = None

        # The shortcut needs its dimensions adjusted whenever the first block
        # changes the spatial size (stride) or the channel count.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                # Adjusts spatial size (H x W) and channels at the same time.
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion)
            )

        # Collect the blocks in a list, then build the stage with Sequential(*list).
        layers = []
        # The first block is handled separately: only it gets stride/downsample.
        layers.append(block(self.inplanes, planes, stride, downsample, norm_layer))
        # Record the stage's output channel count for the blocks that follow.
        self.inplanes = planes * block.expansion
        # Start at 1 because the first block has already been added.
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, norm_layer=norm_layer))
        # *layers unpacks the list so each block is passed as its own argument.
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem, the four stages, pooling and the classifier on ``x``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # Flatten everything except the batch dimension
        x = self.fc(x)

        return x
Copy the code

Pre-built Resnet

  • This function is a wrapper for building a ResNet: it generates different models depending on its arguments (which block type to use, the number of blocks per layer, and whether to load pre-trained weights).
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    """Construct a ``ResNet`` and optionally load pre-trained weights into it.

    Args:
        arch: key into ``model_urls`` selecting the weights to download.
        block: residual block class passed to ``ResNet``.
        layers: per-stage block counts passed to ``ResNet``.
        pretrained: if truthy, download and load the weights for ``arch``.
        progress: forwarded to ``load_state_dict_from_url`` as ``progress``.
        **kwargs: extra keyword arguments forwarded to ``ResNet``.

    Returns:
        The constructed model.
    """
    net = ResNet(block, layers, **kwargs)
    if not pretrained:
        return net

    weights = load_state_dict_from_url(model_urls[arch], progress=progress)
    net.load_state_dict(weights)
    return net
Copy the code

resnet18

def resnet18(pretrained=False, progress=True, **kwargs):
    r"""ResNet-18 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
                   **kwargs)
Copy the code

Instantiating the model

# Build ResNet-18 with pre-trained weights and switch it to evaluation mode.
model = resnet18(pretrained=True)
model.eval()

Alternatively, load PyTorch's own ready-made network directly

import torch
model2 = torch.hub.load('pytorch/vision:v0.4.2', 'resnet18', pretrained=True)
# or any of these variants
# model = torch.hub.load('pytorch/vision:v0.4.2', 'resnet34', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.4.2', 'resnet50', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.4.2', 'resnet101', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.4.2', 'resnet152', pretrained=True)
model2.eval()
# The pretrained=True mode used above is the same as loading a ready-made model with torch.hub.load
model.state_dict()
model2.state_dict()
Copy the code

Test with pictures

# Download an example image from the pytorch website
import urllib.request
url, filename = ("https://github.com/pytorch/hub/raw/master/dog.jpg", "dog.jpg")
urllib.request.urlretrieve(url, filename)

# sample execution (requires torchvision)
from PIL import Image
from torchvision import transforms
input_image = Image.open(filename)

# Preprocess
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)

# Add a batch dimension
input_batch = input_tensor.unsqueeze(0)  # create a mini-batch as expected by the model

# Move the input and the model to the GPU if one is available
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    model.to('cuda')

with torch.no_grad():
    output = model(input_batch)
# Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
print(output[0])
# The output has unnormalized scores. To get probabilities, you can run a softmax on it.
result = torch.nn.functional.softmax(output[0], dim=0)
print(result)

# Index of the highest-probability class
result.argmax()
Copy the code

Overall code

# Import packages
import torch
import torch.nn as nn
from torch.hub import load_state_dict_from_url

# Download URLs for the pre-trained weights of each network
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
    'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
    'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}


# The 3x3 convolution is used repeatedly below, so wrap it in a helper up front
def conv3x3(in_planes, out_planes, stride=1, padding=1):
    """Build a 3x3 ``nn.Conv2d`` without a bias term.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).
        padding: padding applied on each side (default 1).

    Returns:
        An ``nn.Conv2d`` with ``kernel_size=3`` and ``bias=False``.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=padding, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)


# Why bias=False? The convolution is followed by a BN layer, which normalizes its input, so a bias term would have no effect; it would only add parameters and make training harder.

# The 1x1 convolution is used repeatedly below, so wrap it in a helper up front
def conv1x1(in_planes, out_planes, stride=1):
    """Build a 1x1 ``nn.Conv2d`` without a bias term.

    The bias is left out because, when the convolution is followed by a BN
    layer, the bias parameters can be dropped to save memory.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).

    Returns:
        An ``nn.Conv2d`` with ``kernel_size=1`` and ``bias=False``.
    """
    conv_options = dict(kernel_size=1, stride=stride, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)


# Networks shallower than ResNet-50 use BasicBlock. Its structure is 3x3 convolution - BN - ReLU - 3x3 convolution - BN, followed by the residual addition and a final ReLU. Pay attention to downsample: because this is a residual network that computes F(x) + x, the original feature map and the convolved feature map must have the same spatial shape and the same number of channels before they can be added together.
class BasicBlock(nn.Module):
    """Residual block built from two 3x3 convolutions.

    Main path: 3x3 conv -> norm -> ReLU -> 3x3 conv -> norm. The (optionally
    downsampled) input is then added to the result and a final ReLU is applied.

    Args:
        inplanes: number of input channels.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution (default 1).
        downsample: optional module applied to the input so that it matches
            the main path's output before the addition; ``None`` means the
            input is added unchanged.
        norm_layer: callable that builds a normalization layer from a channel
            count; defaults to ``nn.BatchNorm2d``.
    """

    expansion = 1  # Ratio of the block's output channels to ``planes``

    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None):
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            # No custom normalization layer given: use standard batch norm.
            norm_layer = nn.BatchNorm2d
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(F(x) + shortcut(x))`` for the input tensor ``x``."""
        identity = x  # Keep the input for the shortcut connection

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            # Adjust x so that F(x) and x have matching dimensions.
            identity = self.downsample(x)

        # Residual addition, followed by the activation.
        out += identity
        out = self.relu(out)

        return out


# ResNet-50 and deeper networks use Bottleneck blocks. The structure is 1x1 convolution - BN - ReLU - 3x3 convolution - BN - ReLU - 1x1 convolution - BN, followed by the residual addition and a final ReLU. Note the use of downsample (same as above).
class Bottleneck(nn.Module):
    """Residual block built from a 1x1, a 3x3 and a 1x1 convolution.

    Main path: 1x1 conv -> norm -> ReLU -> 3x3 conv -> norm -> ReLU ->
    1x1 conv -> norm. The (optionally downsampled) input is then added to the
    result and a final ReLU is applied.

    Args:
        inplanes: number of input channels.
        planes: number of channels inside the block; the block outputs
            ``planes * expansion`` channels.
        stride: stride of the 3x3 convolution (default 1).
        downsample: optional module applied to the input so that it matches
            the main path's output before the addition; ``None`` means the
            input is added unchanged.
        norm_layer: callable that builds a normalization layer from a channel
            count; defaults to ``nn.BatchNorm2d``.
    """

    expansion = 4  # Ratio of the block's output channels to ``planes``

    def __init__(self, inplanes, planes, stride=1, downsample=None, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = norm_layer(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = norm_layer(planes)
        # The last 1x1 convolution widens the output to planes * self.expansion channels.
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(F(x) + shortcut(x))`` for the input tensor ``x``."""
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            # Adjust x so that F(x) and x have matching dimensions.
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


# This is the main body of ResNet. The input first passes through the stem: a 7x7 convolution - BN - ReLU - max pooling. It then goes through stage2 - stage3 - stage4 - stage5 (layer1 to layer4 in the code), global average pooling (AdaptiveAvgPool2d) and the FC layer. Stages 2-5 are built with _make_layer. AdaptiveAvgPool2d is applied before the fully connected layer: it averages each channel's feature map down to a single value, and these per-channel values are flattened into a vector that can be fed directly to the fully connected layer.
class ResNet(nn.Module):
    """ResNet feature extractor with a fully connected classifier on top.

    Pipeline: 7x7 conv -> norm -> ReLU -> 3x3 max-pool -> layer1..layer4 ->
    adaptive average pool to 1x1 -> flatten -> fully connected layer.

    Args:
        block: residual block class (e.g. ``BasicBlock`` or ``Bottleneck``);
            it must expose an ``expansion`` class attribute.
        layers: sequence of four ints giving the number of blocks in
            layer1..layer4.
        num_class: number of outputs of the final fully connected layer
            (default 1000).
        norm_layer: callable that builds a normalization layer from a channel
            count; defaults to ``nn.BatchNorm2d``.
    """

    def __init__(self, block, layers, num_class=1000, norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        # Channel count fed to the next block; updated by _make_layer.
        self.inplanes = 64

        # Stem: the "conv1" row of the ResNet architecture table.
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # Output size (1, 1) == global average pooling
        self.fc = nn.Linear(512 * block.expansion, num_class)

        # self.modules() yields every sub-module; pick the initialization
        # that corresponds to each layer type.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage (layerN) as an ``nn.Sequential`` of residual blocks.

        Args:
            block: block class (basic block / bottleneck block).
            planes: base channel count of the stage; each block outputs
                ``planes * block.expansion`` channels.
            blocks: number of blocks in the stage.
            stride: stride of the first block (default 1); the remaining
                blocks use the block's default stride.

        Returns:
            ``nn.Sequential`` containing ``blocks`` block instances.

        Side effects:
            Updates ``self.inplanes`` to ``planes * block.expansion``.
        """
        norm_layer = self._norm_layer
        downsample = None

        # The shortcut needs its dimensions adjusted whenever the first block
        # changes the spatial size (stride) or the channel count.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                # Adjusts spatial size (H x W) and channels at the same time.
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion)
            )

        # Collect the blocks in a list, then build the stage with Sequential(*list).
        layers = []
        # The first block is handled separately: only it gets stride/downsample.
        layers.append(block(self.inplanes, planes, stride, downsample, norm_layer))
        # Record the stage's output channel count for the blocks that follow.
        self.inplanes = planes * block.expansion
        # Start at 1 because the first block has already been added.
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, norm_layer=norm_layer))
        # *layers unpacks the list so each block is passed as its own argument.
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem, the four stages, pooling and the classifier on ``x``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # Flatten everything except the batch dimension
        x = self.fc(x)

        return x


# This function is actually a wrapper function to build ResNet, according to different parameters, generate different models
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    """Construct a ``ResNet`` and optionally load pre-trained weights into it.

    Args:
        arch: key into ``model_urls`` selecting the weights to download.
        block: residual block class passed to ``ResNet``.
        layers: per-stage block counts passed to ``ResNet``.
        pretrained: if truthy, download and load the weights for ``arch``.
        progress: forwarded to ``load_state_dict_from_url`` as ``progress``.
        **kwargs: extra keyword arguments forwarded to ``ResNet``.

    Returns:
        The constructed model.
    """
    net = ResNet(block, layers, **kwargs)
    if not pretrained:
        return net

    weights = load_state_dict_from_url(model_urls[arch], progress=progress)
    net.load_state_dict(weights)
    return net


# Resnet18 model
def resnet18(pretrained=False, progress=True, **kwargs):
    r"""ResNet-18 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
                   **kwargs)


def resnet50(pretrained=False, progress=True, **kwargs):
    r"""ResNet-50 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
                   **kwargs)


# Build ResNet-18 with pre-trained weights and switch it to evaluation mode.
model = resnet18(pretrained=True)
model.eval()