Importing Related Libraries

```python
import os
from glob import glob
import torch as t

# Set a random seed to make the results reproducible
t.random.manual_seed(0)
t.cuda.manual_seed_all(0)
# Benchmark mode speeds up computation, but its inherent randomness makes each forward pass vary slightly;
t.backends.cudnn.benchmark = True
# deterministic mode avoids that volatility
t.backends.cudnn.deterministic = True

from PIL import Image
import torch.nn as nn
from tqdm.auto import tqdm
from torchvision import transforms
from torchvision.utils import save_image, make_grid
from torch.optim import SGD
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, MultiStepLR, CosineAnnealingLR
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patch
import torch.nn.functional as F
import json
from torchvision.models.mobilenet import mobilenet_v2
from torchvision.models.resnet import resnet18, resnet34
from torchsummary import summary

%matplotlib inline
```

Set network configuration parameters

```python
class Config:
    # batch_size, lr and momentum are referenced by the Trainer below;
    # their exact values were lost from the original text and are assumed here
    batch_size = 64
    lr = 1e-2
    momentum = 0.9
    weights_decay = 1e-5
    class_num = 11
    # evaluate the network every N epochs
    eval_interval = 1
    checkpoint_interval = 1
    # print a log line every N iterations
    print_interval = 50
    # model save path
    checkpoints = 'drive/My Drive/Data/Datawhale-DigitsRecognition/checkpoints/'
    # path of the pretrained model to load
    pretrained = '/content/drive/My Drive/Data/Datawhale-DigitsRecognition/checkpoints/epoch-32_acc-0.67.pth'
    # epoch to start training from
    start_epoch = 0
    # total number of training epochs
    epoches = 50
    # label smoothing factor
    smooth = 0.1
    # probability of random erasing
    erase_prob = 0.5


config = Config()
```

Building a network model

Generally speaking, when building a baseline, a lightweight network with as few parameters and as little model complexity as possible is chosen as the backbone. Once the pipeline works, only the backbone needs to be swapped for a more complex one.

Here MobileNet V2 is used as the backbone to build the classification network.

```python
class DigitsMobilenet(nn.Module):
    def __init__(self, class_num=11):
        super(DigitsMobilenet, self).__init__()
        # keep the MobileNet V2 feature extractor and replace its classifier
        # with global average pooling followed by five parallel FC heads
        self.net = mobilenet_v2(pretrained=True).features
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Linear(1280, class_num)
        self.fc2 = nn.Linear(1280, class_num)
        self.fc3 = nn.Linear(1280, class_num)
        self.fc4 = nn.Linear(1280, class_num)
        self.fc5 = nn.Linear(1280, class_num)

    def forward(self, img):
        """
        Params:
            img (tensor): shape [N, C, H, W]
        Returns:
            fc1..fc5 (tensor): predictions for the 1st to 5th character respectively
        """
        features = self.avgpool(self.net(img)).view(-1, 1280)
        fc1 = self.fc1(features)
        fc2 = self.fc2(features)
        fc3 = self.fc3(features)
        fc4 = self.fc4(features)
        fc5 = self.fc5(features)
        return fc1, fc2, fc3, fc4, fc5


class DigitsResnet18(nn.Module):
    def __init__(self, class_num=11):
        super(DigitsResnet18, self).__init__()
        self.net = resnet18(pretrained=True)
        # nn.Identity passes the pooled features straight through, removing the original FC layer
        self.net.fc = nn.Identity()
        self.fc1 = nn.Linear(512, class_num)
        self.fc2 = nn.Linear(512, class_num)
        self.fc3 = nn.Linear(512, class_num)
        self.fc4 = nn.Linear(512, class_num)
        self.fc5 = nn.Linear(512, class_num)

    def forward(self, img):
        features = self.net(img).squeeze()
        fc1 = self.fc1(features)
        fc2 = self.fc2(features)
        fc3 = self.fc3(features)
        fc4 = self.fc4(features)
        fc5 = self.fc5(features)
        return fc1, fc2, fc3, fc4, fc5
```
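As a quick sanity check (not part of the original notebook), a dummy batch can be pushed through the MobileNet variant to confirm that each of the five heads outputs `class_num` logits. The input resolution of 64x128 is only an illustrative value.

```python
# Hypothetical sanity check; the first call downloads the pretrained MobileNet V2 weights.
model = DigitsMobilenet(class_num=11)
dummy = t.randn(2, 3, 64, 128)      # a dummy batch of shape [N, C, H, W]
outputs = model(dummy)
print([o.shape for o in outputs])   # expected: five tensors of shape [2, 11]
```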

Building Training Modules

Several tricks are used here:

  • Label Smooth

    Label smoothing is a regularization technique to avoid overfitting due to the small amount of data.

    The formula for label smoothing is as follows, where ε denotes the smoothing factor (set to 0.1 in the experiment), C denotes the number of classes, and P_i denotes the smoothed probability assigned to class i.

    $$P_i=\begin{cases} 1-\epsilon & \text{if } i = y \\ \dfrac{\epsilon}{C-1} & \text{if } i \neq y \end{cases}$$

    For example, with ε = 0.1 and C = 4, the one-hot label vector [0, 1, 0, 0] becomes [0.033, 0.9, 0.033, 0.033] after label smoothing.

  • Cosine decay + warmup

    Generally, the gradient is extremely unstable at the very beginning of training, so a small learning rate should be used for the first few iterations before restoring it to the initial value and training normally. During the first n iterations (n was set to 10), the learning rate is increased linearly until it reaches the initial learning rate. This guarantees the stability of training to a certain extent and helps the model converge. The cosine decay schedule, in turn, is good at escaping local minima and therefore has a better chance of settling in a good one. The figure below shows the learning-rate curves under the warmup and cosine decay strategies; a minimal sketch of such a schedule is given right after this list.
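The training code below relies on `CosineAnnealingWarmRestarts` alone. To illustrate the warmup idea described above, here is a minimal, self-contained sketch that combines a linear warmup with cosine decay via `LambdaLR`; the placeholder model and the values of `warmup_epochs` and `total_epochs` are illustrative, not taken from the original.

```python
import math
import torch as t
from torch.optim import SGD
from torch.optim.lr_scheduler import LambdaLR

# Placeholder model and optimizer; warmup_epochs / total_epochs are illustrative values.
model = t.nn.Linear(10, 2)
optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
warmup_epochs, total_epochs = 10, 50

def lr_lambda(epoch):
    if epoch < warmup_epochs:
        # linear warmup: ramp the learning rate up to its initial value
        return (epoch + 1) / warmup_epochs
    # afterwards: cosine decay from the initial learning rate towards zero
    progress = (epoch - warmup_epochs) / (total_epochs - warmup_epochs)
    return 0.5 * (1 + math.cos(math.pi * progress))

scheduler = LambdaLR(optimizer, lr_lambda)

for epoch in range(total_epochs):
    # ... run one training epoch here ...
    scheduler.step()
```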
```python
# ----------------------------------- LabelSmoothEntropy ----------------------------------- #
class LabelSmoothEntropy(nn.Module):
    def __init__(self, smooth=0.1, class_weights=None, size_average='mean'):
        super(LabelSmoothEntropy, self).__init__()
        self.size_average = size_average
        self.smooth = smooth
        self.class_weights = class_weights

    def forward(self, preds, targets):
        # epsilon / (C - 1), where C is the number of classes (preds.shape[1])
        lb_pos, lb_neg = 1 - self.smooth, self.smooth / (preds.shape[1] - 1)
        smoothed_lb = t.zeros_like(preds).fill_(lb_neg).scatter_(1, targets[:, None], lb_pos)
        log_soft = F.log_softmax(preds, dim=1)
        if self.class_weights is not None:
            loss = -log_soft * smoothed_lb * self.class_weights[None, :]
        else:
            loss = -log_soft * smoothed_lb
        loss = loss.sum(1)
        if self.size_average == 'mean':
            return loss.mean()
        elif self.size_average == 'sum':
            return loss.sum()
        else:
            raise NotImplementedError


class Trainer:
    def __init__(self):
        self.device = t.device('cuda') if t.cuda.is_available() else t.device('cpu')
        self.train_set = DigitsDataset(data_dir['train_data'], data_dir['train_label'])
        self.train_loader = DataLoader(self.train_set, batch_size=config.batch_size, num_workers=8,
                                       pin_memory=True, drop_last=True)
        self.val_loader = DataLoader(DigitsDataset(data_dir['val_data'], data_dir['val_label'], aug=False),
                                     batch_size=config.batch_size, num_workers=8, pin_memory=True, drop_last=True)
        self.model = DigitsMobilenet(config.class_num).to(self.device)
        # use the Label Smooth loss
        self.criterion = LabelSmoothEntropy().to(self.device)
        self.optimizer = SGD(self.model.parameters(), lr=config.lr, momentum=config.momentum,
                             weight_decay=config.weights_decay, nesterov=True)
        # cosine annealing with warm restarts as the learning-rate schedule
        self.lr_scheduler = CosineAnnealingWarmRestarts(self.optimizer, 10, 2, eta_min=10e-4)
        # self.lr_scheduler = MultiStepLR(self.optimizer, [10, 20, 30], 0)
        self.best_acc = 0
        if config.pretrained is not None:
            self.load_model(config.pretrained)
            # print('Load model from %s' % config.pretrained)
            acc = self.eval()
            self.best_acc = acc
            print('Load model from %s, Eval Acc: %.2f' % (config.pretrained, acc * 100))

    def train(self):
        for epoch in range(config.start_epoch, config.epoches):
            self.train_epoch(epoch)
            if (epoch + 1) % config.eval_interval == 0:
                print('Start Evaluation')
                acc = self.eval()
                if acc > self.best_acc:
                    os.makedirs(config.checkpoints, exist_ok=True)
                    save_path = config.checkpoints + 'epoch-%d_acc-%.2f.pth' % (epoch + 1, acc)
                    self.save_model(save_path)
                    print('%s saved successfully...' % save_path)
                    self.best_acc = acc

    def train_epoch(self, epoch):
        total_loss = 0
        corrects = 0
        tbar = tqdm(self.train_loader)
        self.model.train()
        for i, (img, label) in enumerate(tbar):
            img = img.to(self.device)
            label = label.to(self.device)
            self.optimizer.zero_grad()
            pred = self.model(img)
            # one loss term per character position
            loss = self.criterion(pred[0], label[:, 0]) + \
                   self.criterion(pred[1], label[:, 1]) + \
                   self.criterion(pred[2], label[:, 2]) + \
                   self.criterion(pred[3], label[:, 3]) + \
                   self.criterion(pred[4], label[:, 4])
            total_loss += loss.item()
            loss.backward()
            self.optimizer.step()
            temp = t.stack([
                pred[0].argmax(1) == label[:, 0],
                pred[1].argmax(1) == label[:, 1],
                pred[2].argmax(1) == label[:, 2],
                pred[3].argmax(1) == label[:, 3],
                pred[4].argmax(1) == label[:, 4],
            ], dim=1)
            # a sample counts as correct only if all five characters are predicted correctly
            corrects += t.all(temp, dim=1).sum().item()
            if (i + 1) % config.print_interval == 0:
                self.lr_scheduler.step()
                tbar.set_description('loss: %.3f, acc: %.3f' % (total_loss / (i + 1),
                                     corrects * 100 / ((i + 1) * config.batch_size)))

    def eval(self):
        self.model.eval()
        corrects = 0
        with t.no_grad():
            tbar = tqdm(self.val_loader)
            for i, (img, label) in enumerate(tbar):
                img = img.to(self.device)
                label = label.to(self.device)
                pred = self.model(img)
                temp = t.stack([
                    pred[0].argmax(1) == label[:, 0],
                    pred[1].argmax(1) == label[:, 1],
                    pred[2].argmax(1) == label[:, 2],
                    pred[3].argmax(1) == label[:, 3],
                    pred[4].argmax(1) == label[:, 4],
                ], dim=1)
                corrects += t.all(temp, dim=1).sum().item()
                tbar.set_description('Val Acc: %.2f' % (corrects * 100 / ((i + 1) * config.batch_size)))
        self.model.train()
        return corrects / (len(self.val_loader) * config.batch_size)

    def save_model(self, save_path, save_opt=False, save_config=False):
        dicts = {}
        dicts['model'] = self.model.state_dict()
        if save_opt:
            dicts['opt'] = self.optimizer.state_dict()
        if save_config:
            dicts['config'] = {s: config.__getattribute__(s) for s in dir(config) if not s.startswith('_')}
        t.save(dicts, save_path)

    def load_model(self, load_path, save_opt=False, save_config=False):
        dicts = t.load(load_path)
        self.model.load_state_dict(dicts['model'])
        if save_opt:
            self.optimizer.load_state_dict(dicts['opt'])
        if save_config:
            for k, v in dicts['config'].items():
                config.__setattr__(k, v)
```
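With everything in place, training is launched in two lines. This minimal usage sketch assumes the `DigitsDataset` class and the `data_dir` dictionary defined in the previous section are available.

```python
# Minimal usage sketch (assumes DigitsDataset and data_dir from the previous section are defined)
trainer = Trainer()
trainer.train()
```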

Conclusion

Overall, I personally find the idea of treating this task as classification quite novel; I did not think of it at first. If a classification model can do the job, why bother with object detection? That said, object detection should still perform better in the competition.

This section builds directly on the previous one and reuses its code. The code is in my GitHub repository; Stars are welcome.

I have also shared all the data via a cloud drive at this address.

OK, that’s it for now