A note up front: I originally wanted to run a simple multilayer perceptron experiment comparing different optimization methods, but as I wrote it up the piece turned into a study of evaluation metrics instead. I wrote a related article before: www.cnblogs.com/xiximayou/p… Unlike that article, this time we add implementations of the metrics themselves, so let's take a look.

The whole process of building a multi-layer perceptron classifier with PyTorch

Importing related packages

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc

Set random seed

A random seed should always be set so that our experiments are reproducible: runs starting from the same randomly initialized data then produce the same results.

np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)
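If stricter determinism is needed on the GPU, cuDNN can be pinned down as well; a small optional addition (not in the original code, and it may slow training):

# optional: make cuDNN deterministic too, at some cost in speed
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False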

Load the data

We use the simple digits dataset that ships with sklearn:

print("Load data")
digits = load_digits()
data, label = digits.data, digits.target
# print(data.shape, label.shape)
train_data, test_data, train_label, test_label = train_test_split(data, label, test_size=0.3, random_state=123)
print('Training Data:', train_data.shape)
print('Test data:', test_data.shape)

Define relevant parameters

print("Define relevant parameters")
epochs = 30
batch_size = train_data.shape[0]  # full-batch training: one batch covers the whole training set
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 
input_dim = data.shape[1]
hidden_dim = 256
output_dim = len(set(label))

Building a data set

PyTorch builds a dataset by implementing a class that inherits from Dataset and overrides its __len__ and __getitem__ methods.

print("Build the data set")
class DigitsDataset(Dataset):
  def __init__(self, input_data, input_label):
    data = []
    for i, j in zip(input_data, input_label):
      data.append((i, j))
    self.data = data

  def __len__(self):
    return len(self.data)

  def __getitem__(self, index):
    d, l = self.data[index]
    return d, l

At initialization, we put each sample and its label together in a list; __len__ then reports the total count and __getitem__ fetches a single sample by index. Next we wrap the defined datasets in DataLoaders.

trainDataset = DigitsDataset(train_data, train_label)
testDataset = DigitsDataset(test_data, test_label)
# print(trainDataset[0])
trainDataLoader = DataLoader(trainDataset, batch_size=batch_size, shuffle=True, num_workers=2)
testDataLoader = DataLoader(testDataset, batch_size=batch_size, shuffle=False, num_workers=2)

Define the model

Here we implement a simple multi-layer perceptron:

class Model(nn.Module):
  def __init__(self, input_dim, hidden_dim, output_dim):
    super(Model, self).__init__()
    self.fc1 = nn.Linear(input_dim, hidden_dim) 
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(hidden_dim, output_dim)

  def forward(self, x):
    x = self.fc1(x)
    x = self.relu(x)
    x = self.fc2(x)
    return x

Define loss functions, optimizers, and initialization parameters

model = Model(input_dim, hidden_dim, output_dim)
print(model)
model.to(device)

print("Define loss function, optimizer")
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)

print("Initialize related parameters")
for param in model.parameters():
  nn.init.normal_(param, mean=0, std=0.01)

Train and test

Here we only use sklearn's metric functions to compute the evaluation metrics: accuracy_score computes accuracy, precision_score computes precision, recall_score computes recall, f1_score computes F1, and classification_report and confusion_matrix summarize them. Their usage can be seen in the code.

print("Start training the main loop.")
total_step = len(trainDataLoader)

model.train()
for epoch in range(epochs):
  tot_loss = 0.0
  tot_acc = 0.0
  train_preds = []
  train_trues = []
  # model.train()
  for i,(train_data_batch, train_label_batch) in enumerate(trainDataLoader):
    train_data_batch = train_data_batch.float().to(device)  # convert double data to float
    train_label_batch = train_label_batch.to(device)
    outputs = model(train_data_batch)
    # _, preds = torch.max(outputs.data, 1)
    loss = criterion(outputs, train_label_batch)
    # print(loss)
    # Backpropagation optimizes network parameters
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    # add up the loss for each step
    tot_loss += loss.item()
    train_outputs = outputs.argmax(dim=1)

    train_preds.extend(train_outputs.detach().cpu().numpy())
    train_trues.extend(train_label_batch.detach().cpu().numpy())

    # tot_acc += (outputs.argmax(dim=1) == train_label_batch).sum().item()

  sklearn_accuracy = accuracy_score(train_trues, train_preds) 
  sklearn_precision = precision_score(train_trues, train_preds, average='micro')
  sklearn_recall = recall_score(train_trues, train_preds, average='micro')
  sklearn_f1 = f1_score(train_trues, train_preds, average='micro')
  print("[sklearn_metrics] Epoch:{} loss:{:.4f} accuracy:{:.4f} precision:{:.4f} recall:{:.4f} f1:{:.4f}".format(epoch, tot_loss, sklearn_accuracy, sklearn_precision, sklearn_recall, sklearn_f1))

test_preds = []
test_trues = []
model.eval()
with torch.no_grad():
  for i,(test_data_batch, test_data_label) in enumerate(testDataLoader):
    test_data_batch = test_data_batch.float().to(device)  # convert double data to float
    test_data_label = test_data_label.to(device)
    test_outputs = model(test_data_batch)
    test_outputs = test_outputs.argmax(dim=1)
    test_preds.extend(test_outputs.detach().cpu().numpy())
    test_trues.extend(test_data_label.detach().cpu().numpy())

  sklearn_accuracy = accuracy_score(test_trues, test_preds)
  sklearn_precision = precision_score(test_trues, test_preds, average='micro')
  sklearn_recall = recall_score(test_trues, test_preds, average='micro')
  sklearn_f1 = f1_score(test_trues, test_preds, average='micro')
  print(classification_report(test_trues, test_preds))
  conf_matrix = get_confusion_matrix(test_trues, test_preds)
  print(conf_matrix)
  plot_confusion_matrix(conf_matrix)
  print("[sklearn_metrics] accuracy:{:.4f} precision:{:.4f} recall:{:.4f} f1:{:.4f}".format(sklearn_accuracy, sklearn_precision, sklearn_recall, sklearn_f1))

Define and draw confusion matrices

In addition, we add the computation and plotting of the confusion matrix (these two helpers are the ones called in the test loop above).

def get_confusion_matrix(trues, preds):
  labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  conf_matrix = confusion_matrix(trues, preds, labels=labels)
  return conf_matrix

def plot_confusion_matrix(conf_matrix):
  plt.imshow(conf_matrix, cmap=plt.cm.Greens)
  indices = range(conf_matrix.shape[0])
  labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  plt.xticks(indices, labels)
  plt.yticks(indices, labels)
  plt.colorbar()
  plt.xlabel('y_pred')
  plt.ylabel('y_true')
  # display the count in each cell (x is the column/predicted index, y the row/true index)
  for first_index in range(conf_matrix.shape[0]):
    for second_index in range(conf_matrix.shape[1]):
      plt.text(second_index, first_index, conf_matrix[first_index, second_index])
  plt.savefig('heatmap_confusion_matrix.jpg')
  plt.show()

The results

Load data
Training data: (1257, 64)
Test data: (540, 64)
Define relevant parameters
Build the data set
Define evaluation metrics
Model(
  (fc1): Linear(in_features=64, out_features=256, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)
Define loss function, optimizer
Initialize related parameters
Start training the main loop.
[sklearn_metrics] Epoch:0 loss:2.2986 accuracy:0.1098 precision:0.1098 recall:0.1098 f1:0.1098
[sklearn_metrics] Epoch:1 loss:2.2865 accuracy:0.1225 precision:0.1225 recall:0.1225 f1:0.1225
[sklearn_metrics] Epoch:2 loss:2.2637 accuracy:0.1702 precision:0.1702 recall:0.1702 f1:0.1702
[sklearn_metrics] Epoch:3 loss:2.2316 accuracy:0.3174 precision:0.3174 recall:0.3174 f1:0.3174
[sklearn_metrics] Epoch:4 loss:2.1915 accuracy:0.5561 precision:0.5561 recall:0.5561 f1:0.5561
[sklearn_metrics] Epoch:5 loss:2.1438 accuracy:0.6881 precision:0.6881 recall:0.6881 f1:0.6881
[sklearn_metrics] Epoch:6 loss:2.0875 accuracy:0.7669 precision:0.7669 recall:0.7669 f1:0.7669
[sklearn_metrics] Epoch:7 loss:2.0213 accuracy:0.8226 precision:0.8226 recall:0.8226 f1:0.8226
[sklearn_metrics] Epoch:8 loss:1.9428 accuracy:0.8409 precision:0.8409 recall:0.8409 f1:0.8409
[sklearn_metrics] Epoch:9 loss:1.8494 accuracy:0.8552 precision:0.8552 recall:0.8552 f1:0.8552
[sklearn_metrics] Epoch:10 loss:1.7397 accuracy:0.8568 precision:0.8568 recall:0.8568 f1:0.8568
[sklearn_metrics] Epoch:11 loss:1.6140 accuracy:0.8632 precision:0.8632 recall:0.8632 f1:0.8632
[sklearn_metrics] Epoch:12 loss:1.4748 accuracy:0.8616 precision:0.8616 recall:0.8616 f1:0.8616
[sklearn_metrics] Epoch:13 loss:1.3259 accuracy:0.8640 precision:0.8640 recall:0.8640 f1:0.8640
[sklearn_metrics] Epoch:14 loss:1.1735 accuracy:0.8703 precision:0.8703 recall:0.8703 f1:0.8703
[sklearn_metrics] Epoch:15 loss:1.0245 accuracy:0.8791 precision:0.8791 recall:0.8791 f1:0.8791
[sklearn_metrics] Epoch:16 loss:0.8858 accuracy:0.8878 precision:0.8878 recall:0.8878 f1:0.8878
[sklearn_metrics] Epoch:17 loss:0.7625 accuracy:0.9006 precision:0.9006 recall:0.9006 f1:0.9006
[sklearn_metrics] Epoch:18 loss:0.6575 accuracy:0.9045 precision:0.9045 recall:0.9045 f1:0.9045
[sklearn_metrics] Epoch:19 loss:0.5709 accuracy:0.9077 precision:0.9077 recall:0.9077 f1:0.9077
[sklearn_metrics] Epoch:20 loss:0.5004 accuracy:0.9093 precision:0.9093 recall:0.9093 f1:0.9093
[sklearn_metrics] Epoch:21 loss:0.4436 accuracy:0.9101 precision:0.9101 recall:0.9101 f1:0.9101
[sklearn_metrics] Epoch:22 loss:0.3982 accuracy:0.9109 precision:0.9109 recall:0.9109 f1:0.9109
[sklearn_metrics] Epoch:23 loss:0.3615 accuracy:0.9149 precision:0.9149 recall:0.9149 f1:0.9149
[sklearn_metrics] Epoch:24 loss:0.3314 accuracy:0.9173 precision:0.9173 recall:0.9173 f1:0.9173
[sklearn_metrics] Epoch:25 loss:0.3065 accuracy:0.9196 precision:0.9196 recall:0.9196 f1:0.9196
[sklearn_metrics] Epoch:26 loss:0.2856 accuracy:0.9228 precision:0.9228 recall:0.9228 f1:0.9228
[sklearn_metrics] Epoch:27 loss:0.2673 accuracy:0.9236 precision:0.9236 recall:0.9236 f1:0.9236
[sklearn_metrics] Epoch:28 loss:0.2512 accuracy:0.9268 precision:0.9268 recall:0.9268 f1:0.9268
[sklearn_metrics] Epoch:29 loss:0.2370 accuracy:0.9300 precision:0.9300 recall:0.9300 f1:0.9300
              precision    recall  f1-score   support

           0       0.98      0.98      0.98        59
           1       0.86      0.86      0.86        56
           2       0.98      0.91      0.94        53
           3       0.98      0.93      0.96        46
           4       0.95      0.97      0.96        61
           5       0.98      0.91      0.95        57
           6       0.96      0.96      0.96        57
           7       0.92      0.98      0.95        50
           8       0.87      0.81      0.84        48
           9       0.77      0.91      0.83        53

    accuracy                           0.92       540
   macro avg       0.93      0.92      0.92       540
weighted avg       0.93      0.92      0.92       540

[[58  0  0  0  1  0  0  0  0  0]
 [ 0 48  0  0  0  0  1  0  0  7]
 [ 0  2 48  0  0  0  0  1  2  0]
 [ 0  0  1 43  0  0  0  1  1  0]
 [ 0  0  0  0 59  0  0  1  1  0]
 [ 0  0  0  0  1 52  0  0  0  4]
 [ 1  1  0  0  0  0 55  0  0  0]
 [ 0  0  0  0  0  0  0 49  0  1]
 [ 0  4  0  0  1  1  1  0 39  2]
 [ 0  1  0  1  0  0  0  1  2 48]]
<Figure size 640x480 with 2 Axes>
[sklearn_metrics] accuracy:0.9241 precision:0.9241 recall:0.9241 f1:0.9241

Evaluation metrics: accuracy, precision, recall and F1

(1) Basic knowledge

We have now gone through loading data with PyTorch, building a model, training and testing, and evaluating the model with sklearn. Next, we discuss the evaluation metrics in detail. First, the four basic metrics (described here for multi-class classification):

accuracy: how much of the data is identified correctly. It is measured over the whole set. For example, with predicted labels [0,1,1,3,2,2,1] and true labels [1,2,1,3,2,1,1], the two lists agree at 4 positions (positions 2, 3, 4 and 6, counting from 0) out of 7 in total, so the accuracy is 4/7 = 0.5714.

precision: how much of the data predicted as positive is actually positive. Take the class with label 1 as an example; every other label is first set to 0 (0 here means a negative sample, not class 0). With predicted labels [0,1,1,0,0,0,1] and true labels [1,0,1,0,0,1,1], 3 of the predictions are positive (positions 1, 2 and 6), and 2 of those (positions 2 and 6) agree with the true label, so the precision is 2/3 = 0.6667.

recall: how many of the positive samples are identified correctly. Again using label 1: the true labels contain 4 ones, of which 2 are predicted correctly, so the recall is 2/4 = 0.5000.

F1: combines precision and recall: F1 = 2 * precision * recall / (precision + recall).
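The toy numbers above can be checked directly with sklearn; a minimal sketch (the lists are the ones from the example):

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

y_pred = [0, 1, 1, 3, 2, 2, 1]
y_true = [1, 2, 1, 3, 2, 1, 1]
print(accuracy_score(y_true, y_pred))  # 4/7 = 0.5714...

# precision/recall/F1 for class 1, with all other labels collapsed to 0
y_pred_bin = [0, 1, 1, 0, 0, 0, 1]
y_true_bin = [1, 0, 1, 0, 0, 1, 1]
print(precision_score(y_true_bin, y_pred_bin))  # 2/3 = 0.6666...
print(recall_score(y_true_bin, y_pred_bin))     # 2/4 = 0.5
print(f1_score(y_true_bin, y_pred_bin))         # 2*p*r/(p+r) = 0.5714...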

(2) The specific calculation uses TP, FP, FN and TN. What do these stand for? The second letter (Positive/Negative) is what was predicted; the first letter (True/False) says whether that prediction is right:

TP (True Positive): predicted positive, and the prediction is right, i.e. the true value is positive.
FP (False Positive): predicted positive, but the prediction is wrong, i.e. the true value is negative.
FN (False Negative): predicted negative, but the prediction is wrong, i.e. the true value is positive.
TN (True Negative): predicted negative, and the prediction is right, i.e. the true value is negative.

With our earlier definitions, accuracy is just (TP + TN) / (TP + FP + FN + TN), precision is just TP / (TP + FP), recall is just TP / (TP + FN), and F1 is 2 * precision * recall / (precision + recall).
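Applying these formulas to the binary example above (my own illustration, not from the original article):

import numpy as np

y_pred = np.array([0, 1, 1, 0, 0, 0, 1])
y_true = np.array([1, 0, 1, 0, 0, 1, 1])

TP = ((y_pred == 1) & (y_true == 1)).sum()  # 2
FP = ((y_pred == 1) & (y_true == 0)).sum()  # 1
FN = ((y_pred == 0) & (y_true == 1)).sum()  # 2
TN = ((y_pred == 0) & (y_true == 0)).sum()  # 2

accuracy = (TP + TN) / (TP + FP + FN + TN)          # 4/7
precision = TP / (TP + FP)                          # 2/3
recall = TP / (TP + FN)                             # 1/2
f1 = 2 * precision * recall / (precision + recall)  # 0.5714...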

(3) Micro-F1 and Macro-F1. To put it simply, micro-F1 first sums the TP, FP, FN and TN over all categories and then computes the metrics from those totals; under class imbalance it therefore takes the size of each category into account. Macro-F1 first computes the metrics for each category and then averages them, so it is susceptible to the influence of categories whose precision and recall are unusually high or low.
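A small illustration of the difference on an imbalanced toy problem (my own example, not from the original):

from sklearn.metrics import f1_score

# class 0 dominates and is predicted well; class 1 is rare and half-missed
y_true = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
y_pred = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]

print(f1_score(y_true, y_pred, average='micro'))  # 0.9000, dominated by the large class
print(f1_score(y_true, y_pred, average='macro'))  # 0.8039, dragged down by the rare class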

Basic implementation

Next, we implement the evaluation metrics ourselves based on the understanding above.

(1) Basic implementation

def get_acc_p_r_f1(trues, preds):
  labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  TP, FP, FN, TN = 0, 0, 0, 0
  for label in labels:
    preds_tmp = np.array([1 if pred == label else 0 for pred in preds])
    trues_tmp = np.array([1 if true == label else 0 for true in trues])
    # print(preds_tmp, trues_tmp)
    # TP: predicted 1, true 1
    # TN: predicted 0, true 0
    # FN: predicted 0, true 1
    # FP: predicted 1, true 0
    TP += ((preds_tmp == 1) & (trues_tmp == 1)).sum()
    TN += ((preds_tmp == 0) & (trues_tmp == 0)).sum()
    FN += ((preds_tmp == 0) & (trues_tmp == 1)).sum()
    FP += ((preds_tmp == 1) & (trues_tmp == 0)).sum()
  # print(TP, FP, FN)
  precision = TP / (TP + FP)
  recall = TP / (TP + FN)
  f1 = 2 * precision * recall / (precision + recall)
  return precision, recall, f1

def get_acc(trues, preds):
  accuracy = (np.array(trues) == np.array(preds)).sum() / len(trues)
  return accuracy

I won’t go into details, but the code is easy to read.
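As a sanity check, the custom functions can be run side by side with sklearn on the test predictions; a sketch assuming the test_trues and test_preds lists collected in the test loop above:

precision, recall, f1 = get_acc_p_r_f1(test_trues, test_preds)
accuracy = get_acc(test_trues, test_preds)
print("[custom_metrics] accuracy:{:.4f} precision:{:.4f} recall:{:.4f} f1:{:.4f}".format(accuracy, precision, recall, f1))
print("[sklearn_metrics] accuracy:{:.4f} precision:{:.4f} recall:{:.4f} f1:{:.4f}".format(
    accuracy_score(test_trues, test_preds),
    precision_score(test_trues, test_preds, average='micro'),
    recall_score(test_trues, test_preds, average='micro'),
    f1_score(test_trues, test_preds, average='micro')))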

(2) Based on the confusion matrix

def get_p_r_f1_from_conf_matrix(conf_matrix):
  TP, FP, FN, TN = 0, 0, 0, 0
  labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  for i in labels:
    # diagonal: correct predictions; rest of column i: FP; rest of row i: FN
    TP += conf_matrix[i, i]
    FP += (conf_matrix[:i, i].sum() + conf_matrix[i+1:, i].sum())
    FN += (conf_matrix[i, i+1:].sum() + conf_matrix[i, :i].sum())
  # print(TP, FP, FN)
  precision = TP / (TP + FP)
  recall = TP / (TP + FN)
  f1 = 2 * precision * recall / (precision + recall)
  return precision, recall, f1

def get_acc_from_conf_matrix(conf_matrix):
  labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  return sum([conf_matrix[i, i] for i in range(len(labels))]) / np.sum(conf_matrix)

The final result

Load data
Training data: (1257, 64)
Test data: (540, 64)
Define relevant parameters
Build the data set
Define evaluation metrics
Model(
  (fc1): Linear(in_features=64, out_features=256, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)
Define loss function, optimizer
Initialize related parameters
Start training the main loop.
[custom_metrics] Epoch:0 loss:2.2986 accuracy:0.1098 precision:0.1098 recall:0.1098 f1:0.1098
[sklearn_metrics] Epoch:0 loss:2.2986 accuracy:0.1098 precision:0.1098 recall:0.1098 f1:0.1098
[custom_metrics] Epoch:1 loss:2.2865 accuracy:0.1225 precision:0.1225 recall:0.1225 f1:0.1225
[sklearn_metrics] Epoch:1 loss:2.2865 accuracy:0.1225 precision:0.1225 recall:0.1225 f1:0.1225
[custom_metrics] Epoch:2 loss:2.2637 accuracy:0.1702 precision:0.1702 recall:0.1702 f1:0.1702
[sklearn_metrics] Epoch:2 loss:2.2637 accuracy:0.1702 precision:0.1702 recall:0.1702 f1:0.1702
[custom_metrics] Epoch:3 loss:2.2316 accuracy:0.3174 precision:0.3174 recall:0.3174 f1:0.3174
[sklearn_metrics] Epoch:3 loss:2.2316 accuracy:0.3174 precision:0.3174 recall:0.3174 f1:0.3174
[custom_metrics] Epoch:4 loss:2.1915 accuracy:0.5561 precision:0.5561 recall:0.5561 f1:0.5561
[sklearn_metrics] Epoch:4 loss:2.1915 accuracy:0.5561 precision:0.5561 recall:0.5561 f1:0.5561
[custom_metrics] Epoch:5 loss:2.1438 accuracy:0.6881 precision:0.6881 recall:0.6881 f1:0.6881
[sklearn_metrics] Epoch:5 loss:2.1438 accuracy:0.6881 precision:0.6881 recall:0.6881 f1:0.6881
[custom_metrics] Epoch:6 loss:2.0875 accuracy:0.7669 precision:0.7669 recall:0.7669 f1:0.7669
[sklearn_metrics] Epoch:6 loss:2.0875 accuracy:0.7669 precision:0.7669 recall:0.7669 f1:0.7669
[custom_metrics] Epoch:7 loss:2.0213 accuracy:0.8226 precision:0.8226 recall:0.8226 f1:0.8226
[sklearn_metrics] Epoch:7 loss:2.0213 accuracy:0.8226 precision:0.8226 recall:0.8226 f1:0.8226
[custom_metrics] Epoch:8 loss:1.9428 accuracy:0.8409 precision:0.8409 recall:0.8409 f1:0.8409
[sklearn_metrics] Epoch:8 loss:1.9428 accuracy:0.8409 precision:0.8409 recall:0.8409 f1:0.8409
[custom_metrics] Epoch:9 loss:1.8494 accuracy:0.8552 precision:0.8552 recall:0.8552 f1:0.8552
[sklearn_metrics] Epoch:9 loss:1.8494 accuracy:0.8552 precision:0.8552 recall:0.8552 f1:0.8552
[custom_metrics] Epoch:10 loss:1.7397 accuracy:0.8568 precision:0.8568 recall:0.8568 f1:0.8568
[sklearn_metrics] Epoch:10 loss:1.7397 accuracy:0.8568 precision:0.8568 recall:0.8568 f1:0.8568
[custom_metrics] Epoch:11 loss:1.6140 accuracy:0.8632 precision:0.8632 recall:0.8632 f1:0.8632
[sklearn_metrics] Epoch:11 loss:1.6140 accuracy:0.8632 precision:0.8632 recall:0.8632 f1:0.8632
[custom_metrics] Epoch:12 loss:1.4748 accuracy:0.8616 precision:0.8616 recall:0.8616 f1:0.8616
[sklearn_metrics] Epoch:12 loss:1.4748 accuracy:0.8616 precision:0.8616 recall:0.8616 f1:0.8616
[custom_metrics] Epoch:13 loss:1.3259 accuracy:0.8640 precision:0.8640 recall:0.8640 f1:0.8640
[sklearn_metrics] Epoch:13 loss:1.3259 accuracy:0.8640 precision:0.8640 recall:0.8640 f1:0.8640
[custom_metrics] Epoch:14 loss:1.1735 accuracy:0.8703 precision:0.8703 recall:0.8703 f1:0.8703
[sklearn_metrics] Epoch:14 loss:1.1735 accuracy:0.8703 precision:0.8703 recall:0.8703 f1:0.8703
[custom_metrics] Epoch:15 loss:1.0245 accuracy:0.8791 precision:0.8791 recall:0.8791 f1:0.8791
[sklearn_metrics] Epoch:15 loss:1.0245 accuracy:0.8791 precision:0.8791 recall:0.8791 f1:0.8791
[custom_metrics] Epoch:16 loss:0.8858 accuracy:0.8878 precision:0.8878 recall:0.8878 f1:0.8878
[sklearn_metrics] Epoch:16 loss:0.8858 accuracy:0.8878 precision:0.8878 recall:0.8878 f1:0.8878
[custom_metrics] Epoch:17 loss:0.7625 accuracy:0.9006 precision:0.9006 recall:0.9006 f1:0.9006
[sklearn_metrics] Epoch:17 loss:0.7625 accuracy:0.9006 precision:0.9006 recall:0.9006 f1:0.9006
[custom_metrics] Epoch:18 loss:0.6575 accuracy:0.9045 precision:0.9045 recall:0.9045 f1:0.9045
[sklearn_metrics] Epoch:18 loss:0.6575 accuracy:0.9045 precision:0.9045 recall:0.9045 f1:0.9045
[custom_metrics] Epoch:19 loss:0.5709 accuracy:0.9077 precision:0.9077 recall:0.9077 f1:0.9077
[sklearn_metrics] Epoch:19 loss:0.5709 accuracy:0.9077 precision:0.9077 recall:0.9077 f1:0.9077
[custom_metrics] Epoch:20 loss:0.5004 accuracy:0.9093 precision:0.9093 recall:0.9093 f1:0.9093
[sklearn_metrics] Epoch:20 loss:0.5004 accuracy:0.9093 precision:0.9093 recall:0.9093 f1:0.9093
[custom_metrics] Epoch:21 loss:0.4436 accuracy:0.9101 precision:0.9101 recall:0.9101 f1:0.9101
[sklearn_metrics] Epoch:21 loss:0.4436 accuracy:0.9101 precision:0.9101 recall:0.9101 f1:0.9101
[custom_metrics] Epoch:22 loss:0.3982 accuracy:0.9109 precision:0.9109 recall:0.9109 f1:0.9109
[sklearn_metrics] Epoch:22 loss:0.3982 accuracy:0.9109 precision:0.9109 recall:0.9109 f1:0.9109
[custom_metrics] Epoch:23 loss:0.3615 accuracy:0.9149 precision:0.9149 recall:0.9149 f1:0.9149
[sklearn_metrics] Epoch:23 loss:0.3615 accuracy:0.9149 precision:0.9149 recall:0.9149 f1:0.9149
[custom_metrics] Epoch:24 loss:0.3314 accuracy:0.9173 precision:0.9173 recall:0.9173 f1:0.9173
[sklearn_metrics] Epoch:24 loss:0.3314 accuracy:0.9173 precision:0.9173 recall:0.9173 f1:0.9173
[custom_metrics] Epoch:25 loss:0.3065 accuracy:0.9196 precision:0.9196 recall:0.9196 f1:0.9196
[sklearn_metrics] Epoch:25 loss:0.3065 accuracy:0.9196 precision:0.9196 recall:0.9196 f1:0.9196
[custom_metrics] Epoch:26 loss:0.2856 accuracy:0.9228 precision:0.9228 recall:0.9228 f1:0.9228
[sklearn_metrics] Epoch:26 loss:0.2856 accuracy:0.9228 precision:0.9228 recall:0.9228 f1:0.9228
[custom_metrics] Epoch:27 loss:0.2673 accuracy:0.9236 precision:0.9236 recall:0.9236 f1:0.9236
[sklearn_metrics] Epoch:27 loss:0.2673 accuracy:0.9236 precision:0.9236 recall:0.9236 f1:0.9236
[custom_metrics] Epoch:28 loss:0.2512 accuracy:0.9268 precision:0.9268 recall:0.9268 f1:0.9268
[sklearn_metrics] Epoch:28 loss:0.2512 accuracy:0.9268 precision:0.9268 recall:0.9268 f1:0.9268
[custom_metrics] Epoch:29 loss:0.2370 accuracy:0.9300 precision:0.9300 recall:0.9300 f1:0.9300
[sklearn_metrics] Epoch:29 loss:0.2370 accuracy:0.9300 precision:0.9300 recall:0.9300 f1:0.9300
              precision    recall  f1-score   support

           0       0.98      0.98      0.98        59
           1       0.86      0.86      0.86        56
           2       0.98      0.91      0.94        53
           3       0.98      0.93      0.96        46
           4       0.95      0.97      0.96        61
           5       0.98      0.91      0.95        57
           6       0.96      0.96      0.96        57
           7       0.92      0.98      0.95        50
           8       0.87      0.81      0.84        48
           9       0.77      0.91      0.83        53

    accuracy                           0.92       540
   macro avg       0.93      0.92      0.92       540
weighted avg       0.93      0.92      0.92       540

[[58  0  0  0  1  0  0  0  0  0]
 [ 0 48  0  0  0  0  1  0  0  7]
 [ 0  2 48  0  0  0  0  1  2  0]
 [ 0  0  1 43  0  0  0  1  1  0]
 [ 0  0  0  0 59  0  0  1  1  0]
 [ 0  0  0  0  1 52  0  0  0  4]
 [ 1  1  0  0  0  0 55  0  0  0]
 [ 0  0  0  0  0  0  0 49  0  1]
 [ 0  4  0  0  1  1  1  0 39  2]
 [ 0  1  0  1  0  0  0  1  2 48]]
<Figure size 640x480 with 2 Axes>
[custom_metrics] accuracy:0.9241 precision:0.9241 recall:0.9241 f1:0.9241
[sklearn_metrics] accuracy:0.9241 precision:0.9241 recall:0.9241 f1:0.9241
[cm_metrics] accuracy:0.9241 precision:0.9241 recall:0.9241 f1:0.9241

We compute the same results as sklearn. To make sure the implementation is correct, we also print the per-category precision, recall and F1 on the test set:

[custom_metrics] 0 precision:0.9831 recall:0.9831 f1:0.9831
[custom_metrics] 1 precision:0.8571 recall:0.8571 f1:0.8571
[custom_metrics] 2 precision:0.9796 recall:0.9057 f1:0.9412
[custom_metrics] 3 precision:0.9773 recall:0.9348 f1:0.9556
[custom_metrics] 4 precision:0.9516 recall:0.9672 f1:0.9593
[custom_metrics] 5 precision:0.9811 recall:0.9123 f1:0.9455
[custom_metrics] 6 precision:0.9649 recall:0.9649 f1:0.9649
[custom_metrics] 7 precision:0.9245 recall:0.9800 f1:0.9515
[custom_metrics] 8 precision:0.8667 recall:0.8125 f1:0.8387
[custom_metrics] 9 precision:0.7742 recall:0.9057 f1:0.8348
[cm_metrics] 0 precision:0.9831 recall:0.9831 f1:0.9831
[cm_metrics] 1 precision:0.8571 recall:0.8571 f1:0.8571
[cm_metrics] 2 precision:0.9796 recall:0.9057 f1:0.9412
[cm_metrics] 3 precision:0.9773 recall:0.9348 f1:0.9556
[cm_metrics] 4 precision:0.9516 recall:0.9672 f1:0.9593
[cm_metrics] 5 precision:0.9811 recall:0.9123 f1:0.9455
[cm_metrics] 6 precision:0.9649 recall:0.9649 f1:0.9649
[cm_metrics] 7 precision:0.9245 recall:0.9800 f1:0.9515
[cm_metrics] 8 precision:0.8667 recall:0.8125 f1:0.8387
[cm_metrics] 9 precision:0.7742 recall:0.9057 f1:0.8348

This is consistent with classification_report in sklearn.
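Per-class metrics follow directly from the confusion matrix: for class i, TP is the diagonal entry, FP is the rest of column i, and FN is the rest of row i. A minimal sketch of how the per-class numbers above can be produced (my own helper, mirroring the aggregate version):

def get_per_class_p_r_f1(conf_matrix):
  for i in range(conf_matrix.shape[0]):
    TP = conf_matrix[i, i]
    FP = conf_matrix[:, i].sum() - TP  # predicted as i, actually another class
    FN = conf_matrix[i, :].sum() - TP  # actually i, predicted as another class
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    f1 = 2 * precision * recall / (precision + recall)
    print("[cm_metrics] {} precision:{:.4f} recall:{:.4f} f1:{:.4f}".format(i, precision, recall, f1))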

Draw ROC and calculate AUC

Finally, we draw the ROC curve and compute the AUC. I won't introduce these two metrics in detail here. First, the labels have to be binarized:

from itertools import cycle

def get_roc_auc(trues, preds):
  labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  nb_classes = len(labels)
  fpr = dict()
  tpr = dict()
  roc_auc = dict()
  # per-class ROC curves and AUCs
  for i in range(nb_classes):
    fpr[i], tpr[i], _ = roc_curve(trues[:, i], preds[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
  # Compute micro-average ROC curve and ROC area
  fpr["micro"], tpr["micro"], _ = roc_curve(trues.ravel(), preds.ravel())
  roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
  # First aggregate all false positive rates
  all_fpr = np.unique(np.concatenate([fpr[i] for i in range(nb_classes)]))
  # Then interpolate all ROC curves at these points
  mean_tpr = np.zeros_like(all_fpr)
  for i in range(nb_classes):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
  # Finally average it and compute AUC
  mean_tpr /= nb_classes
  fpr["macro"] = all_fpr
  tpr["macro"] = mean_tpr
  roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
  # Plot all ROC curves
  lw = 2
  plt.figure()
  plt.plot(fpr["micro"], tpr["micro"], label='micro-average ROC curve (area = {0:0.2f})'.format(roc_auc["micro"]), color='deeppink', linestyle=':', linewidth=4)
  plt.plot(fpr["macro"], tpr["macro"], label='macro-average ROC curve (area = {0:0.2f})'.format(roc_auc["macro"]), color='navy', linestyle=':', linewidth=4)
  colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
  for i, color in zip(range(nb_classes), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=lw, label='ROC curve of class {0} (area = {1:0.2f})'.format(i, roc_auc[i]))
  plt.plot([0, 1], [0, 1], 'k--', lw=lw)
  plt.xlim([0.0, 1.0])
  plt.ylim([0.0, 1.05])
  plt.xlabel('False Positive Rate')
  plt.ylabel('True Positive Rate')
  plt.title('Some extension of Receiver operating characteristic to multi-class')
  plt.legend(loc="lower right")
  plt.savefig("classification_roc_10.png")
  plt.show()

test_trues = label_binarize(test_trues, classes=[i for i in range(10)])
test_preds = label_binarize(test_preds, classes=[i for i in range(10)])
get_roc_auc(test_trues, test_preds)
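One caveat: the curves above are drawn from binarized hard predictions, so each per-class ROC has only a single operating point. ROC and AUC are normally computed from continuous scores; a hedged sketch of how the test loop could collect softmax probabilities instead (reusing the model and testDataLoader defined above):

test_scores = []
test_trues = []
model.eval()
with torch.no_grad():
  for test_data_batch, test_data_label in testDataLoader:
    test_data_batch = test_data_batch.float().to(device)
    outputs = model(test_data_batch)
    # keep the per-class probabilities instead of the argmax
    test_scores.extend(torch.softmax(outputs, dim=1).cpu().numpy())
    test_trues.extend(test_data_label.numpy())

test_trues_bin = label_binarize(test_trues, classes=[i for i in range(10)])
get_roc_auc(test_trues_bin, np.array(test_scores))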