YOLOv3 Source Code in Depth (Part 7): YoloDataset

The code discussed here is mainly taken from bubbliiiing's YOLOv3 implementation on GitHub: github.com/bubbliiiing…

Interpretation of source code

Training section

The dataloader.py file

class YoloDataset(Dataset):
    """Dataset that loads one annotated image per index for YOLOv3.

    Each annotation line has the form ``<image path> x1,y1,x2,y2,cls ...``:
    the image path followed by whitespace-separated boxes, each box being
    comma-separated integers.
    """

    def __init__(self, annotation_lines, input_shape, num_classes, train):
        """Store the dataset configuration.

        Args:
            annotation_lines: sequence of annotation strings, one per image.
            input_shape: target size; the first two entries are (height, width).
            num_classes: number of object classes.
            train: truthy to apply random augmentation in ``__getitem__``.
        """
        super(YoloDataset, self).__init__()
        self.annotation_lines = annotation_lines
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.length = len(self.annotation_lines)
        self.train = train

    def __len__(self):
        """Return the number of annotation lines."""
        return self.length

    def __getitem__(self, index):
        """Return ``(image, box)`` for the given index.

        ``image`` is the preprocessed float32 image with its axes transposed
        by ``(2, 0, 1)`` (channel axis first). ``box`` is a float32 array whose
        first four columns are converted from corner form (x1, y1, x2, y2) in
        pixels to normalized (center x, center y, width, height); any further
        columns are passed through unchanged.
        """
        # Wrap the index so it always falls inside the dataset.
        index = index % self.length
        # Random augmentation is applied during training only;
        # validation uses a plain aspect-preserving resize.
        image, box = self.get_random_data(
            self.annotation_lines[index], self.input_shape[0:2], random=self.train
        )
        image = np.transpose(
            preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1)
        )
        box = np.array(box, dtype=np.float32)
        if len(box) != 0:
            # Normalize x by the input width and y by the input height.
            box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
            box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]

            # Corner form -> (center x, center y, width, height).
            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
        return image, box

    def rand(self, a=0, b=1):
        """Return a uniform random float in ``[a, b)``."""
        return np.random.rand() * (b - a) + a

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
        """Load one image and its boxes, resized onto an ``input_shape`` canvas.

        Args:
            annotation_line: ``<image path> x1,y1,x2,y2,cls ...`` string.
            input_shape: target ``(height, width)``.
            jitter: strength of the random aspect-ratio distortion.
            hue, sat, val: strengths of the random HSV gain.
            random: when false, only an aspect-preserving resize centered on a
                gray canvas is performed (validation); when true, random
                scaling, placement, horizontal flip and HSV jitter are applied.

        Returns:
            ``(image_data, box)``: the image as an array (float32 in the
            non-random branch, uint8 otherwise) and the boxes mapped onto the
            canvas, clipped to it, with boxes of width or height <= 1 removed.
        """
        line = annotation_line.split()
        # Read the image and convert it to RGB.
        image = Image.open(line[0])
        image = cvtColor(image)
        # Source image size and target size.
        iw, ih = image.size
        h, w = input_shape
        # Parse the ground-truth boxes: one comma-separated group per box.
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])

        if not random:
            scale = min(w / iw, h / ih)
            nw = int(iw * scale)
            nh = int(ih * scale)
            dx = (w - nw) // 2
            dy = (h - nh) // 2

            # Pad the unused area of the canvas with gray bars.
            image = image.resize((nw, nh), Image.BICUBIC)
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image, np.float32)

            # Map the ground-truth boxes onto the padded canvas.
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid box

            return image_data, box

        # Randomly rescale the image and distort its aspect ratio.
        new_ar = iw / ih * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # Paste at a random offset; the rest of the canvas stays gray.
        dx = int(self.rand(0, w - nw))
        dy = int(self.rand(0, h - nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image = new_image

        # Flip the image horizontally with probability 0.5.
        flip = self.rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        image_data = np.array(image, np.uint8)
        # Random per-channel gains for the HSV color jitter.
        r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
        # Convert to HSV; note that hue/sat/val are rebound to the channels here.
        hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
        dtype = image_data.dtype
        # Apply the gains through lookup tables.
        x = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

        image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
        image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)

        # Map the ground-truth boxes through the same resize, offset and flip.
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip:
                box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]

        return image_data, box
Copy the code

Read the code in detail

The three core methods of the Dataset class

# initialization
def __init__(self, annotation_lines, input_shape, num_classes, train) :
    # Store the dataset configuration on the instance.
    super(YoloDataset, self).__init__()
    # List of annotation lines read from the TXT file
    self.annotation_lines   = annotation_lines
    # Network input size as (height, width), e.g. [416, 416]
    self.input_shape        = input_shape
    # Number of classes, e.g. 20 for VOC
    self.num_classes        = num_classes
    # Total number of samples (one per annotation line)
    self.length             = len(self.annotation_lines)
    # Train or val flag; __getitem__ passes it to get_random_data as `random`,
    # so random augmentation is applied only when it is true
    self.train              = train

# Return the total number of images (training data)
def __len__(self) :
    """Return the number of samples, i.e. the number of annotation lines."""
    return self.length

# Get the data for a given index
def __getitem__(self, index):
    """Return ``(image, box)`` for the given index.

    ``image`` is the preprocessed float32 image with the channel axis moved
    first; ``box`` is a float32 array whose first four columns are normalized
    (center x, center y, width, height).
    """
    # Wrap the index so it always falls inside the dataset.
    index = index % self.length
    # Random data augmentation is applied during training;
    # no random augmentation is performed during validation.
    # Depending on the phase, the image and its boxes are either augmented
    # or only resized.
    image, box = self.get_random_data(self.annotation_lines[index], self.input_shape[0:2], random=self.train)
    # PyTorch expects channel-first input (channel, height, width), so the
    # axes are transposed directly here.
    image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
    # Load the boxes as a float32 ndarray.
    box = np.array(box, dtype=np.float32)
    if len(box) != 0:
        # Normalize the pixel coordinates: x1, x2 by the input width and
        # y1, y2 by the input height.
        box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
        box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]

        # Convert normalized (x1, y1, x2, y2) into (center x, center y, w, h):
        # first width/height, then shift the top-left corner by half of them.
        box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
        box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
    # Return the image data and the label data.
    return image, box
Copy the code

Calling the get_random_data method

# An annotation line that is read looks like this: C:\Users\XXX\yolo3-pytorch-master\VOCdevkit/VOC2007/JPEGImages/000072.jpg 40,71,333,473,13
def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
    """Load one image and its boxes, resized onto an ``input_shape`` canvas.

    With ``random`` false (validation) the image is only resized with its
    aspect ratio preserved and centered on a gray canvas. With ``random`` true
    (training) it is randomly scaled, distorted, placed, flipped and
    color-jittered. In both cases the boxes are mapped accordingly.

    Returns ``(image_data, box)``.
    """
    # Split the annotation line into the image path and the box strings.
    line = annotation_line.split()

    # Read the image and convert it to RGB.
    image = Image.open(line[0])
    image = cvtColor(image)

    # Size of the real image.
    iw, ih = image.size
    # Requested height and width, e.g. [416, 416].
    h, w = input_shape

    # Parse the ground-truth boxes, e.g. [[40,71,333,473,13], [89,31,323,73,11], ...].
    # Note the nested comprehension: each box string is split on commas and
    # converted to integers.
    box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])

    # `random` tells training (True) from validation (False).
    if not random:
        # Validation: no augmentation, only resize the image and map the boxes.
        scale = min(w / iw, h / ih)
        nw = int(iw * scale)
        nh = int(ih * scale)

        # Horizontal and vertical padding needed to center the image.
        dx = (w - nw) // 2
        dy = (h - nh) // 2

        # Resize without distortion and pad the remaining area with gray bars.
        image = image.resize((nw, nh), Image.BICUBIC)
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))

        # Convert the image to an ndarray.
        image_data = np.array(new_image, np.float32)

        # Adjust the ground-truth boxes, if there are any.
        if len(box) > 0:
            # Shuffle the order of the boxes.
            np.random.shuffle(box)
            # Coordinates of the boxes on the padded canvas.
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy

            # Clip the boxes to the canvas.
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid box

        return image_data, box

    # Everything below is image augmentation (random scaling, distortion,
    # flipping, RGB -> HSV -> RGB color jitter); it is not covered in detail.

    # Scale the image and distort its aspect ratio.
    new_ar = iw / ih * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
    scale = self.rand(.25, 2)
    if new_ar < 1:
        nh = int(scale * h)
        nw = int(nh * new_ar)
    else:
        nw = int(scale * w)
        nh = int(nw / new_ar)
    image = image.resize((nw, nh), Image.BICUBIC)

    # Paste at a random offset and pad the remaining area with gray bars.
    dx = int(self.rand(0, w - nw))
    dy = int(self.rand(0, h - nh))
    new_image = Image.new('RGB', (w, h), (128, 128, 128))
    new_image.paste(image, (dx, dy))
    image = new_image

    # Flip the image horizontally with probability 0.5.
    flip = self.rand() < .5
    if flip:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)

    image_data = np.array(image, np.uint8)

    # Color jitter: compute the random gains for the three HSV channels.
    r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1

    # Convert the image to HSV; hue/sat/val now refer to the channels.
    hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
    dtype = image_data.dtype

    # Apply the gains through lookup tables.
    x = np.arange(0, 256, dtype=r.dtype)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
    image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)

    # Adjust the ground-truth boxes through the same resize, offset and flip.
    if len(box) > 0:
        np.random.shuffle(box)
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
        if flip:
            box[:, [0, 2]] = w - box[:, [2, 0]]
        box[:, 0:2][box[:, 0:2] < 0] = 0
        box[:, 2][box[:, 2] > w] = w
        box[:, 3][box[:, 3] > h] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        box = box[np.logical_and(box_w > 1, box_h > 1)]

    return image_data, box
Copy the code

Let's look at the box data before the transformation:

The box data after data augmentation is:

\

YOLOv3 source code precision understanding (seven) YoloDataSet

Interpretation of source code

Training section

Dataloader. Py files

Related Posts

Random forests in ensemble learning

Graph theory for Machine Learning (7) : Connectivity

Intelligent learning lamp auto-shoots frame design