yolov3 keras版本train.py各函数解析

发布时间：2023-11-25 14:00

main（）主要作用：

①读取信息：训练用的txt、类别、anchor

②判断是否是tiny

③权重保存格式

④样本数据按比例划分

⑤两阶段训练

⑥保存权重

def _main():
    """Run the two-stage YOLOv3 training pipeline with hard-coded paths.

    Steps, in order:
      1. Read the class names and anchors, then build the tiny or the full
         training model depending on how many anchors were loaded.
      2. Create the TensorBoard, checkpoint, learning-rate-reduction and
         early-stopping callbacks.
      3. Shuffle the annotation lines with a fixed seed and hold out 10% of
         them for validation.
      4. Stage 1: train epochs 0-50 with lr=1e-3 while the layers frozen by
         the model factory (``freeze_body=2``) stay frozen.
      5. Stage 2: mark every layer trainable and train epochs 50-100 with
         lr=1e-4, now also using the LR-reduction and early-stopping
         callbacks.

    Weights are written under ``log_dir`` after each stage.
    """
    annotation_path = 'train.txt'
    log_dir = 'logs/000/'
    classes_path = 'model_data/voc_classes.txt'
    anchors_path = 'model_data/yolo_anchors.txt'

    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    anchors = get_anchors(anchors_path)

    input_shape = (416, 416)  # (height, width); must be a multiple of 32

    # By default, exactly 6 loaded anchors selects the tiny network.
    if len(anchors) == 6:
        model = create_tiny_model(input_shape, anchors, num_classes,
            freeze_body=2, weights_path='model_data/tiny_yolo_weights.h5')
    else:
        # Make sure you know what freeze_body=2 freezes.
        model = create_model(input_shape, anchors, num_classes,
            freeze_body=2, weights_path='model_data/yolo_weights.h5')

    # Writes training/validation metrics to log_dir for TensorBoard.
    tensorboard_cb = TensorBoard(log_dir=log_dir)

    # Weights-only checkpoints, considered every 3 epochs and written only
    # when val_loss improves. The file name is a template that Keras fills in
    # with the epoch number and the logged losses.
    checkpoint = ModelCheckpoint(
        log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
        monitor='val_loss', save_weights_only=True, save_best_only=True, period=3)

    # Multiply the learning rate by 0.1 after 3 epochs without val_loss
    # improvement; verbose=1 prints a message when that happens.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)

    # Stop training after 10 epochs without any val_loss improvement.
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)

    val_split = 0.1  # fraction of the annotation lines held out for validation
    with open(annotation_path) as f:
        lines = f.readlines()
    # Fixed seed so the train/validation split is the same on every run, then
    # re-seed so later random calls are not tied to that fixed seed.
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines)*val_split)
    num_train = len(lines) - num_val

    def fit_stage(batch_size, initial_epoch, epochs, callbacks):
        # One fit_generator run over the train/validation split. Fresh slices
        # are taken for every run, as the generators shuffle their list in place.
        print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
        model.fit_generator(
            data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
            steps_per_epoch=max(1, num_train//batch_size),
            validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),
            validation_steps=max(1, num_val//batch_size),
            epochs=epochs,
            initial_epoch=initial_epoch,
            callbacks=callbacks)

    # Stage 1: train with frozen layers first, to get a stable loss.
    # Adjust the number of epochs to your dataset. This step alone is enough
    # to obtain a reasonable model.
    # The model's output already is the loss (the 'yolo_loss' layer), so the
    # Keras loss function just passes y_pred through.
    model.compile(optimizer=Adam(lr=1e-3), loss={
        'yolo_loss': lambda y_true, y_pred: y_pred})
    fit_stage(32, 0, 50, [tensorboard_cb, checkpoint])
    model.save_weights(log_dir + 'trained_weights_stage_1.h5')

    # Stage 2: unfreeze everything and continue training to fine-tune.
    # Train longer if the result is not good.
    for layer in model.layers:
        layer.trainable = True
    # Recompile so the trainable change takes effect; same loss, lower LR.
    model.compile(optimizer=Adam(lr=1e-4), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
    print('Unfreeze all of the layers.')

    # Note that more GPU memory is required after unfreezing the body.
    fit_stage(32, 50, 100, [tensorboard_cb, checkpoint, reduce_lr, early_stopping])

    model.save_weights(log_dir + 'trained_weights_final.h5')

get_classes(classes_path)读取每行类别，返回类别名。

def get_classes(classes_path):
    """Load class names from a text file, one name per line.

    Leading and trailing whitespace (including the newline) is stripped from
    every line; whitespace inside a name is kept. Blank lines are not
    filtered out and come back as empty strings.

    Args:
        classes_path: Path of the class-list file.

    Returns:
        List of class-name strings in file order.
    """
    with open(classes_path) as handle:
        return [raw_line.strip() for raw_line in handle.readlines()]

 get_anchors(anchors_path):读取anchor信息。

def get_anchors(anchors_path):
    """Load anchor boxes from the first line of a file.

    The first line must hold comma-separated numbers forming consecutive
    pairs; whitespace around a number is tolerated.

    Args:
        anchors_path: Path of the anchors file.

    Returns:
        Float ``numpy`` array of shape (N, 2), one row per anchor, so that
        ``len(result)`` is the number of anchors.

    Raises:
        ValueError: If a field is not a number or the count of numbers is odd.
    """
    with open(anchors_path) as f:
        first_line = f.readline()
    values = [float(x) for x in first_line.split(',')]
    # Pair the values up: callers use len(anchors) as the anchor count
    # (6 selects the tiny model, and the count is divided by 3 per scale),
    # so a flat array would report twice as many anchors.
    return np.array(values).reshape(-1, 2)

create_model（）：

1、拆分图片尺寸的高h和宽w；
2、创建图片的输入层image_input。在输入层中，既可显式指定图片尺寸，如(416, 416, 3)，也可隐式指定，用“?”代替，如(?, ?, 3)；
3、计算anchor的数量num_anchors；
4、根据anchor的数量，创建真值y_true的输入格式。

参数：

input_shape：输入图尺寸（416,416）
anchors：默认9种anchor，结构（9,2）
num_classes：类别个数
load_pretrained：是否使用预训练权重，预训练权重，既可以产生更好的效果，也可以加快模型的训练速度；
freeze_body：冻结模式，1或2。其中，1是冻结DarkNet53网络中的层，2是只保留最后3个1x1的卷积层，其余层全部冻结；
weights_path：预训练权重的读取路径

def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
            weights_path='model_data/yolo_weights.h5'):
    """Build the YOLOv3 training model whose single output is the loss.

    Args:
        input_shape: (height, width) of the network input, e.g. (416, 416).
            Used only to size the ground-truth inputs.
        anchors: Anchor collection; ``len(anchors)`` is taken as the total
            number of anchors and is split evenly over the 3 output scales.
        num_classes: Number of object classes.
        load_pretrained: When true, load ``weights_path`` into the body by
            layer name (mismatching layers are skipped) and apply
            ``freeze_body``.
        freeze_body: 1 freezes the first 185 layers of the body, 2 freezes
            all but its last 3 layers; any other value freezes nothing.
            Ignored when ``load_pretrained`` is false.
        weights_path: Path of the pretrained weights file.

    Returns:
        A Keras ``Model`` that takes ``[image, *y_true]`` as inputs and
        outputs the result of the ``yolo_loss`` Lambda layer.
    """
    K.clear_session()  # get a new session
    image_input = Input(shape=(None, None, 3))  # image size is left unspecified
    h, w = input_shape
    num_anchors = len(anchors)
    anchors_per_scale = num_anchors // 3

    # One ground-truth input per output scale (strides 32, 16 and 8).
    # E.g. for a 416x416 input, 9 anchors and 1 class the shapes are
    #   (?, 13, 13, 3, 6), (?, 26, 26, 3, 6), (?, 52, 52, 3, 6)
    # i.e. (batch, grid_h, grid_w, anchors per scale, num_classes + 5).
    # The "+ 5" is, per the original notes, 4 box values plus 1 confidence
    # value; the exact layout is defined by preprocess_true_boxes (not shown).
    strides = (32, 16, 8)
    y_true = [Input(shape=(h//stride, w//stride, anchors_per_scale, num_classes+5))
              for stride in strides]

    # Detection network: image in, one prediction tensor per scale out.
    # Per the original notes these are (?, 13, 13, 18), (?, 26, 26, 18) and
    # (?, 52, 52, 18) for the example configuration above.
    model_body = yolo_body(image_input, anchors_per_scale, num_classes)
    print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    if load_pretrained:
        # Load by layer name and skip layers whose shapes do not match.
        model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
        print('Load weights {}.'.format(weights_path))
        if freeze_body in [1, 2]:
            # Freeze darknet53 body or freeze all but 3 output layers.
            # Per the original notes the body has 252 layers in total and the
            # last 3 are the 1x1 convolutions that produce the predictions.
            num = (185, len(model_body.layers)-3)[freeze_body-1]
            for layer in model_body.layers[:num]:
                layer.trainable = False
            print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))

    # Loss layer: a Lambda named 'yolo_loss' that receives the body outputs
    # followed by the ground-truth inputs, with declared output shape (1,).
    # anchors, num_classes and ignore_thresh are forwarded to yolo_loss as
    # keyword arguments.
    model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
        arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})(
        [*model_body.output, *y_true])
    model = Model([model_body.input, *y_true], model_loss)

    return model

tiny略过。

data_generator（）：生成数据格式，将boxes信息以及与它们匹配的anchors、置信度信息、类别信息保存到y_true中，即label标签的制作。

#数据生成
#annotation_lines指读入的图片地址，也可能是图片信息，也就是框、类等信息
def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
    """Endless batch generator for ``fit_generator``.

    Walks through ``annotation_lines`` cyclically, shuffling the list in
    place each time the cursor is back at the first entry. Every entry goes
    through ``get_random_data`` with ``random=True`` and the collected boxes
    are converted to training targets by ``preprocess_true_boxes``.

    Args:
        annotation_lines: Annotation entries, one per image; shuffled in place.
        batch_size: Number of samples per yielded batch.
        input_shape: Passed to ``get_random_data`` and ``preprocess_true_boxes``.
        anchors: Passed to ``preprocess_true_boxes``.
        num_classes: Passed to ``preprocess_true_boxes``.

    Yields:
        ``([image_data, *y_true], np.zeros(batch_size))``. The zeros are
        placeholder targets.
    """
    total = len(annotation_lines)
    cursor = 0
    while True:
        images, boxes = [], []
        for _ in range(batch_size):
            # Reshuffle at the start of every pass over the annotations.
            if cursor == 0:
                np.random.shuffle(annotation_lines)
            image, box = get_random_data(annotation_lines[cursor], input_shape, random=True)
            images.append(image)
            boxes.append(box)
            cursor = (cursor + 1) % total
        image_data = np.array(images)
        box_data = np.array(boxes)
        # Build the per-scale ground-truth tensors from the raw boxes.
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        yield [image_data, *y_true], np.zeros(batch_size)

data_generator_wrapper（）：读取数据长度，判断是否为0，调用data_generator函数。

##取读取数据的长度并判断长度是否为0，调用data_generator函数
def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes):
    """Return a ``data_generator`` for the inputs, or None if they are unusable.

    None is returned when ``annotation_lines`` is empty or ``batch_size`` is
    not positive; otherwise all arguments are forwarded unchanged.
    """
    if len(annotation_lines) == 0:
        return None
    if batch_size <= 0:
        return None
    return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)

yolov3 keras版本train.py各函数解析

相关推荐