Training your own data with TF object detection API
Tim Chen (motion$)

System Info

To-dos

Environment preparation

Make your own dataset

  • In our case we have YOLO-format annotation files (txt files), but the TensorFlow Object Detection API consumes data in TFRecord format, so we convert in three steps:
yolo-2-voc.py
voc-2-csv.py
csv-2-tfrecord.py

yolo to voc

  • Prepare two folders, one for the annotation files and the other for the image files. The VOC-format output (xml files) will be saved into the converted_labels folder.
  • Manually change the label mapping (CLASS_MAPPING) to match your own data.
  • Note that the converted (xmin, ymin, xmax, ymax) values are integers. A worked sketch of the coordinate math follows.
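  • A minimal sketch of the math the converter performs on one annotation line (the example line and image size below are made up for illustration):
    line = "1 0.5 0.4 0.2 0.3"    # hypothetical YOLO line: class_id cx cy w h, all normalized to [0, 1]
    w, h = 1200, 800              # hypothetical image size in pixels

    cls_id, cx, cy, bw, bh = line.split()
    bbox_width = float(bw) * w    # 240.0
    bbox_height = float(bh) * h   # 240.0
    center_x = float(cx) * w      # 600.0
    center_y = float(cy) * h      # 320.0

    xmin = int(center_x - bbox_width / 2)    # 480
    ymin = int(center_y - bbox_height / 2)   # 200
    xmax = int(center_x + bbox_width / 2)    # 720
    ymax = int(center_y + bbox_height / 2)   # 440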
    # Script to convert YOLO annotations to VOC format

    # Sample format
    # <annotation>
    #     <folder>_image_fashion</folder>
    #     <filename>brooke-cagle-39574.jpg</filename>
    #     <size>
    #         <width>1200</width>
    #         <height>800</height>
    #         <depth>3</depth>
    #     </size>
    #     <segmented>0</segmented>
    #     <object>
    #         <name>head</name>
    #         <pose>Unspecified</pose>
    #         <truncated>0</truncated>
    #         <difficult>0</difficult>
    #         <bndbox>
    #             <xmin>549</xmin>
    #             <ymin>251</ymin>
    #             <xmax>625</xmax>
    #             <ymax>335</ymax>
    #         </bndbox>
    #     </object>
    # </annotation>
    import os
    import xml.etree.cElementTree as ET
    from PIL import Image

    ANNOTATIONS_DIR_PREFIX = "annotations"

    DESTINATION_DIR = "converted_labels"

    CLASS_MAPPING = {
        '0': 'cream_hazelnut',
        '1': 'cream_berry',
        '2': 'cream_cherry',
        '3': 'yida_cool_lemon',
        '4': 'box_yogurt_mango',
        '5': 'white_strawberry',
        '6': 'cookies_lemon',
        '7': 'yogurt_cranberry',
        '8': 'box_cookies_matcha',
        '9': 'cookies_matcha',
        '10': 'yogurt_mango',
        '11': 'white_passionfruit',
        '12': 'yida_cool_litchi',
        '13': 'box_white_strawberry'
        # Add your remaining classes here.
    }


    def create_root(file_prefix, width, height):
        # VOC files use <annotation> as the root element.
        root = ET.Element("annotation")
        ET.SubElement(root, "filename").text = "{}.jpg".format(file_prefix)
        ET.SubElement(root, "folder").text = "images"
        size = ET.SubElement(root, "size")
        ET.SubElement(size, "width").text = str(width)
        ET.SubElement(size, "height").text = str(height)
        ET.SubElement(size, "depth").text = "3"
        return root


    def create_object_annotation(root, voc_labels):
        for voc_label in voc_labels:
            obj = ET.SubElement(root, "object")
            ET.SubElement(obj, "name").text = voc_label[0]
            ET.SubElement(obj, "pose").text = "Unspecified"
            ET.SubElement(obj, "truncated").text = str(0)
            ET.SubElement(obj, "difficult").text = str(0)
            bbox = ET.SubElement(obj, "bndbox")
            ET.SubElement(bbox, "xmin").text = str(voc_label[1])
            ET.SubElement(bbox, "ymin").text = str(voc_label[2])
            ET.SubElement(bbox, "xmax").text = str(voc_label[3])
            ET.SubElement(bbox, "ymax").text = str(voc_label[4])
        return root


    def create_file(file_prefix, width, height, voc_labels):
        root = create_root(file_prefix, width, height)
        root = create_object_annotation(root, voc_labels)
        tree = ET.ElementTree(root)
        tree.write("{}/{}.xml".format(DESTINATION_DIR, file_prefix))


    def read_file(file_path):
        file_prefix = file_path.split(".txt")[0]
        image_file_name = "{}.jpg".format(file_prefix)
        img = Image.open("{}/{}".format("images", image_file_name))
        w, h = img.size

        with open("{}/{}".format(ANNOTATIONS_DIR_PREFIX, file_path), 'r') as file:
            lines = file.readlines()
            voc_labels = []
            for line in lines:
                voc = []
                line = line.strip()
                data = line.split()
                voc.append(CLASS_MAPPING.get(data[0]))
                bbox_width = float(data[3]) * w
                bbox_height = float(data[4]) * h
                center_x = float(data[1]) * w
                center_y = float(data[2]) * h
                voc.append(int(center_x - (bbox_width / 2)))
                voc.append(int(center_y - (bbox_height / 2)))
                voc.append(int(center_x + (bbox_width / 2)))
                voc.append(int(center_y + (bbox_height / 2)))
                voc_labels.append(voc)
            create_file(file_prefix, w, h, voc_labels)
        print("Processing complete for file: {}/{}".format(ANNOTATIONS_DIR_PREFIX, file_path))


    def start():
        if not os.path.exists(DESTINATION_DIR):
            os.makedirs(DESTINATION_DIR)
        for filename in os.listdir(ANNOTATIONS_DIR_PREFIX):
            if filename.endswith('txt'):
                read_file(filename)
            else:
                print("Skipping file: {}".format(filename))


    if __name__ == "__main__":
        start()
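
  • Run the converter from the dataset directory; a minimal usage sketch (the script name comes from the pipeline list above, the dataset path is an assumption):
    cd /path/to/your_dataset    # must contain the annotations/ and images/ folders
    python yolo-2-voc.py        # writes one xml per txt file into converted_labels/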

train test split on xml files

  • You can change the percentages (trainval_percent, train_percent) below to adjust the train/validation/test split; see the worked example that follows.
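  • For example, with the default 0.8 × 0.8 split (a hand-worked sketch, not part of the script):
    num = 100             # total number of xml files
    tv = int(100 * 0.8)   # trainval size -> 80
    tr = int(80 * 0.8)    # train size    -> 64
    # validation = tv - tr = 16, test = num - tv = 20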
    import os
    import random
    import time
    import shutil

    xmlfilepath = r'./Annotations'
    saveBasePath = r"./"

    trainval_percent = 0.8
    train_percent = 0.8
    total_xml = os.listdir(xmlfilepath)
    num = len(total_xml)
    indices = range(num)  # renamed from `list` to avoid shadowing the built-in
    tv = int(num * trainval_percent)
    tr = int(tv * train_percent)
    trainval = random.sample(indices, tv)
    train = random.sample(trainval, tr)
    print("train and val size", tv)
    print("train size", tr)

    start = time.time()

    test_num = 0
    val_num = 0
    train_num = 0
    print('total xml : {}'.format(total_xml))

    for i in indices:
        name = total_xml[i]
        # print('name : {}'.format(name))
        if i in trainval:  # train and val set
            if i in train:
                directory = "train"
                train_num += 1
                xml_path = os.path.join(os.getcwd(), '{}'.format(directory))
                if not os.path.exists(xml_path):
                    os.mkdir(xml_path)
                filePath = os.path.join(xmlfilepath, name)
                newfile = os.path.join(saveBasePath, os.path.join(directory, name))
                # print('newfile : {}'.format(newfile))
                shutil.copyfile(filePath, newfile)
            else:
                directory = "validation"
                xml_path = os.path.join(os.getcwd(), '{}'.format(directory))
                if not os.path.exists(xml_path):
                    os.mkdir(xml_path)
                val_num += 1
                filePath = os.path.join(xmlfilepath, name)
                newfile = os.path.join(saveBasePath, os.path.join(directory, name))
                # print('newfile : {}'.format(newfile))
                shutil.copyfile(filePath, newfile)
        else:
            directory = "test"
            xml_path = os.path.join(os.getcwd(), '{}'.format(directory))
            if not os.path.exists(xml_path):
                os.mkdir(xml_path)
            test_num += 1
            filePath = os.path.join(xmlfilepath, name)
            newfile = os.path.join(saveBasePath, os.path.join(directory, name))
            # print('name : {}'.format(name))
            shutil.copyfile(filePath, newfile)

    end = time.time()
    seconds = end - start
    print("train total : " + str(train_num))
    print("validation total : " + str(val_num))
    print("test total : " + str(test_num))
    total_num = train_num + val_num + test_num
    print("total number : " + str(total_num))
    print("Time taken : {0} seconds".format(seconds))

voc to csv

  • Convert the xml files to csv for the train, test, and validation folders individually.
  • You should change the save path for your own csv files.
    import os
    import glob
    import pandas as pd
    import xml.etree.ElementTree as ET

    def xml_to_csv(path):
        xml_list = []
        for xml_file in glob.glob(path + '/*.xml'):
            tree = ET.parse(xml_file)
            root = tree.getroot()

            print(root.find('filename').text)
            for member in root.findall('object'):
                # member[4] is the <bndbox> element; its children are
                # xmin, ymin, xmax, ymax in that order.
                value = (root.find('filename').text,
                         int(root.find('size')[0].text),  # width
                         int(root.find('size')[1].text),  # height
                         member[0].text,                   # class name
                         int(float(member[4][0].text)),    # xmin
                         int(float(member[4][1].text)),    # ymin
                         int(float(member[4][2].text)),    # xmax
                         int(float(member[4][3].text))     # ymax
                         )
                xml_list.append(value)
        column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
        xml_df = pd.DataFrame(xml_list, columns=column_name)
        return xml_df

    def main():
        for directory in ['train', 'test', 'validation']:
            xml_path = os.path.join(os.getcwd(), './{}'.format(directory))

            xml_df = xml_to_csv(xml_path)
            # xml_df.to_csv('whsyxt.csv', index=None)
            xml_df.to_csv('/home/tim/workspace/models/research/object_detection/data/dove_cholo_{}_labels.csv'.format(directory), index=None)
            print('Successfully converted xml to csv.')

    main()
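
  • The resulting csv has one row per bounding box; a hypothetical excerpt (file names and coordinates are invented for illustration, class names come from the mapping above):
    filename,width,height,class,xmin,ymin,xmax,ymax
    img_0001.jpg,1200,800,cream_berry,480,200,720,440
    img_0001.jpg,1200,800,yogurt_mango,100,150,260,330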

csv to tfrecord

  • You should set the path to your JPEGImages folder (the path variable in main()).
    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    """
    Created on Tue Mar 5 15:28:55 2019

    @author: z
    """

    """
    Usage:
    # From tensorflow/models/
    # Create train data:
    python generate_tfrecord.py --csv_input=data/tv_vehicle_labels.csv --output_path=train.record
    # Create test data:
    python generate_tfrecord.py --csv_input=data/test_labels.csv --output_path=test.record
    """

    import os
    import io
    import pandas as pd
    import tensorflow as tf

    from PIL import Image
    from object_detection.utils import dataset_util
    from collections import namedtuple, OrderedDict

    os.chdir('/home/tim/workspace/models/research/')

    flags = tf.app.flags
    flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
    flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
    FLAGS = flags.FLAGS


    # TO-DO replace this with label map
    def class_text_to_int(row_label):
        # All of your classes; ids must start from 1 because 0 is reserved for the background.
        if row_label == 'cream_berry':
            return 1
        elif row_label == 'cream_cherry':
            return 2
        elif row_label == 'yida_cool_lemon':
            return 3
        elif row_label == 'box_yogurt_mango':
            return 4
        elif row_label == 'white_strawberry':
            return 5
        elif row_label == 'cookies_lemon':
            return 6
        elif row_label == 'yogurt_cranberry':
            return 7
        elif row_label == 'box_cookies_matcha':
            return 8
        elif row_label == 'cookies_matcha':
            return 9
        elif row_label == 'yogurt_mango':
            return 10
        elif row_label == 'white_passionfruit':
            return 11
        elif row_label == 'yida_cool_litchi':
            return 12
        elif row_label == 'box_white_strawberry':
            return 13
        elif row_label == 'cream_hazelnut':
            return 14
        else:
            return None


    def split(df, group):
        data = namedtuple('data', ['filename', 'object'])
        gb = df.groupby(group)
        return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)]


    def create_tf_example(group, path):
        with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
            encoded_jpg = fid.read()
        encoded_jpg_io = io.BytesIO(encoded_jpg)
        image = Image.open(encoded_jpg_io)
        width, height = image.size

        filename = group.filename.encode('utf8')
        image_format = b'jpg'
        xmins = []
        xmaxs = []
        ymins = []
        ymaxs = []
        classes_text = []
        classes = []

        for index, row in group.object.iterrows():
            xmins.append(row['xmin'] / width)
            xmaxs.append(row['xmax'] / width)
            ymins.append(row['ymin'] / height)
            ymaxs.append(row['ymax'] / height)
            classes_text.append(row['class'].encode('utf8'))
            classes.append(class_text_to_int(row['class']))

        tf_example = tf.train.Example(features=tf.train.Features(feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/encoded': dataset_util.bytes_feature(encoded_jpg),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label': dataset_util.int64_list_feature(classes),
        }))
        return tf_example


    def main(_):
        writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
        path = os.path.join(os.getcwd(), 'object_detection/VOCdevkit/VOC2020_dove_cholo/JPEGImages/')
        examples = pd.read_csv(FLAGS.csv_input)
        grouped = split(examples, 'filename')
        num = 0
        for group in grouped:
            num += 1
            tf_example = create_tf_example(group, path)
            writer.write(tf_example.SerializeToString())
            if num % 100 == 0:  # print progress every 100 conversions
                print(num)

        writer.close()
        output_path = os.path.join(os.getcwd(), FLAGS.output_path)
        print('Successfully created the TFRecords: {}'.format(output_path))


    if __name__ == '__main__':
        tf.app.run()
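
  • As the TO-DO comment in the script suggests, the if/elif chain can be replaced by a single lookup table; a minimal sketch with the same names and ids:
    LABEL_MAP = {
        'cream_berry': 1, 'cream_cherry': 2, 'yida_cool_lemon': 3,
        'box_yogurt_mango': 4, 'white_strawberry': 5, 'cookies_lemon': 6,
        'yogurt_cranberry': 7, 'box_cookies_matcha': 8, 'cookies_matcha': 9,
        'yogurt_mango': 10, 'white_passionfruit': 11, 'yida_cool_litchi': 12,
        'box_white_strawberry': 13, 'cream_hazelnut': 14,
    }

    def class_text_to_int(row_label):
        # Returns None for unknown labels, matching the original behaviour.
        return LABEL_MAP.get(row_label)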
  • Commands to generate the tfrecord files:
    cd models/research/

    python generate_tfrecord.py --csv_input=object_detection/data/dove_cholo_test_labels.csv --output_path=dove_test.tfrecord

    python generate_tfrecord.py --csv_input=object_detection/data/dove_cholo_validation_labels.csv --output_path=dove_validation.tfrecord

    python generate_tfrecord.py --csv_input=object_detection/data/dove_cholo_train_labels.csv --output_path=dove_train.tfrecord
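
  • To sanity-check a generated file you can count its records with the same TF1 API the script already uses; a minimal sketch (the file name assumes the commands above):
    import tensorflow as tf

    count = 0
    for _ in tf.python_io.tf_record_iterator('dove_train.tfrecord'):
        count += 1
    # should equal the number of distinct image files in the train csv
    print('records:', count)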

Training model

  • Things to prepare
    • create your own label-map.pbtxt
      cd models/research/object_detection/data
      # create label-map.pbtxt; the contents look like this:


      item {
        id: 1  # ids start from 1
        name: 'red pedestrian'
      }

      item {
        id: 2
        name: 'green pedestrian'
      }
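    • For this tutorial's dataset, the ids must match class_text_to_int in generate_tfrecord.py; a sketch of dove_cholo_label_map.pbtxt (middle entries elided, fill them in the same way):
      item {
        id: 1
        name: 'cream_berry'
      }

      item {
        id: 2
        name: 'cream_cherry'
      }

      # ... items 3 through 13 ...

      item {
        id: 14
        name: 'cream_hazelnut'
      }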
    • model config file list
      cd object_detection/samples/config/

      (base) tim@tim-System-Product-Name:~/workspace/models/research/object_detection/samples/configs$ tree
      .
      ├── embedded_ssd_mobilenet_v1_coco.config
      ├── facessd_mobilenet_v2_quantized_320x320_open_image_v4.config
      ├── faster_rcnn_inception_resnet_v2_atrous_coco.config
      ├── faster_rcnn_inception_resnet_v2_atrous_cosine_lr_coco.config
      ├── faster_rcnn_inception_resnet_v2_atrous_oid.config
      ├── faster_rcnn_inception_resnet_v2_atrous_oid_v4.config
      ├── faster_rcnn_inception_resnet_v2_atrous_pets.config
      ├── faster_rcnn_inception_v2_coco.config
      ├── faster_rcnn_inception_v2_pets.config
      ├── faster_rcnn_nas_coco.config
      ├── faster_rcnn_resnet101_atrous_coco.config
      ├── faster_rcnn_resnet101_ava_v2.1.config
      ├── faster_rcnn_resnet101_coco.config
      ├── faster_rcnn_resnet101_fgvc.config
      ├── faster_rcnn_resnet101_kitti.config
      ├── faster_rcnn_resnet101_pets.config
      ├── faster_rcnn_resnet101_voc07.config
      ├── faster_rcnn_resnet152_coco.config
      ├── faster_rcnn_resnet152_pets.config
      ├── faster_rcnn_resnet50_coco.config
      ├── faster_rcnn_resnet50_fgvc.config
      ├── faster_rcnn_resnet50_pets.config
      ├── mask_rcnn_inception_resnet_v2_atrous_coco.config
      ├── mask_rcnn_inception_v2_coco.config
      ├── mask_rcnn_resnet101_atrous_coco.config
      ├── mask_rcnn_resnet101_pets.config
      ├── mask_rcnn_resnet50_atrous_coco.config
      ├── rfcn_resnet101_coco.config
      ├── rfcn_resnet101_pets.config
      ├── ssd_inception_v2_coco.config
      ├── ssd_inception_v2_pets.config
      ├── ssd_inception_v3_pets.config
      ├── ssdlite_mobilenet_edgetpu_320x320_coco.config
      ├── ssdlite_mobilenet_edgetpu_320x320_coco_quant.config
      ├── ssdlite_mobilenet_v1_coco.config
      ├── ssdlite_mobilenet_v2_coco.config
      ├── ssdlite_mobilenet_v3_large_320x320_coco.config
      ├── ssdlite_mobilenet_v3_small_320x320_coco.config
      ├── ssd_mobilenet_v1_0.75_depth_300x300_coco14_sync.config
      ├── ssd_mobilenet_v1_0.75_depth_quantized_300x300_coco14_sync.config
      ├── ssd_mobilenet_v1_0.75_depth_quantized_300x300_pets_sync.config
      ├── ssd_mobilenet_v1_300x300_coco14_sync.config
      ├── ssd_mobilenet_v1_coco.config
      ├── ssd_mobilenet_v1_focal_loss_pets.config
      ├── ssd_mobilenet_v1_focal_loss_pets_inference.config
      ├── ssd_mobilenet_v1_fpn_shared_box_predictor_640x640_coco14_sync.config
      ├── ssd_mobilenet_v1_pets.config
      ├── ssd_mobilenet_v1_ppn_shared_box_predictor_300x300_coco14_sync.config
      ├── ssd_mobilenet_v1_quantized_300x300_coco14_sync.config
      ├── ssd_mobilenet_v2_coco.config
      ├── ssd_mobilenet_v2_fpnlite_quantized_shared_box_predictor_256x256_depthmultiplier_75_coco14_sync.config
      ├── ssd_mobilenet_v2_fullyconv_coco.config
      ├── ssd_mobilenet_v2_oid_v4.config
      ├── ssd_mobilenet_v2_pets_keras.config
      ├── ssd_mobilenet_v2_quantized_300x300_coco.config
      ├── ssd_resnet101_v1_fpn_shared_box_predictor_oid_512x512_sync.config
      └── ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync.config

      0 directories, 57 files

    • Customize your own model config, ssd_mobilenet_v1_coco.config for example.
    • Open it and change the fields marked "## change here".
      # SSD with Mobilenet v1 configuration for MSCOCO Dataset.
      # Users should configure the fine_tune_checkpoint field in the train config as
      # well as the label_map_path and input_path fields in the train_input_reader and
      # eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
      # should be configured.

      model {
        ssd {
          num_classes: 14  ## change here
          box_coder {
            faster_rcnn_box_coder {
              y_scale: 10.0
              x_scale: 10.0
              height_scale: 5.0
              width_scale: 5.0
            }
          }
          matcher {
            argmax_matcher {
              matched_threshold: 0.5
              unmatched_threshold: 0.5
              ignore_thresholds: false
              negatives_lower_than_unmatched: true
              force_match_for_each_row: true
            }
          }
          similarity_calculator {
            iou_similarity {
            }
          }
          anchor_generator {
            ssd_anchor_generator {
              num_layers: 6
              min_scale: 0.2
              max_scale: 0.95
              aspect_ratios: 1.0
              aspect_ratios: 2.0
              aspect_ratios: 0.5
              aspect_ratios: 3.0
              aspect_ratios: 0.3333
            }
          }
          image_resizer {
            fixed_shape_resizer {
              height: 300
              width: 300
            }
          }
          box_predictor {
            convolutional_box_predictor {
              min_depth: 0
              max_depth: 0
              num_layers_before_predictor: 0
              use_dropout: false
              dropout_keep_probability: 0.8
              kernel_size: 1
              box_code_size: 4
              apply_sigmoid_to_scores: false
              conv_hyperparams {
                activation: RELU_6,
                regularizer {
                  l2_regularizer {
                    weight: 0.00004
                  }
                }
                initializer {
                  truncated_normal_initializer {
                    stddev: 0.03
                    mean: 0.0
                  }
                }
                batch_norm {
                  train: true,
                  scale: true,
                  center: true,
                  decay: 0.9997,
                  epsilon: 0.001,
                }
              }
            }
          }
          feature_extractor {
            type: 'ssd_mobilenet_v1'
            min_depth: 16
            depth_multiplier: 1.0
            conv_hyperparams {
              activation: RELU_6,
              regularizer {
                l2_regularizer {
                  weight: 0.00004
                }
              }
              initializer {
                truncated_normal_initializer {
                  stddev: 0.03
                  mean: 0.0
                }
              }
              batch_norm {
                train: true,
                scale: true,
                center: true,
                decay: 0.9997,
                epsilon: 0.001,
              }
            }
          }
          loss {
            classification_loss {
              weighted_sigmoid {
              }
            }
            localization_loss {
              weighted_smooth_l1 {
              }
            }
            hard_example_miner {
              num_hard_examples: 3000
              iou_threshold: 0.99
              loss_type: CLASSIFICATION
              max_negatives_per_positive: 3
              min_negatives_per_image: 0
            }
            classification_weight: 1.0
            localization_weight: 1.0
          }
          normalize_loss_by_num_matches: true
          post_processing {
            batch_non_max_suppression {
              score_threshold: 1e-8
              iou_threshold: 0.6
              max_detections_per_class: 100
              max_total_detections: 100
            }
            score_converter: SIGMOID
          }
        }
      }

      train_config: {
        batch_size: 24  ## change here
        optimizer {
          rms_prop_optimizer: {
            learning_rate: {
              exponential_decay_learning_rate {
                initial_learning_rate: 0.0004
                decay_steps: 800720
                decay_factor: 0.95
              }
            }
            momentum_optimizer_value: 0.9
            decay: 0.9
            epsilon: 1.0
          }
        }
        fine_tune_checkpoint: "object_detection/finetune_cpkt/ssd_mobilenet_v1_coco_2018_01_28/model.ckpt"  ## change here
        from_detection_checkpoint: true
        # Note: The below line limits the training process to 200K steps, which we
        # empirically found to be sufficient enough to train the pets dataset. This
        # effectively bypasses the learning rate schedule (the learning rate will
        # never decay). Remove the below line to train indefinitely.
        num_steps: 10000  ## change here
        data_augmentation_options {
          random_horizontal_flip {
          }
        }
        data_augmentation_options {
          ssd_random_crop {
          }
        }
      }

      train_input_reader: {
        tf_record_input_reader {
          input_path: "object_detection/data/dove_train.tfrecord"  ## change here
        }
        label_map_path: "object_detection/data/dove_cholo_label_map.pbtxt"  ## change here
      }

      eval_config: {
        num_examples: 3438  ## change here
        # Note: The below line limits the evaluation process to 10 evaluations.
        # Remove the below line to evaluate indefinitely.
        max_evals: 10
      }

      eval_input_reader: {
        tf_record_input_reader {
          input_path: "object_detection/data/dove_validation.tfrecord"  ## change here
        }
        label_map_path: "object_detection/data/dove_cholo_label_map.pbtxt"  ## change here
        shuffle: false
        num_readers: 1
      }
    • Download the pre-trained model (see the download sketch below)
      model zoo
    • fine_tune_checkpoint: "object_detection/finetune_cpkt/ssd_mobilenet_v1_coco_2018_01_28/model.ckpt" ## change here
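    • A sketch of fetching that checkpoint (the tarball name matches the TF1 detection model zoo; the finetune_cpkt directory is this tutorial's own convention):
      cd models/research/object_detection
      mkdir -p finetune_cpkt && cd finetune_cpkt
      wget http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz
      tar -xzf ssd_mobilenet_v1_coco_2018_01_28.tar.gz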

legacy training (run train.py and eval.py at the same time)

  • The old training entry point: /models/research/object_detection/legacy/train.py

  • The old evaluation entry point: /models/research/object_detection/legacy/eval.py

  • ref: how to use eval

  • --logtostderr: write logs to stderr

  • --train_dir: directory where training checkpoints are saved

  • --pipeline_config_path: path to the model config file

    ## You can train on the GPU, but CUDA out-of-memory errors are common.
    ## First add the following code to train.py and eval.py to limit GPU memory usage.

    import os
    import tensorflow as tf

    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.35)
    # pass gpu_options into the config so the 0.35 memory cap actually takes effect
    config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
    config.gpu_options.allow_growth = True

    sess0 = tf.InteractiveSession(config=config)

    # Source: https://blog.csdn.net/baidu_33597755/article/details/102311000

    cd models/research/

    python object_detection/legacy/train.py \
    --pipeline_config_path=object_detection/dove_cholo_od/config/ssd_mobilenet_v2_coco.config \
    --train_dir=object_detection/dove_cholo_od/dove_train_dir/ssd_m_v2/dove_train \
    --alsologtostderr

    # After train.py has been running for a while, start eval.py

    python object_detection/legacy/eval.py \
    --pipeline_config_path=object_detection/dove_cholo_od/config/ssd_mobilenet_v2_coco.config \
    --checkpoint_dir=object_detection/dove_cholo_od/dove_train_dir/ssd_m_v2/dove_train \
    --eval_dir=object_detection/dove_cholo_od/dove_train_dir/ssd_m_v2/dove_eval \
    --logtostderr

  • Then open TensorBoard to watch the training and eval progress.

  • Open two TensorBoard instances at the same time, one per log directory:

tensorboard --logdir=object_detection/dove_cholo_od/dove_train_dir/ssd_m_v2/dove_train --port=6005

tensorboard --logdir=object_detection/dove_cholo_od/dove_train_dir/ssd_m_v2/dove_eval

modern training (GPU not supported yet)

  • The new training entry point: /models/research/object_detection/model_main.py

    # From the tensorflow/models/research/ directory
    python object_detection/model_main.py \
    --pipeline_config_path=object_detection/training/ssd_mobilenet_v1_coco.config \
    --model_dir=object_detection/training \
    --num_train_steps=50000 \
    --num_eval_steps=2000 \
    --alsologtostderr

Model evaluation

Q&A

1. ImportError: cannot import name 'input_reader_pb2' from 'object_detection.protos'
solution:
# From tensorflow/models/research/
protoc object_detection/protos/*.proto --python_out=.
2. from nets import inception_resnet_v2 ModuleNotFoundError: No module named 'nets'
solution:
cd models/research/
python setup.py build
python setup.py install


cd models/research/slim/
python setup.py build
python setup.py install
3. Not found: PATH_TO_BE_CONFIGURED; No such file or directory
solution:
download the pre-trained ckpt model
open the config file and search for "PATH_TO_BE_CONFIGURED" to find the fields that should be configured.
4. No module named 'pycocotools'
pip install git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI