Skip to content

Instantly share code, notes, and snippets.

@kleysonr
Created October 15, 2020 18:39

Revisions

  1. kleysonr created this gist Oct 15, 2020.
    107 changes: 107 additions & 0 deletions split_yolo_dataset.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,107 @@
    import glob
    import os
    from shutil import copyfile

    # Fraction of each class's samples that split() assigns to the test set.
    RATIO_TEST = 0.25
    # Fraction of the remaining (non-test) samples that split() assigns to
    # the validation set.
    RATIO_VAL = 0.10

    # Per-class buckets of sample base names; main() appends to them.
    lista = {class_name: [] for class_name in ('frente', 'recibo')}

    def split(dataset, ratio_test=None, ratio_val=None):
        """Partition *dataset* into train, test and validation slices.

        Items are taken in their existing order (nothing is shuffled): the
        leading slice is train, the next one is validation and everything
        that remains is test. The validation share is computed on the
        non-test portion, not on the whole dataset. A summary of the sizes
        is printed.

        Args:
            dataset: Sequence supporting ``len()`` and slicing.
            ratio_test: Fraction of the items reserved for testing. When
                omitted, the module-level ``RATIO_TEST`` is used.
            ratio_val: Fraction of the non-test items reserved for
                validation. When omitted, the module-level ``RATIO_VAL`` is
                used.

        Returns:
            A ``(train, test, val)`` tuple of slices of *dataset*.
        """
        if ratio_test is None:
            ratio_test = RATIO_TEST
        if ratio_val is None:
            ratio_val = RATIO_VAL

        nitens = len(dataset)

        # Size of the non-test portion, then carve validation out of it.
        ntrain_ = round(nitens * (1 - ratio_test))
        nval = round(ntrain_ * ratio_val)
        ntrain = ntrain_ - nval

        train = dataset[:ntrain]
        val = dataset[ntrain:ntrain_]
        # Test takes whatever is left, so rounding can never drop an item
        # (round() on the two complementary shares may be off by one).
        test = dataset[ntrain_:]

        print('----------------------------')
        print('Total size: {}'.format(nitens))
        print('Train: {}'.format(len(train)))
        print('Test: {}'.format(len(test)))
        print('Val: {}'.format(len(val)))

        return (train, test, val)

    def move_files(train, test, val):
        """Move every sample's image and label file into its subset folder.

        For each base name in *train*, *test* and *val*, the files
        ``images/<name>.png`` and ``images/<name>.txt`` are moved into
        ``images/train``, ``images/test`` and ``images/val`` respectively.
        The destination folders are expected to exist already.

        Args:
            train: Iterable of sample base names for the training set.
            test: Iterable of sample base names for the test set.
            val: Iterable of sample base names for the validation set.

        Raises:
            OSError: If a file cannot be copied or removed (for example a
                missing source file).
        """
        destinations = (
            ('images/train', train),
            ('images/test', test),
            ('images/val', val),
        )
        for subset_dir, names in destinations:
            for name in names:
                _move_sample(name, subset_dir)


    def _move_sample(name, subset_dir):
        """Move the ``.png``/``.txt`` pair called *name* from ``images`` into *subset_dir*."""
        # Image first, then its label file, as a copy followed by a delete.
        for ext in ('.png', '.txt'):
            filename = '{}{}'.format(name, ext)
            src = os.path.join('images', filename)
            dst = os.path.join(subset_dir, filename)
            copyfile(src, dst)
            os.remove(src)

    def main():
        """Group the labelled samples by class and split each class.

        Every ``images/*.txt`` file except ``classes.txt`` is read; the class
        id is the first whitespace-separated token of its first line. Samples
        with id ``0`` go to ``lista['frente']`` and samples with id ``1`` go
        to ``lista['recibo']``. Files whose first line is empty, or whose id
        is anything else, are left where they are. Each class is then split
        and its files moved independently, so both classes keep the same
        train/test/val proportions.
        """
        files = glob.glob('images/*.txt', recursive=False)

        # Build the list of sample names for each class
        for f in files:

            filename = os.path.splitext(os.path.basename(f))[0]

            # Not a per-image label file (presumably the class-name list
            # written by the labelling tool -- TODO confirm).
            if filename == 'classes':
                continue

            # Read the first line of the file; the context manager closes
            # the handle instead of leaving it to the garbage collector.
            with open(f) as label_file:
                fields = label_file.readline().split()

            # An empty first line carries no class id to group by.
            if not fields:
                continue

            # Compare the whole token rather than its first character so an
            # id such as '10' is not mistaken for class '1'.
            class_id = fields[0]
            if class_id == '0':
                lista['frente'].append(filename)
            elif class_id == '1':
                lista['recibo'].append(filename)

        # Split dataset
        (train, test, val) = split(lista['frente'])
        move_files(train, test, val)

        (train, test, val) = split(lista['recibo'])
        move_files(train, test, val)

    if __name__ == "__main__":

        # Make sure each destination folder exists before any file is moved.
        for subset_dir in ('images/train', 'images/test', 'images/val'):
            os.makedirs(subset_dir, exist_ok=True)

        main()