浏览代码

Autosplit (#1488)

5.0
Glenn Jocher GitHub 4 年前
父节点
当前提交
4798e66fdf
找不到此签名对应的密钥 GPG 密钥 ID: 4AEE18F83AFDEB23
共有 1 个文件被更改,包括 17 次插入和 0 次删除
  1. +17
    -0
      utils/datasets.py

+ 17
- 0
utils/datasets.py 查看文件

@@ -902,3 +902,20 @@ def flatten_recursive(path='../coco128'):
create_folder(new_path)
for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
shutil.copyfile(file, new_path / Path(file).name)


def autosplit(path='../coco128', weights=(0.9, 0.1, 0.0)):  # from utils.datasets import *; autosplit()
    """ Autosplit a dataset into train/val/test splits and save *.txt files

    Every entry found recursively under ``path`` (pattern ``*.*``) is randomly
    assigned to one of three splits with ``random.choices``. Entries whose
    extension is listed in the module-level ``img_formats`` are then written,
    one path per line, to ``autosplit_train.txt``, ``autosplit_val.txt`` or
    ``autosplit_test.txt`` inside ``path``. Any of those three files that
    already exists is deleted first; a split that receives no images gets no
    file.

    # Arguments
        path: Path to images directory
        weights: Train, val, test weights (sequence of 3 numbers)
    """
    path = Path(path)  # images dir
    files = list(path.rglob('*.*'))
    # One draw per discovered file (image or not), so a seeded RNG is consumed
    # exactly once per entry regardless of how many entries are images.
    indices = random.choices([0, 1, 2], weights=weights, k=len(files))  # assign each image to a split
    txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files

    # Remove list files left over from a previous run.
    for name in txt:
        target = path / name
        if target.exists():
            target.unlink()

    # Collect the lines for each split so every list file is written once
    # instead of being reopened for every image.
    lines = [[] for _ in txt]
    for i, img in tqdm(zip(indices, files), total=len(files)):
        # Compare extensions case-insensitively so e.g. 'IMG_001.JPG' is kept.
        # NOTE(review): assumes img_formats holds lowercase extensions without
        # the leading dot - confirm against its definition in this module.
        if img.suffix[1:].lower() in img_formats:
            lines[i].append(str(img) + '\n')  # add image to txt file

    for name, split_lines in zip(txt, lines):
        if split_lines:  # do not create a file for an empty split
            with open(path / name, 'w') as f:
                f.writelines(split_lines)

正在加载...
取消
保存