Add `DATASETS_DIR` global in general.py (#6578)

This commit is contained in:
Glenn Jocher 2022-02-08 22:20:44 +01:00 committed by GitHub
parent f40854b61b
commit 9c513ca629
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 8 deletions

View File

@ -27,7 +27,7 @@ from torch.utils.data import DataLoader, Dataset, dataloader, distributed
from tqdm import tqdm from tqdm import tqdm
from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
from utils.general import (LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str, from utils.general import (DATASETS_DIR, LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn) segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
from utils.torch_utils import torch_distributed_zero_first from utils.torch_utils import torch_distributed_zero_first
@ -817,15 +817,15 @@ def create_folder(path='./new'):
os.makedirs(path) # make new output folder os.makedirs(path) # make new output folder
def flatten_recursive(path='../datasets/coco128'): def flatten_recursive(path=DATASETS_DIR / 'coco128'):
# Flatten a recursive directory by bringing all files to top level # Flatten a recursive directory by bringing all files to top level
new_path = Path(path + '_flat') new_path = Path(str(path) + '_flat')
create_folder(new_path) create_folder(new_path)
for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)): for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
shutil.copyfile(file, new_path / Path(file).name) shutil.copyfile(file, new_path / Path(file).name)
def extract_boxes(path='../datasets/coco128'): # from utils.datasets import *; extract_boxes() def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.datasets import *; extract_boxes()
# Convert detection dataset into classification dataset, with one directory per class # Convert detection dataset into classification dataset, with one directory per class
path = Path(path) # images dir path = Path(path) # images dir
shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing
@ -859,7 +859,7 @@ def extract_boxes(path='../datasets/coco128'): # from utils.datasets import *;
assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}' assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False): def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0.0), annotated_only=False):
""" Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files """ Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
Usage: from utils.datasets import *; autosplit() Usage: from utils.datasets import *; autosplit()
Arguments Arguments
@ -939,7 +939,7 @@ def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profil
""" Return dataset statistics dictionary with images and instances counts per split per class """ Return dataset statistics dictionary with images and instances counts per split per class
To run in parent directory: export PYTHONPATH="$PWD/yolov5" To run in parent directory: export PYTHONPATH="$PWD/yolov5"
Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True) Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True)
Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip') Usage2: from utils.datasets import *; dataset_stats('path/to/coco128_with_yaml.zip')
Arguments Arguments
path: Path to data.yaml or data.zip (with data.yaml inside data.zip) path: Path to data.yaml or data.zip (with data.yaml inside data.zip)
autodownload: Attempt to download dataset if not found locally autodownload: Attempt to download dataset if not found locally

View File

@ -35,6 +35,7 @@ from utils.metrics import box_iou, fitness
# Settings # Settings
FILE = Path(__file__).resolve() FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory ROOT = FILE.parents[1] # YOLOv5 root directory
DATASETS_DIR = ROOT.parent / 'datasets' # YOLOv5 datasets directory
NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads NUM_THREADS = min(8, max(1, os.cpu_count() - 1)) # number of YOLOv5 multiprocessing threads
VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode VERBOSE = str(os.getenv('YOLOv5_VERBOSE', True)).lower() == 'true' # global verbose mode
FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf FONT = 'Arial.ttf' # https://ultralytics.com/assets/Arial.ttf
@ -398,8 +399,8 @@ def check_dataset(data, autodownload=True):
# Download (optional) # Download (optional)
extract_dir = '' extract_dir = ''
if isinstance(data, (str, Path)) and str(data).endswith('.zip'): # i.e. gs://bucket/dir/coco128.zip if isinstance(data, (str, Path)) and str(data).endswith('.zip'): # i.e. gs://bucket/dir/coco128.zip
download(data, dir='../datasets', unzip=True, delete=False, curl=False, threads=1) download(data, dir=DATASETS_DIR, unzip=True, delete=False, curl=False, threads=1)
data = next((Path('../datasets') / Path(data).stem).rglob('*.yaml')) data = next((DATASETS_DIR / Path(data).stem).rglob('*.yaml'))
extract_dir, autodownload = data.parent, False extract_dir, autodownload = data.parent, False
# Read yaml (optional) # Read yaml (optional)