diff --git a/data/scripts/get_voc.sh b/data/scripts/get_voc.sh index 5e488c8..6bdaa9b 100644 --- a/data/scripts/get_voc.sh +++ b/data/scripts/get_voc.sh @@ -25,7 +25,7 @@ end=$(date +%s) runtime=$((end - start)) echo "Completed in" $runtime "seconds" -echo "Spliting dataset..." +echo "Splitting dataset..." python3 - "$@" <=4.41.0 # pycocotools>=2.0 # export -------------------------------------- -# packaging # for coremltools # coremltools==4.0 # onnx>=1.7.0 # scikit-learn==0.19.2 # for coreml quantization diff --git a/utils/datasets.py b/utils/datasets.py index 18ad02b..45b950e 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -1,13 +1,15 @@ import glob +import math import os import random import shutil import time +from itertools import repeat +from multiprocessing.pool import ThreadPool from pathlib import Path from threading import Thread import cv2 -import math import numpy as np import torch from PIL import Image, ExifTags @@ -474,10 +476,11 @@ class LoadImagesAndLabels(Dataset): # for training/testing self.imgs = [None] * n if cache_images: gb = 0 # Gigabytes of cached images - pbar = tqdm(range(len(self.img_files)), desc='Caching images') self.img_hw0, self.img_hw = [None] * n, [None] * n - for i in pbar: # max 10k images - self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i) # img, hw_original, hw_resized + results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads; ordered imap so the i-th result belongs to image index i + pbar = tqdm(enumerate(results), total=n) + for i, x in pbar: + self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i) gb += self.imgs[i].nbytes pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)