Browse Source

Multi-threaded image caching

5.0
Glenn Jocher 3 years ago
parent
commit
194f16844e
3 changed files with 8 additions and 6 deletions
  1. +1
    -1
      data/scripts/get_voc.sh
  2. +0
    -1
      requirements.txt
  3. +7
    -4
      utils/datasets.py

+ 1
- 1
data/scripts/get_voc.sh View File

@@ -25,7 +25,7 @@ end=$(date +%s)
runtime=$((end - start))
echo "Completed in" $runtime "seconds"

echo "Spliting dataset..."
echo "Splitting dataset..."
python3 - "$@" <<END
import xml.etree.ElementTree as ET
import pickle

+ 0
- 1
requirements.txt View File

@@ -20,7 +20,6 @@ tqdm>=4.41.0
# pycocotools>=2.0

# export --------------------------------------
# packaging # for coremltools
# coremltools==4.0
# onnx>=1.7.0
# scikit-learn==0.19.2 # for coreml quantization

+ 7
- 4
utils/datasets.py View File

@@ -1,13 +1,15 @@
import glob
import math
import os
import random
import shutil
import time
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from threading import Thread

import cv2
import math
import numpy as np
import torch
from PIL import Image, ExifTags
@@ -474,10 +476,11 @@ class LoadImagesAndLabels(Dataset): # for training/testing
self.imgs = [None] * n
if cache_images:
gb = 0 # Gigabytes of cached images
pbar = tqdm(range(len(self.img_files)), desc='Caching images')
self.img_hw0, self.img_hw = [None] * n, [None] * n
for i in pbar: # max 10k images
self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i) # img, hw_original, hw_resized
results = ThreadPool(8).imap_unordered(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads
pbar = tqdm(enumerate(results), total=n)
for i, x in pbar:
self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i)
gb += self.imgs[i].nbytes
pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)


Loading…
Cancel
Save