Browse Source

Multi-threaded image caching

5.0
Glenn Jocher 4 years ago
parent
commit
194f16844e
3 changed files with 8 additions and 6 deletions
  1. data/scripts/get_voc.sh (+1 -1)
  2. requirements.txt (+0 -1)
  3. utils/datasets.py (+7 -4)

+ 1
- 1
data/scripts/get_voc.sh View File

runtime=$((end - start)) runtime=$((end - start))
echo "Completed in" $runtime "seconds" echo "Completed in" $runtime "seconds"


echo "Spliting dataset..."
echo "Splitting dataset..."
python3 - "$@" <<END python3 - "$@" <<END
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import pickle import pickle

+ 0
- 1
requirements.txt View File

# pycocotools>=2.0 # pycocotools>=2.0


# export -------------------------------------- # export --------------------------------------
# packaging # for coremltools
# coremltools==4.0 # coremltools==4.0
# onnx>=1.7.0 # onnx>=1.7.0
# scikit-learn==0.19.2 # for coreml quantization # scikit-learn==0.19.2 # for coreml quantization

+ 7
- 4
utils/datasets.py View File

import glob import glob
import math
import os import os
import random import random
import shutil import shutil
import time import time
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path from pathlib import Path
from threading import Thread from threading import Thread


import cv2 import cv2
import math
import numpy as np import numpy as np
import torch import torch
from PIL import Image, ExifTags from PIL import Image, ExifTags
self.imgs = [None] * n self.imgs = [None] * n
if cache_images: if cache_images:
gb = 0 # Gigabytes of cached images gb = 0 # Gigabytes of cached images
pbar = tqdm(range(len(self.img_files)), desc='Caching images')
self.img_hw0, self.img_hw = [None] * n, [None] * n self.img_hw0, self.img_hw = [None] * n, [None] * n
for i in pbar: # max 10k images
self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i) # img, hw_original, hw_resized
results = ThreadPool(8).imap_unordered(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads
pbar = tqdm(enumerate(results), total=n)
for i, x in pbar:
self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i)
gb += self.imgs[i].nbytes gb += self.imgs[i].nbytes
pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9) pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)



Loading…
Cancel
Save