浏览代码

Multi-threaded image caching

5.0
Glenn Jocher 3 年前
父节点
当前提交
194f16844e
共有 3 个文件被更改,包括 8 次插入6 次删除
  1. +1
    -1
      data/scripts/get_voc.sh
  2. +0
    -1
      requirements.txt
  3. +7
    -4
      utils/datasets.py

+ 1
- 1
data/scripts/get_voc.sh 查看文件

@@ -25,7 +25,7 @@ end=$(date +%s)
runtime=$((end - start))
echo "Completed in" $runtime "seconds"

-echo "Spliting dataset..."
+echo "Splitting dataset..."
python3 - "$@" <<END
import xml.etree.ElementTree as ET
import pickle

+ 0
- 1
requirements.txt 查看文件

@@ -20,7 +20,6 @@ tqdm>=4.41.0
# pycocotools>=2.0

# export --------------------------------------
# packaging # for coremltools
# coremltools==4.0
# onnx>=1.7.0
# scikit-learn==0.19.2 # for coreml quantization

+ 7
- 4
utils/datasets.py 查看文件

@@ -1,13 +1,15 @@
import glob
import math
import os
import random
import shutil
import time
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from threading import Thread

import cv2
import math
import numpy as np
import torch
from PIL import Image, ExifTags
@@ -474,10 +476,11 @@ class LoadImagesAndLabels(Dataset): # for training/testing
self.imgs = [None] * n
if cache_images:
gb = 0 # Gigabytes of cached images
-pbar = tqdm(range(len(self.img_files)), desc='Caching images')
self.img_hw0, self.img_hw = [None] * n, [None] * n
-for i in pbar: # max 10k images
-self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(self, i) # img, hw_original, hw_resized
+results = ThreadPool(8).imap_unordered(lambda x: load_image(*x), zip(repeat(self), range(n))) # 8 threads
+pbar = tqdm(enumerate(results), total=n)
+for i, x in pbar:
+self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i)
gb += self.imgs[i].nbytes
pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)


正在加载...
取消
保存