Updated cache v0.2 with `hashlib` (#3350)
* Update cache v0.2 to include parent hash

  Possible fix for https://github.com/ultralytics/yolov5/issues/3349

* Update datasets.py
This commit is contained in:
parent
1f8d716ec9
commit
c6b5bfca85
|
|
@ -1,6 +1,7 @@
|
||||||
# Dataset utils and dataloaders
|
# Dataset utils and dataloaders
|
||||||
|
|
||||||
import glob
|
import glob
|
||||||
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import math
|
import math
|
||||||
import os
|
import os
|
||||||
|
|
@ -36,9 +37,12 @@ for orientation in ExifTags.TAGS.keys():
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
def get_hash(files):
|
def get_hash(paths):
|
||||||
# Returns a single hash value of a list of files
|
# Returns a single hash value of a list of paths (files or dirs)
|
||||||
return sum(os.path.getsize(f) for f in files if os.path.isfile(f))
|
size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes
|
||||||
|
h = hashlib.md5(str(size).encode()) # hash sizes
|
||||||
|
h.update(''.join(paths).encode()) # hash paths
|
||||||
|
return h.hexdigest() # return hash
|
||||||
|
|
||||||
|
|
||||||
def exif_size(img):
|
def exif_size(img):
|
||||||
|
|
@ -383,7 +387,7 @@ class LoadImagesAndLabels(Dataset): # for training/testing
|
||||||
cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels
|
cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels
|
||||||
if cache_path.is_file():
|
if cache_path.is_file():
|
||||||
cache, exists = torch.load(cache_path), True # load
|
cache, exists = torch.load(cache_path), True # load
|
||||||
if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache: # changed
|
if cache['hash'] != get_hash(self.label_files + self.img_files): # changed
|
||||||
cache, exists = self.cache_labels(cache_path, prefix), False # re-cache
|
cache, exists = self.cache_labels(cache_path, prefix), False # re-cache
|
||||||
else:
|
else:
|
||||||
cache, exists = self.cache_labels(cache_path, prefix), False # cache
|
cache, exists = self.cache_labels(cache_path, prefix), False # cache
|
||||||
|
|
@ -501,9 +505,9 @@ class LoadImagesAndLabels(Dataset): # for training/testing
|
||||||
|
|
||||||
x['hash'] = get_hash(self.label_files + self.img_files)
|
x['hash'] = get_hash(self.label_files + self.img_files)
|
||||||
x['results'] = nf, nm, ne, nc, i + 1
|
x['results'] = nf, nm, ne, nc, i + 1
|
||||||
x['version'] = 0.1 # cache version
|
x['version'] = 0.2 # cache version
|
||||||
try:
|
try:
|
||||||
torch.save(x, path) # save for next time
|
torch.save(x, path) # save cache for next time
|
||||||
logging.info(f'{prefix}New cache created: {path}')
|
logging.info(f'{prefix}New cache created: {path}')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}') # path not writeable
|
logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}') # path not writeable
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue