Objects365 update
This commit is contained in:
parent
801b469878
commit
54652fe3ff
|
|
@ -7,22 +7,34 @@
|
||||||
# /images
|
# /images
|
||||||
# /labels
|
# /labels
|
||||||
|
|
||||||
|
|
||||||
from pycocotools.coco import COCO
|
from pycocotools.coco import COCO
|
||||||
|
|
||||||
coco = COCO("zhiyuan_objv2_train.json")
|
from utils.general import download, Path
|
||||||
cats = coco.loadCats(coco.getCatIds())
|
|
||||||
nms = [cat["name"] for cat in cats]
|
# Make Directories
|
||||||
print("COCO categories: \n{}\n".format(" ".join(nms)))
|
dir = Path('../datasets/objects365') # dataset directory
|
||||||
for categoryId, cat in enumerate(nms):
|
for p in 'images', 'labels':
|
||||||
|
(dir / p).mkdir(parents=True, exist_ok=True)
|
||||||
|
for q in 'train', 'val':
|
||||||
|
(dir / p / q).mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Download
|
||||||
|
url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/"
|
||||||
|
download(url + 'zhiyuan_objv2_train.tar.gz', dir=dir, threads=8) # annotations json
|
||||||
|
download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train', threads=8)
|
||||||
|
|
||||||
|
# Labels
|
||||||
|
coco = COCO(dir / 'zhiyuan_objv2_train.json')
|
||||||
|
names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
|
||||||
|
for categoryId, cat in enumerate(names):
|
||||||
catIds = coco.getCatIds(catNms=[cat])
|
catIds = coco.getCatIds(catNms=[cat])
|
||||||
imgIds = coco.getImgIds(catIds=catIds)
|
imgIds = coco.getImgIds(catIds=catIds)
|
||||||
print(cat)
|
|
||||||
# Create a subfolder in this directory called "labels". This is where the annotations will be saved in YOLO format
|
|
||||||
for im in coco.loadImgs(imgIds):
|
for im in coco.loadImgs(imgIds):
|
||||||
width, height = im["width"], im["height"]
|
width, height = im["width"], im["height"]
|
||||||
path = im["file_name"].split("/")[-1] # image filename
|
path = Path(im["file_name"]) # image filename
|
||||||
try:
|
try:
|
||||||
with open("labels/train/" + path.replace(".jpg", ".txt"), "a+") as file:
|
with open(dir / 'labels' / 'train' / path.with_suffix('.txt').name, 'a') as file:
|
||||||
annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
|
annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
|
||||||
for a in coco.loadAnns(annIds):
|
for a in coco.loadAnns(annIds):
|
||||||
x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
|
x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
|
||||||
|
|
|
||||||
|
|
@ -183,7 +183,7 @@ def check_dataset(dict):
|
||||||
raise Exception('Dataset not found.')
|
raise Exception('Dataset not found.')
|
||||||
|
|
||||||
|
|
||||||
def download(url, dir='.', multi_thread=False):
|
def download(url, dir='.', threads=1):
|
||||||
# Multi-threaded file download and unzip function
|
# Multi-threaded file download and unzip function
|
||||||
def download_one(url, dir):
|
def download_one(url, dir):
|
||||||
# Download 1 file
|
# Download 1 file
|
||||||
|
|
@ -200,8 +200,8 @@ def download(url, dir='.', multi_thread=False):
|
||||||
|
|
||||||
dir = Path(dir)
|
dir = Path(dir)
|
||||||
dir.mkdir(parents=True, exist_ok=True) # make directory
|
dir.mkdir(parents=True, exist_ok=True) # make directory
|
||||||
if multi_thread:
|
if threads > 1:
|
||||||
ThreadPool(8).imap(lambda x: download_one(*x), zip(url, repeat(dir))) # 8 threads
|
ThreadPool(threads).imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded
|
||||||
else:
|
else:
|
||||||
for u in tuple(url) if isinstance(url, str) else url:
|
for u in tuple(url) if isinstance(url, str) else url:
|
||||||
download_one(u, dir)
|
download_one(u, dir)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue