Dataset download bash script updates (#1132)

This commit is contained in:
Glenn Jocher 2020-10-13 15:58:03 +02:00 committed by GitHub
parent 00917a6225
commit 4346b13a40
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 21 additions and 74 deletions

View File

@@ -8,14 +8,17 @@
# /yolov5 # /yolov5
# Download/unzip labels # Download/unzip labels
echo 'Downloading COCO 2017 labels ...'
d='../' # unzip directory d='../' # unzip directory
f='coco2017labels.zip' && curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
unzip -q $f -d $d && rm $f f='coco2017labels.zip' # 68 MB
echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
# Download/unzip images # Download/unzip images
echo 'Downloading COCO 2017 images ...'
d='../coco/images' # unzip directory d='../coco/images' # unzip directory
f='train2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 19G, 118k images url=http://images.cocodataset.org/zips/
f='val2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 1G, 5k images f1='train2017.zip' # 19G, 118k images
# f='test2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 7G, 41k images f2='val2017.zip' # 1G, 5k images
f3='test2017.zip' # 7G, 41k images (optional)
for f in $f1 $f2; do
echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
done

View File

@@ -8,79 +8,23 @@
# /yolov5 # /yolov5
start=$(date +%s) start=$(date +%s)
# handle optional download dir
if [ -z "$1" ]; then
# navigate to ~/tmp
echo "navigating to ../tmp/ ..."
mkdir -p ../tmp mkdir -p ../tmp
cd ../tmp/ cd ../tmp/
else
# check if is valid directory
if [ ! -d $1 ]; then
echo $1 "is not a valid directory"
exit 0
fi
echo "navigating to" $1 "..."
cd $1
fi
echo "Downloading VOC2007 trainval ..." # Download/unzip images and labels
# Download data d='.' # unzip directory
curl -LO http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
echo "Downloading VOC2007 test data ..." f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images
curl -LO http://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar f2=VOCtest_06-Nov-2007.zip # 438MB, 4953 images
echo "Done downloading." f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images
for f in $f1 $f2 $f3; do
# Extract data echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
echo "Extracting trainval ..." done
tar -xf VOCtrainval_06-Nov-2007.tar
echo "Extracting test ..."
tar -xf VOCtest_06-Nov-2007.tar
echo "removing tars ..."
rm VOCtrainval_06-Nov-2007.tar
rm VOCtest_06-Nov-2007.tar
end=$(date +%s) end=$(date +%s)
runtime=$((end - start)) runtime=$((end - start))
echo "Completed in" $runtime "seconds" echo "Completed in" $runtime "seconds"
start=$(date +%s)
# handle optional download dir
if [ -z "$1" ]; then
# navigate to ~/tmp
echo "navigating to ../tmp/ ..."
mkdir -p ../tmp
cd ../tmp/
else
# check if is valid directory
if [ ! -d $1 ]; then
echo $1 "is not a valid directory"
exit 0
fi
echo "navigating to" $1 "..."
cd $1
fi
echo "Downloading VOC2012 trainval ..."
# Download data
curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
echo "Done downloading."
# Extract data
echo "Extracting trainval ..."
tar -xf VOCtrainval_11-May-2012.tar
echo "removing tar ..."
rm VOCtrainval_11-May-2012.tar
end=$(date +%s)
runtime=$((end - start))
echo "Completed in" $runtime "seconds"
cd ../tmp
echo "Spliting dataset..." echo "Spliting dataset..."
python3 - "$@" <<END python3 - "$@" <<END
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET