浏览代码

Dataset download bash script updates (#1132)

5.0
Glenn Jocher GitHub 4 年前
父节点
当前提交
4346b13a40
找不到此签名对应的密钥。GPG 密钥 ID: 4AEE18F83AFDEB23
共有 2 个文件被更改，包括 22 次插入和 75 次删除
  1. +10
    -7
      data/scripts/get_coco.sh
  2. +12
    -68
      data/scripts/get_voc.sh

+ 10
- 7
data/scripts/get_coco.sh 查看文件

@@ -8,14 +8,17 @@
# /yolov5

# Download/unzip labels
# NOTE(review): the two sequences below fetch the same archive
# (coco2017labels.zip from the v1.0 release URL). This looks like the
# pre-change and post-change forms of one step shown together in a diff
# hunk - confirm which form is current before running this text as a script.
echo 'Downloading COCO 2017 labels ...'
d='../' # unzip directory
# First form: name the archive, then download it to a local file of the same name.
f='coco2017labels.zip' && curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f
# Extract quietly into $d; the archive is deleted only if extraction succeeded.
unzip -q $f -d $d && rm $f
# Second form: the release URL prefix is held in $url and the whole
# echo/download/unzip/remove sequence is a single && chain, so each step
# runs only if the previous one succeeded.
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f='coco2017labels.zip' # 68 MB
echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove

# Download/unzip images
# NOTE(review): the per-file commands below cover the same archives as the
# for-loop that follows them in this view; presumably they are the
# pre-change form of that loop - confirm before running this text as a script.
echo 'Downloading COCO 2017 images ...'
d='../coco/images' # unzip directory
# Each line names an archive, downloads it, extracts it quietly into $d and
# deletes the archive; every step runs only if the previous one succeeded.
f='train2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 19G, 118k images
f='val2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 1G, 5k images
# Test split is left disabled (commented out):
# f='test2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 7G, 41k images
# Download, unzip and remove each COCO 2017 image archive.
# Reads $d (the unzip directory), which is assigned before this block.
url=http://images.cocodataset.org/zips/
f1='train2017.zip' # 19G, 118k images
f2='val2017.zip' # 1G, 5k images
f3='test2017.zip' # 7G, 41k images (optional)
# f3 is defined but deliberately not in the list below; add "$f3" to the
# loop to fetch the test images as well.
for f in "$f1" "$f2"; do
  echo 'Downloading' "$url$f" ' ...'
  # Stop at the first failure so an incomplete dataset is never reported as
  # success (previously a later successful archive hid an earlier failure).
  if ! { curl -L "$url$f" -o "$f" && unzip -q "$f" -d "$d" && rm "$f"; }; then
    echo "Failed to download or extract $url$f" >&2
    exit 1
  fi
done

+ 12
- 68
data/scripts/get_voc.sh 查看文件

@@ -8,79 +8,23 @@
# /yolov5

start=$(date +%s) # start timestamp in seconds; subtracted from a later timestamp to report the runtime

# Handle optional download dir.
#   $1 (optional) - directory to download into; must already exist.
#   With no argument the script works in ../tmp, creating it if needed.
# Exits with status 1 when the directory is invalid or cannot be entered.
if [ -z "$1" ]; then
  # navigate to ../tmp
  echo "navigating to ../tmp/ ..."
  mkdir -p ../tmp
  cd ../tmp/ || exit 1
else
  # check if is valid directory (quoted so paths with spaces are tested as one word)
  if [ ! -d "$1" ]; then
    echo "$1" "is not a valid directory" >&2
    exit 1 # non-zero: an invalid directory is an error, not a success
  fi
  echo "navigating to" "$1" "..."
  cd "$1" || exit 1
fi

# VOC2007: fetch the trainval and test tarballs into the current directory,
# unpack them, delete the tarballs, then print the seconds elapsed since $start.

# Fetch
printf '%s\n' 'Downloading VOC2007 trainval ...'
curl --location --remote-name http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
printf '%s\n' 'Downloading VOC2007 test data ...'
curl --location --remote-name http://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar
printf '%s\n' 'Done downloading.'

# Unpack
printf '%s\n' 'Extracting trainval ...'
tar -x -f VOCtrainval_06-Nov-2007.tar
printf '%s\n' 'Extracting test ...'
tar -x -f VOCtest_06-Nov-2007.tar

# Clean up
printf '%s\n' 'removing tars ...'
for archive in VOCtrainval_06-Nov-2007.tar VOCtest_06-Nov-2007.tar; do
  rm "$archive"
done

# Elapsed time
end=$(date '+%s')
runtime=$(( end - start ))

printf 'Completed in %s seconds\n' "$runtime"

start=$(date +%s) # start timestamp in seconds for the timing that follows

# Handle optional download dir.
#   $1 (optional) - directory to download into; must already exist.
#   With no argument the script works in ../tmp, creating it if needed.
# Exits with status 1 when the directory is invalid or cannot be entered.
if [ -z "$1" ]; then
  # navigate to ../tmp
  echo "navigating to ../tmp/ ..."
  mkdir -p ../tmp
  cd ../tmp/ || exit 1
else
  # check if is valid directory (quoted so paths with spaces are tested as one word)
  if [ ! -d "$1" ]; then
    echo "$1" "is not a valid directory" >&2
    exit 1 # non-zero: an invalid directory is an error, not a success
  fi
  echo "navigating to" "$1" "..."
  cd "$1" || exit 1
fi

# VOC2012: fetch the trainval tarball into the current directory, unpack it,
# then delete the tarball.

# Fetch
printf '%s\n' 'Downloading VOC2012 trainval ...'
curl --location --remote-name http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
printf '%s\n' 'Done downloading.'

# Unpack
printf '%s\n' 'Extracting trainval ...'
tar -x -f VOCtrainval_11-May-2012.tar

# Clean up
printf '%s\n' 'removing tar ...'
rm VOCtrainval_11-May-2012.tar
# Work inside ../tmp (created if missing). Abort if it cannot be entered so
# the archives are never unpacked into an unexpected directory.
mkdir -p ../tmp
cd ../tmp/ || exit 1

# Download/unzip images and labels
d='.' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images
f2=VOCtest_06-Nov-2007.zip # 438MB, 4953 images
f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images
for f in "$f1" "$f2" "$f3"; do
  echo 'Downloading' "$url$f" ' ...'
  # Download, unzip, remove. Stop at the first failure so later steps never
  # run against an incomplete set of archives.
  if ! { curl -L "$url$f" -o "$f" && unzip -q "$f" -d "$d" && rm "$f"; }; then
    echo "Failed to download or extract $url$f" >&2
    exit 1
  fi
done

# Report the seconds elapsed since $start (assigned earlier in the script).
end=$(date '+%s')
runtime=$(( end - start ))

printf 'Completed in %s seconds\n' "$runtime"

# Enter ../tmp before the python3 step that follows; stop here if the
# directory cannot be entered so that step never runs from an unexpected
# location.
cd ../tmp || exit 1
# NOTE(review): "Spliting" is a misspelling of "Splitting"; the message is
# left unchanged because it is runtime output.
echo "Spliting dataset..."
python3 - "$@" <<END
import xml.etree.ElementTree as ET

正在加载...
取消
保存