Browse Source

Parse URL authentication (#3424)

* Parse URL authentication

* urllib.parse.unquote()

* improved error handling

* improved error handling

* remove %3F

* update check_file()
modifyDataloader
Glenn Jocher GitHub 3 years ago
parent
commit
f8651c388f
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 8 deletions
  1. utils/general.py (+3, -1)
  2. utils/google_utils.py (+10, -7)

+ 3
- 1
utils/general.py View File

@@ -9,6 +9,7 @@ import random
import re
import subprocess
import time
import urllib
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
@@ -183,7 +184,8 @@ def check_file(file):
if Path(file).is_file() or file == '': # exists
return file
elif file.startswith(('http://', 'https://')): # download
url, file = file, Path(file).name
url, file = file, Path(urllib.parse.unquote(str(file))).name # url, file (decode '%2F' to '/' etc.)
file = file.split('?')[0] # parse authentication https://url.com/file.txt?auth...
print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, file)
assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check

+ 10
- 7
utils/google_utils.py View File

@@ -4,6 +4,7 @@ import os
import platform
import subprocess
import time
import urllib
from pathlib import Path

import requests
@@ -19,30 +20,32 @@ def gsutil_getsize(url=''):
def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
# Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
file = Path(file)
try: # GitHub
assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
try: # url1
print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, str(file))
assert file.exists() and file.stat().st_size > min_bytes # check
except Exception as e: # GCP
assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check
except Exception as e: # url2
file.unlink(missing_ok=True) # remove partial downloads
print(f'Download error: {e}\nRe-attempting {url2 or url} to {file}...')
print(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail
finally:
if not file.exists() or file.stat().st_size < min_bytes: # check
file.unlink(missing_ok=True) # remove partial downloads
print(f'ERROR: Download failure: {error_msg or url}')
print(f"ERROR: {assert_msg}\n{error_msg}")
print('')


def attempt_download(file, repo='ultralytics/yolov5'):
def attempt_download(file, repo='ultralytics/yolov5'): # from utils.google_utils import *; attempt_download()
# Attempt file download if does not exist
file = Path(str(file).strip().replace("'", ''))

if not file.exists():
# URL specified
name = file.name
name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.
if str(file).startswith(('http:/', 'https:/')): # download
url = str(file).replace(':/', '://') # Pathlib turns :// -> :/
name = name.split('?')[0] # parse authentication https://url.com/file.txt?auth...
safe_download(file=name, url=url, min_bytes=1E5)
return name


Loading…
Cancel
Save