Browse Source

Parse URL authentication (#3424)

* Parse URL authentication

* urllib.parse.unquote()

* improved error handling

* improved error handling

* remove %3F

* update check_file()
modifyDataloader
Glenn Jocher GitHub 3 years ago
parent
commit
f8651c388f
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 8 deletions
  1. +3
    -1
      utils/general.py
  2. +10
    -7
      utils/google_utils.py

+ 3
- 1
utils/general.py View File

import re
import subprocess
import time
import urllib
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
if Path(file).is_file() or file == '': # exists
return file
elif file.startswith(('http://', 'https://')): # download
url, file = file, Path(file).name
url, file = file, Path(urllib.parse.unquote(str(file))).name # url, file (decode '%2F' to '/' etc.)
file = file.split('?')[0] # parse authentication https://url.com/file.txt?auth...
print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, file)
assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check

+ 10
- 7
utils/google_utils.py View File

import platform
import subprocess
import time
import urllib
from pathlib import Path


import requests
def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
# Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
file = Path(file)
try: # GitHub
assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
try: # url1
print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, str(file))
assert file.exists() and file.stat().st_size > min_bytes # check
except Exception as e: # GCP
assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check
except Exception as e: # url2
file.unlink(missing_ok=True) # remove partial downloads
print(f'Download error: {e}\nRe-attempting {url2 or url} to {file}...')
print(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
os.system(f"curl -L '{url2 or url}' -o '{file}' --retry 3 -C -") # curl download, retry and resume on fail
finally:
if not file.exists() or file.stat().st_size < min_bytes: # check
file.unlink(missing_ok=True) # remove partial downloads
print(f'ERROR: Download failure: {error_msg or url}')
print(f"ERROR: {assert_msg}\n{error_msg}")
print('')




def attempt_download(file, repo='ultralytics/yolov5'):
def attempt_download(file, repo='ultralytics/yolov5'): # from utils.google_utils import *; attempt_download()
# Attempt file download if does not exist
file = Path(str(file).strip().replace("'", ''))


if not file.exists():
# URL specified
name = file.name
name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.
if str(file).startswith(('http:/', 'https:/')): # download
url = str(file).replace(':/', '://') # Pathlib turns :// -> :/
name = name.split('?')[0] # parse authentication https://url.com/file.txt?auth...
safe_download(file=name, url=url, min_bytes=1E5)
return name



Loading…
Cancel
Save