Update `check_file()` to avoid repeat URL downloads (#5526)

This commit is contained in:
Glenn Jocher 2021-11-05 19:22:47 +01:00 committed by GitHub
parent 5f603a9dba
commit 32b8738735
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 6 additions and 3 deletions

View File

@@ -338,6 +338,9 @@ def check_file(file, suffix=''):
elif file.startswith(('http:/', 'https:/')): # download elif file.startswith(('http:/', 'https:/')): # download
url = str(Path(file)).replace(':/', '://') # Pathlib turns :// -> :/ url = str(Path(file)).replace(':/', '://') # Pathlib turns :// -> :/
file = Path(urllib.parse.unquote(file).split('?')[0]).name # '%2F' to '/', split https://url.com/file.txt?auth file = Path(urllib.parse.unquote(file).split('?')[0]).name # '%2F' to '/', split https://url.com/file.txt?auth
if Path(file).is_file():
print(f'Found {url} locally at {file}') # file already exists
else:
print(f'Downloading {url} to {file}...') print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, file) torch.hub.download_url_to_file(url, file)
assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}' # check