downloads.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
  2. """
  3. Download utils
  4. """
  5. import logging
  6. import os
  7. import subprocess
  8. import urllib
  9. from pathlib import Path
  10. import requests
  11. import torch
  12. def is_url(url, check=True):
  13. # Check if string is URL and check if URL exists
  14. try:
  15. url = str(url)
  16. result = urllib.parse.urlparse(url)
  17. assert all([result.scheme, result.netloc]) # check if is url
  18. return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online
  19. except (AssertionError, urllib.request.HTTPError):
  20. return False
  21. def gsutil_getsize(url=''):
  22. # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
  23. output = subprocess.check_output(['gsutil', 'du', url], shell=True, encoding='utf-8')
  24. if output:
  25. return int(output.split()[0])
  26. return 0
  27. def url_getsize(url='https://ultralytics.com/images/bus.jpg'):
  28. # Return downloadable file size in bytes
  29. response = requests.head(url, allow_redirects=True)
  30. return int(response.headers.get('content-length', -1))
  31. def curl_download(url, filename, *, silent: bool = False) -> bool:
  32. """
  33. Download a file from a url to a filename using curl.
  34. """
  35. silent_option = 'sS' if silent else '' # silent
  36. proc = subprocess.run([
  37. 'curl',
  38. '-#',
  39. f'-{silent_option}L',
  40. url,
  41. '--output',
  42. filename,
  43. '--retry',
  44. '9',
  45. '-C',
  46. '-',])
  47. return proc.returncode == 0
  48. def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
  49. # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
  50. from utils.general import LOGGER
  51. file = Path(file)
  52. assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
  53. try: # url1
  54. LOGGER.info(f'Downloading {url} to {file}...')
  55. torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO)
  56. assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check
  57. except Exception as e: # url2
  58. if file.exists():
  59. file.unlink() # remove partial downloads
  60. LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
  61. # curl download, retry and resume on fail
  62. curl_download(url2 or url, file)
  63. finally:
  64. if not file.exists() or file.stat().st_size < min_bytes: # check
  65. if file.exists():
  66. file.unlink() # remove partial downloads
  67. LOGGER.info(f'ERROR: {assert_msg}\n{error_msg}')
  68. LOGGER.info('')
  69. def attempt_download(file, repo='ultralytics/yolov5', release='v7.0'):
  70. # Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v7.0', etc.
  71. from utils.general import LOGGER
  72. def github_assets(repository, version='latest'):
  73. # Return GitHub repo tag (i.e. 'v7.0') and assets (i.e. ['yolov5s.pt', 'yolov5m.pt', ...])
  74. if version != 'latest':
  75. version = f'tags/{version}' # i.e. tags/v7.0
  76. response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json() # github api
  77. return response['tag_name'], [x['name'] for x in response['assets']] # tag, assets
  78. file = Path(str(file).strip().replace("'", ''))
  79. if not file.exists():
  80. # URL specified
  81. name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.
  82. if str(file).startswith(('http:/', 'https:/')): # download
  83. url = str(file).replace(':/', '://') # Pathlib turns :// -> :/
  84. file = name.split('?')[0] # parse authentication https://url.com/file.txt?auth...
  85. if Path(file).is_file():
  86. LOGGER.info(f'Found {url} locally at {file}') # file already exists
  87. else:
  88. safe_download(file=file, url=url, min_bytes=1E5)
  89. return file
  90. # GitHub assets
  91. assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default
  92. try:
  93. tag, assets = github_assets(repo, release)
  94. except Exception:
  95. try:
  96. tag, assets = github_assets(repo) # latest release
  97. except Exception:
  98. try:
  99. tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
  100. except Exception:
  101. tag = release
  102. if name in assets:
  103. file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required)
  104. safe_download(file,
  105. url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
  106. min_bytes=1E5,
  107. error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag}')
  108. return str(file)