# download.py -- fetch manylinux container image layers from quay.io.
  1. import collections
  2. from dataclasses import dataclass, field
  3. import glob
  4. import hashlib
  5. import json
  6. from pathlib import Path
  7. import requests
  8. import shutil
  9. import tempfile
  10. from typing import List, Optional
  11. from .config import Arch, LinuxTag
  12. from ..utils.deps import CACHE_DIR
  13. from ..utils.log import debug, log
# Buffer size (bytes) used when hashing cached layers and streaming downloads.
# NOTE(review): 8189 looks like a typo for 8192 (8 KiB) -- harmless either way,
# so the value is left as-is.
CHUNK_SIZE = 8189
# HTTP status code treated as success for all quay.io requests.
SUCCESS = 200
class DownloadError(Exception):
    """Raised when a quay.io request fails or a downloaded layer's hash mismatches."""
    pass
class TarError(Exception):
    """Tar-archive related error.

    NOTE(review): not raised anywhere in this chunk -- presumably used by
    extraction code elsewhere in the file; verify before removing.
    """
    pass
  20. @dataclass(frozen=True)
  21. class Downloader:
  22. '''Manylinux tag.'''
  23. tag: LinuxTag
  24. '''Platform architecture.'''
  25. arch: Optional[Arch] = None
  26. '''Docker image.'''
  27. image: str = field(init=False)
  28. '''Authentication token.'''
  29. token: str = field(init=False)
  30. def __post_init__(self):
  31. # Set host arch if not explictly specified.
  32. if self.arch is None:
  33. arch = Arch.from_host()
  34. object.__setattr__(self, 'arch', arch)
  35. # Set image name.
  36. image = f'{self.tag}_{self.arch}'
  37. object.__setattr__(self, 'image', image)
  38. def default_destination(self):
  39. return Path(CACHE_DIR) / f'share/images/{self.image}'
  40. def download(
  41. self,
  42. destination: Optional[Path]=None,
  43. tag: Optional[str] = 'latest'):
  44. destination = destination or self.default_destination()
  45. # Authenticate to quay.io.
  46. repository = f'pypa/{self.image}'
  47. url = 'https://quay.io/v2/auth'
  48. url = f'{url}?service=quay.io&scope=repository:{repository}:pull'
  49. debug('GET', url)
  50. r = requests.request('GET', url)
  51. if r.status_code == SUCCESS:
  52. object.__setattr__(self, 'token', r.json()['token'])
  53. else:
  54. raise DownloadError(r.status_code, r.text, r.headers)
  55. # Fetch image manifest.
  56. repository = f'pypa/{self.image}'
  57. url = f'https://quay.io/v2/{repository}/manifests/{tag}'
  58. headers = {
  59. 'Authorization': f'Bearer {self.token}',
  60. 'Accept': 'application/vnd.docker.distribution.manifest.v2+json'
  61. }
  62. debug('GET', url)
  63. r = requests.request('GET', url, headers=headers)
  64. if r.status_code == SUCCESS:
  65. image_digest = r.headers['Docker-Content-Digest'].split(':', 1)[-1]
  66. manifest = r.json()
  67. else:
  68. raise DownloadError(r.status_code, r.text, r.headers)
  69. # Check missing layers to download.
  70. required = [layer['digest'].split(':', 1)[-1] for layer in
  71. manifest['layers']]
  72. missing = []
  73. for hash_ in required:
  74. path = destination / f'layers/{hash_}.tar.gz'
  75. if path.exists():
  76. hasher = hashlib.sha256()
  77. with path.open('rb') as f:
  78. while True:
  79. chunk = f.read(CHUNK_SIZE)
  80. if not chunk:
  81. break
  82. else:
  83. hasher.update(chunk)
  84. h = hasher.hexdigest()
  85. if h != hash_:
  86. missing.append(hash_)
  87. else:
  88. debug('FOUND', f'{hash_}.tar.gz')
  89. else:
  90. missing.append(hash_)
  91. # Fetch missing layers.
  92. with tempfile.TemporaryDirectory() as tmpdir:
  93. workdir = Path(tmpdir)
  94. for i, hash_ in enumerate(missing):
  95. log('DOWNLOAD', f'{self.image} ({tag}) '
  96. f'[{i + 1} / {len(missing)}]')
  97. filename = f'{hash_}.tar.gz'
  98. url = f'https://quay.io/v2/{repository}/blobs/sha256:{hash_}'
  99. debug('GET', url)
  100. r = requests.request('GET', url, headers=headers, stream=True)
  101. if r.status_code == SUCCESS:
  102. debug('STREAM', filename)
  103. else:
  104. raise DownloadError(r.status_code, r.text, r.headers)
  105. hasher = hashlib.sha256()
  106. tmp = workdir / 'layer.tgz'
  107. with open(tmp, "wb") as f:
  108. for chunk in r.iter_content(CHUNK_SIZE):
  109. if chunk:
  110. f.write(chunk)
  111. hasher.update(chunk)
  112. h = hasher.hexdigest()
  113. if h != hash_:
  114. raise DownloadError(
  115. f'bad hash (expected {name}, found {h})'
  116. )
  117. layers_dir = destination / 'layers'
  118. layers_dir.mkdir(exist_ok=True, parents=True)
  119. shutil.move(tmp, layers_dir / filename)
  120. tags_dir = destination / 'tags'
  121. tags_dir.mkdir(exist_ok=True, parents=True)
  122. with open(tags_dir / f'{tag}.json', "w") as f:
  123. json.dump({'digest': image_digest, 'layers': required}, f)
  124. # Remove unused layers.
  125. required = set(required)
  126. for tag in glob.glob(str(destination / 'tags/*.json')):
  127. with open(tag) as f:
  128. tag = json.load(f)
  129. required |= set(tag["layers"])
  130. required = [f'{hash_}.tar.gz' for hash_ in required]
  131. for layer in glob.glob(str(destination / 'layers/*.tar.gz')):
  132. layer = Path(layer)
  133. if layer.name not in required:
  134. debug('REMOVE', f'{self.image} [layer/{layer.stem}]')
  135. layer.unlink()