| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166 |
- from dataclasses import dataclass, field
- import glob
- import hashlib
- import json
- from pathlib import Path
- import requests
- import shutil
- import tempfile
- from typing import Optional
- from .config import Arch, LinuxTag
- from ..utils.deps import CACHE_DIR
- from ..utils.log import debug, log
- CHUNK_SIZE = 8189
- SUCCESS = 200
- class DownloadError(Exception):
- pass
- @dataclass(frozen=True)
- class Downloader:
- '''Manylinux tag.'''
- tag: LinuxTag
- '''Platform architecture.'''
- arch: Optional[Arch] = None
- '''Docker image.'''
- image: str = field(init=False)
- '''Authentication token.'''
- token: str = field(init=False)
- def __post_init__(self):
- # Set host arch if not explictly specified.
- if self.arch is None:
- arch = Arch.from_host()
- object.__setattr__(self, 'arch', arch)
- # Set image name.
- image = f'{self.tag}_{self.arch}'
- object.__setattr__(self, 'image', image)
- def default_destination(self):
- return Path(CACHE_DIR) / f'share/images/{self.image}'
- def download(
- self,
- destination: Optional[Path]=None,
- tag: Optional[str] = 'latest'
- ):
- '''Download Manylinux image'''
- destination = destination or self.default_destination()
- # Authenticate to quay.io.
- repository = f'pypa/{self.image}'
- log('PULL', f'{self.image}:{tag}')
- url = 'https://quay.io/v2/auth'
- url = f'{url}?service=quay.io&scope=repository:{repository}:pull'
- debug('GET', url)
- r = requests.request('GET', url)
- if r.status_code == SUCCESS:
- object.__setattr__(self, 'token', r.json()['token'])
- else:
- raise DownloadError(r.status_code, r.text, r.headers)
- # Fetch image manifest.
- repository = f'pypa/{self.image}'
- url = f'https://quay.io/v2/{repository}/manifests/{tag}'
- headers = {
- 'Authorization': f'Bearer {self.token}',
- 'Accept': 'application/vnd.docker.distribution.manifest.v2+json'
- }
- debug('GET', url)
- r = requests.request('GET', url, headers=headers)
- if r.status_code == SUCCESS:
- image_digest = r.headers['Docker-Content-Digest'].split(':', 1)[-1]
- manifest = r.json()
- else:
- raise DownloadError(r.status_code, r.text, r.headers)
- # Check missing layers to download.
- required = [layer['digest'].split(':', 1)[-1] for layer in
- manifest['layers']]
- missing = []
- for hash_ in required:
- path = destination / f'layers/{hash_}.tar.gz'
- if path.exists():
- hasher = hashlib.sha256()
- with path.open('rb') as f:
- while True:
- chunk = f.read(CHUNK_SIZE)
- if not chunk:
- break
- else:
- hasher.update(chunk)
- h = hasher.hexdigest()
- if h != hash_:
- missing.append(hash_)
- else:
- debug('FOUND', f'{hash_}.tar.gz')
- else:
- missing.append(hash_)
- # Fetch missing layers.
- with tempfile.TemporaryDirectory() as tmpdir:
- workdir = Path(tmpdir)
- for i, hash_ in enumerate(missing):
- log('DOWNLOAD', f'{self.image} ({tag}) '
- f'[{i + 1} / {len(missing)}]')
- filename = f'{hash_}.tar.gz'
- url = f'https://quay.io/v2/{repository}/blobs/sha256:{hash_}'
- debug('GET', url)
- r = requests.request('GET', url, headers=headers, stream=True)
- if r.status_code == SUCCESS:
- debug('STREAM', filename)
- else:
- raise DownloadError(r.status_code, r.text, r.headers)
- hasher = hashlib.sha256()
- tmp = workdir / 'layer.tgz'
- with open(tmp, "wb") as f:
- for chunk in r.iter_content(CHUNK_SIZE):
- if chunk:
- f.write(chunk)
- hasher.update(chunk)
- h = hasher.hexdigest()
- if h != hash_:
- raise DownloadError(
- f'bad hash (expected {hash_}, found {h})'
- )
- layers_dir = destination / 'layers'
- layers_dir.mkdir(exist_ok=True, parents=True)
- shutil.move(tmp, layers_dir / filename)
- tags_dir = destination / 'tags'
- tags_dir.mkdir(exist_ok=True, parents=True)
- with open(tags_dir / f'{tag}.json', "w") as f:
- json.dump({'digest': image_digest, 'layers': required}, f)
- # Remove unused layers.
- required = set(required)
- for tag in glob.glob(str(destination / 'tags/*.json')):
- with open(tag) as f:
- tag = json.load(f)
- required |= set(tag["layers"])
- required = [f'{hash_}.tar.gz' for hash_ in required]
- for layer in glob.glob(str(destination / 'layers/*.tar.gz')):
- layer = Path(layer)
- if layer.name not in required:
- debug('REMOVE', f'{self.image} [layer/{layer.stem}]')
- layer.unlink()
|