# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Support for running tasks that download remote content and re-export
# it as task artifacts.

import os
import re
from dataclasses import dataclass
from textwrap import dedent
from typing import Callable

from voluptuous import Extra, Optional, Required

import taskgraph

from ..util import path
from ..util.cached_tasks import add_optimization
from ..util.schema import Schema, validate_schema
from ..util.treeherder import join_symbol
from .base import TransformSequence

CACHE_TYPE = "content.v1"

# A git revision must be a complete 40-character hexadecimal SHA-1. The
# pattern is applied with ``fullmatch`` so trailing characters are rejected.
_SHA1_RE = re.compile(r"[0-9a-fA-F]{40}")

#: Schema for fetch transforms
FETCH_SCHEMA = Schema(
    {
        Required(
            "name",
            description=dedent(
                """
                Name of the task.
                """.lstrip()
            ),
        ): str,
        Optional(
            "task-from",
            description=dedent(
                """
                Relative path (from config.path) to the file the task was defined
                in.
                """.lstrip()
            ),
        ): str,
        Required(
            "description",
            description=dedent(
                """
                Description of the task.
                """.lstrip()
            ),
        ): str,
        Optional("expires-after"): str,
        Optional("docker-image"): object,
        Optional(
            "fetch-alias",
            description=dedent(
                """
                An alias that can be used instead of the real fetch task name in
                fetch stanzas for tasks.
                """.lstrip()
            ),
        ): str,
        Optional(
            "artifact-prefix",
            description=dedent(
                """
                The prefix of the taskcluster artifact being uploaded.
                Defaults to `public/`; if it starts with something other than
                `public/` the artifact will require scopes to access.
                """.lstrip()
            ),
        ): str,
        Optional("attributes"): {str: object},
        Required("fetch"): {
            Required("type"): str,
            Extra: object,
        },
    }
)


# define a collection of payload builders, depending on the worker implementation
fetch_builders = {}


@dataclass(frozen=True)
class FetchBuilder:
    """Pairs a fetch type's schema with the function that builds its task."""

    # Schema that the ``fetch`` stanza of this type is validated against.
    schema: Schema
    # Called as ``builder(config, name, fetch)``; returns a dict of task fields.
    builder: Callable


def fetch_builder(name, schema):
    """Return a decorator registering a builder for the fetch type ``name``.

    ``schema`` is merged with a required ``type`` key whose value must equal
    ``name``. The decorated function is stored in ``fetch_builders`` under
    ``name`` and returned unchanged.
    """
    schema = Schema({Required("type"): name}).extend(schema)

    def wrap(func):
        fetch_builders[name] = FetchBuilder(schema, func)  # type: ignore
        return func

    return wrap


transforms = TransformSequence()
transforms.add_validate(FETCH_SCHEMA)


@transforms.add
def process_fetch_task(config, tasks):
    """Replace each task's ``fetch`` stanza with the fields its builder returns.

    Raises:
        Exception: if a task's fetch type has no registered builder.
    """
    # Converts fetch-url entries to the run schema.
    for task in tasks:
        name = task["name"]
        fetch = task.pop("fetch")
        typ = fetch["type"]

        if typ not in fetch_builders:
            raise Exception(f"Unknown fetch type {typ} in fetch {name}")

        # configure_fetch() validates ``fetch`` against the builder's schema,
        # so the stanza is not validated a second time here.
        task.update(configure_fetch(config, typ, name, fetch))

        yield task


def configure_fetch(config, typ, name, fetch):
    """Validate ``fetch`` for type ``typ`` and return the builder's result.

    Raises:
        Exception: if ``typ`` has no registered builder.
    """
    if typ not in fetch_builders:
        raise Exception(f"No fetch type {typ} in fetch {name}")
    validate_schema(fetch_builders[typ].schema, fetch, f"In task.fetch {name!r}:")

    return fetch_builders[typ].builder(config, name, fetch)


@transforms.add
def make_task(config, tasks):
    """Turn processed fetch tasks into ``run-task`` task descriptions.

    Each incoming task must already carry the ``command``, ``artifact_name``
    and ``digest_data`` keys produced by a fetch builder. Unless
    ``taskgraph.fast`` is set, ``add_optimization`` is called on every
    description with ``CACHE_TYPE`` and the builder's digest data.
    """
    # Fetch tasks are idempotent and immutable. Have them live for
    # essentially forever.
    if config.params["level"] == "3":
        expires = "1000 years"
    else:
        expires = config.graph_config._config.get("task-expires-after", "28 days")

    for task in tasks:
        name = task["name"]
        artifact_prefix = task.get("artifact-prefix", "public")

        # Work on copies so the dictionaries owned by the incoming task are
        # not modified as a side effect of building the description.
        env = dict(task.get("env", {}))
        env["UPLOAD_DIR"] = "/builds/worker/artifacts"

        attributes = dict(task.get("attributes", {}))
        attributes["fetch-artifact"] = path.join(artifact_prefix, task["artifact_name"])
        alias = task.get("fetch-alias")
        if alias:
            attributes["fetch-alias"] = alias

        task_desc = {
            "attributes": attributes,
            "name": name,
            "description": task["description"],
            "expires-after": task.get("expires-after", expires),
            "label": f"fetch-{name}",
            "run-on-projects": [],
            "run": {
                "using": "run-task",
                "checkout": False,
                "command": task["command"],
            },
            "worker-type": "images",
            "worker": {
                "chain-of-trust": True,
                "docker-image": task.get("docker-image", {"in-tree": "fetch"}),
                "env": env,
                "max-run-time": 900,
                "artifacts": [
                    {
                        "type": "directory",
                        "name": artifact_prefix,
                        "path": "/builds/worker/artifacts",
                    }
                ],
            },
        }

        if "treeherder" in config.graph_config:
            task_desc["treeherder"] = {
                "symbol": join_symbol("Fetch", name),
                "kind": "build",
                "platform": "fetch/opt",
                "tier": 1,
            }

        # A builder may name a secret; the task then needs the scope to read
        # it and the taskcluster proxy enabled on the worker.
        secret = task.get("secret")
        if secret:
            task_desc["scopes"] = ["secrets:get:" + secret]
            task_desc["worker"]["taskcluster-proxy"] = True

        if not taskgraph.fast:
            cache_name = task_desc["label"].replace(f"{config.kind}-", "", 1)

            # This adds the level to the index path automatically.
            add_optimization(
                config,
                task_desc,
                cache_type=CACHE_TYPE,
                cache_name=cache_name,
                digest_data=task["digest_data"],
            )
        yield task_desc


@fetch_builder(
    "static-url",
    schema={
        # The URL to download.
        Required("url"): str,
        # The SHA-256 of the downloaded content.
        Required("sha256"): str,
        # Size of the downloaded entity, in bytes.
        Required("size"): int,
        # GPG signature verification.
        Optional("gpg-signature"): {
            # URL where GPG signature document can be obtained. Can contain the
            # value ``{url}``, which will be substituted with the value from
            # ``url``.
            Required("sig-url"): str,
            # Path to file containing GPG public key(s) used to validate
            # download.
            Required("key-path"): str,
        },
        # The name to give to the generated artifact. Defaults to the file
        # portion of the URL. Using a different extension converts the
        # archive to the given type. Only conversion to .tar.zst is
        # supported.
        Optional("artifact-name"): str,
        # Strip the given number of path components at the beginning of
        # each file entry in the archive.
        # Requires an artifact-name ending with .tar.zst.
        Optional("strip-components"): int,
        # Add the given prefix to each file entry in the archive.
        # Requires an artifact-name ending with .tar.zst.
        Optional("add-prefix"): str,
        # Headers to pass alongside the request.
        Optional("headers"): {
            str: str,
        },
        # IMPORTANT: when adding anything that changes the behavior of the task,
        # it is important to update the digest data used to compute cache hits.
    },
)
def create_fetch_url_task(config, name, fetch):
    """Build the ``fetch-content static-url`` command for a URL download.

    Returns a dict with the ``command``, ``artifact_name``, ``env`` and
    ``digest_data`` keys.

    Raises:
        Exception: if no ``artifact-name`` is given and none can be derived
            from the last path segment of the URL.
    """
    url = fetch["url"]

    artifact_name = fetch.get("artifact-name")
    if not artifact_name:
        artifact_name = url.split("/")[-1]
    if not artifact_name:
        # e.g. a URL ending in "/": there is no file portion to fall back on,
        # and an empty name would produce an unusable destination path.
        raise Exception(
            f"Cannot derive an artifact name from url {url!r} "
            f'in fetch task "{name}"; specify artifact-name'
        )

    command = [
        "fetch-content",
        "static-url",
    ]

    # Arguments that matter to the cache digest
    args = [
        "--sha256",
        fetch["sha256"],
        "--size",
        f"{fetch['size']}",
    ]

    if fetch.get("strip-components"):
        args.extend(["--strip-components", f"{fetch['strip-components']}"])

    if fetch.get("add-prefix"):
        args.extend(["--add-prefix", fetch["add-prefix"]])

    command.extend(args)

    env = {}

    if "gpg-signature" in fetch:
        sig_url = fetch["gpg-signature"]["sig-url"].format(url=url)
        key_path = os.path.join(taskgraph.GECKO, fetch["gpg-signature"]["key-path"])  # type: ignore

        # The key contents are passed to the task through its environment.
        with open(key_path) as fh:
            gpg_key = fh.read()

        env["FETCH_GPG_KEY"] = gpg_key
        command.extend(
            [
                "--gpg-sig-url",
                sig_url,
                "--gpg-key-env",
                "FETCH_GPG_KEY",
            ]
        )

    if "headers" in fetch:
        for k, v in fetch["headers"].items():
            command.extend(["-H", f"{k}:{v}"])

    command.extend([url, f"/builds/worker/artifacts/{artifact_name}"])

    return {
        "command": command,
        "artifact_name": artifact_name,
        "env": env,
        # We don't include the GPG signature in the digest because it isn't
        # materially important for caching: GPG signatures are supplemental
        # trust checking beyond what the shasum already provides.
        "digest_data": args + [artifact_name],
    }


@fetch_builder(
    "git",
    schema={
        Required("repo"): str,
        Required("revision"): str,
        Optional("include-dot-git"): bool,
        Optional("artifact-name"): str,
        Optional("path-prefix"): str,
        # ssh-key is a taskcluster secret path (e.g. project/civet/github-deploy-key)
        # In the secret dictionary, the key should be specified as
        #  "ssh_privkey": "-----BEGIN OPENSSH PRIVATE KEY-----\nkfksnb3jc..."
        # n.b. The OpenSSH private key file format requires a newline at the end of the file.
        Optional("ssh-key"): str,
    },
)
def create_git_fetch_task(config, name, fetch):
    """Build the ``fetch-content git-checkout-archive`` command for a repo.

    ``path-prefix`` defaults to the last path segment of ``repo`` and
    ``artifact-name`` to ``<path-prefix>.tar.zst``. Returns a dict with the
    ``command``, ``artifact_name``, ``digest_data`` and ``secret`` keys.

    Raises:
        Exception: if ``revision`` is not exactly 40 hexadecimal characters.
    """
    path_prefix = fetch.get("path-prefix")
    if not path_prefix:
        path_prefix = fetch["repo"].rstrip("/").rsplit("/", 1)[-1]
    artifact_name = fetch.get("artifact-name")
    if not artifact_name:
        artifact_name = f"{path_prefix}.tar.zst"

    # fullmatch, not match: a prefix-only match would accept 40 hex characters
    # followed by arbitrary trailing text.
    if not _SHA1_RE.fullmatch(fetch["revision"]):
        raise Exception(f'Revision is not a sha1 in fetch task "{name}"')

    args = [
        "fetch-content",
        "git-checkout-archive",
        "--path-prefix",
        path_prefix,
        fetch["repo"],
        fetch["revision"],
        f"/builds/worker/artifacts/{artifact_name}",
    ]

    ssh_key = fetch.get("ssh-key")
    if ssh_key:
        args.append("--ssh-key-secret")
        args.append(ssh_key)

    digest_data = [fetch["revision"], path_prefix, artifact_name]
    if fetch.get("include-dot-git", False):
        args.append("--include-dot-git")
        digest_data.append(".git")

    return {
        "command": args,
        "artifact_name": artifact_name,
        "digest_data": digest_data,
        "secret": ssh_key,
    }