#!/usr/bin/env python3
"""
Migration script from mash-based repo layout to distrepos-based repo layout.
This script rearranges the repo directories given on the command line
to the layout used by the EL9 repo server, which, among a few other changes,
groups RPMs into subdirectories based on their starting letter.

To work with existing repo metadata, the script creates symlinks from the new
locations to the old locations.

A migration should be followed up with an rsync from the new repo server,
with the `--delay-updates --delete-delay` flags added.  For example,

    rsync -av --delay-updates --delete-delay \\
        repo-rsync-itb.osg-htc.org::osg/24-main/ /mirror/osg/24-main

This will update the metadata and delete the symlinks, making sure everything
has been downloaded before making any changes.
"""

import logging
import os
import re
import shutil
import sys
import typing as t
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from pathlib import Path

# Directory names that identify an arch-specific (binary) repo.  Used to
# select repos for the binary migration and to check the parent directory
# of debug repos.
BINARY_ARCHES = ["aarch64", "x86_64"]
# Filename globs for RPMs that are placed into Packages/condor-* subdirectories
# instead of the per-letter Packages/<letter> subdirectories.
CONDOR_RPM_GLOBS = [
    "condor-*.rpm",
    "htcondor-ce-*.rpm",
    "htcondor-release-*.rpm",
    "minicondor-*.rpm",
    "pelican-*.rpm",
    "python3-condor-*.rpm",
]


# Module-level logger; the handler and level are configured in the
# `__main__` guard at the bottom of the file.
_log = logging.getLogger(__name__)


def move_and_symlink(frompath: os.PathLike, topath: os.PathLike):
    """
    Relocate `frompath` to `topath`, then leave a symlink behind at
    `frompath` that points at the new location.

    The symlink target is written relative to the directory that contained
    `frompath`, so the link does not embed an absolute path.

    Args:
        frompath: The existing file or directory to move.
        topath: The new location; its parent directory must already exist.
    """
    os.rename(frompath, topath)
    link_dir = os.path.dirname(frompath)
    link_target = os.path.relpath(topath, link_dir)
    os.symlink(link_target, frompath)


def hardlink_or_copy_file(frompath: os.PathLike, topath: os.PathLike):
    """
    Make `topath` refer to the same content as `frompath`.

    A hardlink is attempted first.  If the OS refuses for any reason
    (any OSError from the link call), a full copy including metadata is
    made with shutil.copy2 instead.

    Args:
        frompath: The existing file.
        topath: The path to create.
    """
    try:
        os.link(frompath, topath)
        return
    except OSError:
        # Hardlinking was not possible; pay the disk space for a real copy.
        shutil.copy2(frompath, topath)


def get_condor_package_subdirs(repo: Path):
    """
    Work out which Packages/condor-* subdirectories apply to `repo`.

    The decision is made from the name of an ancestor directory of the
    resolved repo path: the grandparent for repos named "debug" or "SRPMS",
    the parent for everything else.

    Returns:
        ["condor-release", "condor-update"] for "testing" or "release",
        ["condor-daily"] for "development", and all three names (in that
        order) when the ancestor name is anything else.
    """
    resolved = repo.resolve()
    # "debug" and "SRPMS" repos sit one directory deeper than the others.
    if repo.name in ("debug", "SRPMS"):
        level_dir = resolved.parent.parent
    else:
        level_dir = resolved.parent
    level = level_dir.name

    stable_subdirs = ["condor-release", "condor-update"]
    daily_subdirs = ["condor-daily"]

    if level == "development":
        return daily_subdirs
    if level in ("testing", "release"):
        return stable_subdirs
    # Unknown level: cover every possibility.
    return stable_subdirs + daily_subdirs


def migrate_one_repo(repo: Path, packages_dir: Path, dry_run: bool = False) -> bool:
    """
    Migrate all of the RPMs in one repo to the new layout.  Skips a repo if
    there are any RPMs from OSG 3.6 or earlier, since the layouts for those
    repos didn't change.

    Only `*.rpm` files directly inside `repo` are considered.  Entries that
    are already symlinks are left alone so that the migration can be re-run
    on a directory that was (partially) migrated before.

    Args:
        repo: The repo directory to migrate.
        packages_dir: The Packages directory to move RPMs to.  Symlinks will be
            created in the original locations.
        dry_run: Set this to True to avoid making actual changes and only print
            what would be done.

    Returns:
        True if RPMs were migrated, False if the migration was skipped, for
        example due to pre-OSG-23 RPMs being found.
    """
    all_rpms = sorted(repo.glob("*.rpm"))
    # Check the whole repo up front so that we never half-migrate a repo
    # that turns out to contain old-layout RPMs.
    for rpm in all_rpms:
        if re.search(r"[.]osg(3[123456]|devops)", rpm.name):
            _log.warning(f"Pre-OSG-23 RPM found: {rpm}.  Not migrating {repo}")
            return False

    condor_package_subdirs = get_condor_package_subdirs(repo)

    for rpm in all_rpms:
        if rpm.is_symlink():
            # This directory might have already been migrated.
            _log.debug(f"Skipping symlink {rpm}")
            continue

        # The new repo layout puts RPMs taken from the Condor repos into
        # subdirectories based on which Condor repo they were taken from.
        is_condor_rpm = any(rpm.match(gl) for gl in CONDOR_RPM_GLOBS)
        if is_condor_rpm:
            destdir = packages_dir / condor_package_subdirs[0]
        # Other RPMs are moved into directories based on the first letter of
        # the RPM (or '0' if the first character is a number).
        elif rpm.name[0] in "0123456789":
            destdir = packages_dir / "0"
        else:
            destdir = packages_dir / rpm.name[0].lower()

        destfile = destdir / rpm.name
        _log.info(f"Move {rpm} to {destfile}")
        if not dry_run:
            destdir.mkdir(exist_ok=True, parents=True)
            move_and_symlink(rpm, destfile)

        if is_condor_rpm:
            # The Condor RPMs in this repo might be from a combination of UW
            # repos, e.g., both condor-release and condor-update.  We don't
            # know _which_ condor repo they were taken from so to be safe,
            # put the RPM in all of them.  Use hardlinks if possible to save
            # disk space.
            for other_subdir in condor_package_subdirs[1:]:
                other_destdir = packages_dir / other_subdir
                other_destfile = other_destdir / rpm.name
                _log.info(f"Copy {rpm} to {other_destfile}")
                if not dry_run:
                    other_destdir.mkdir(exist_ok=True, parents=True)
                    # By now `rpm` is a relative symlink left behind by
                    # move_and_symlink().  Hardlinking a symlink may link the
                    # symlink itself rather than its target, and its relative
                    # target would dangle from the other directory.  Link
                    # from the real file at its new location instead.
                    hardlink_or_copy_file(destfile, other_destfile)

    return True


def migrate_source(args):
    """
    Migrate SRPMs.  This is two steps:
    1. Move the RPMs into Packages/<letter> subdirectories as usual
    2. Move the `source/SRPMS` dir to `src` and create a compat symlink.

    If step 1 does not migrate any RPMs (because it's a pre-OSG-23 repo) then
    the rest is skipped.

    If `source/SRPMS` is already a symlink, we assume it's been migrated
    and leave it alone.  Also if `src` exists, we assume it's been migrated
    and also do nothing.  In both cases only that one repo is skipped; the
    remaining repos are still processed.

    Args:
        args: Parsed command-line arguments; `args.dirs` and `args.dry_run`
            are used.
    """
    for repo in repos(args.dirs):
        # Only repos whose path ends in source/SRPMS are source repos.
        if repo.parts[-2:] != ("source", "SRPMS"):
            continue
        if repo.is_symlink():
            _log.info(f"{repo} is already a symlink; skipping")
            # Skip just this repo; other trees may still need migrating.
            continue
        # `src` is a sibling of the `source` directory.
        dest = repo.resolve().parent.parent / "src"
        if dest.exists():
            _log.info(f"{dest} already exists; skipping")
            continue

        _log.info(f"Migrating {repo}")
        if migrate_one_repo(repo, repo / "Packages", dry_run=args.dry_run):
            _log.info(f"Rename {repo} to {dest} and create symlink")
            if not args.dry_run:
                move_and_symlink(repo, dest)
        else:
            _log.info(f"Skipping rename of {repo} to {dest}")


def migrate_binary(args):
    """
    Migrate the RPMs of every arch-specific repo found under `args.dirs`.

    A repo counts as arch-specific when its directory name is one of
    BINARY_ARCHES.  RPMs are moved into the repo's own `Packages` directory.
    """
    # Keep this lazy so that scanning and migrating stay interleaved.
    arch_repos = (
        candidate
        for candidate in repos(args.dirs)
        if candidate.name in BINARY_ARCHES
    )
    for arch_repo in arch_repos:
        _log.info(f"Migrating {arch_repo}")
        migrate_one_repo(arch_repo, arch_repo / "Packages", dry_run=args.dry_run)


def migrate_debug(args):
    """
    Migrate the debuginfo and debugsource RPMs.
    In the new repo layout, the debug RPMs are mixed in with the non-debug RPMs,
    though the repo metadata remains in the "debug" subdirectory.  A "pkglist"
    file is used to list which files are in the debug repo vs the main repo,
    but the migrate script uses symlinks instead.

    Only repos named "debug" that sit directly inside an arch-specific repo
    directory (one of BINARY_ARCHES) are migrated; their RPMs are moved into
    the `Packages` directory of that parent repo.
    """
    for repo in repos(args.dirs):
        # Both conditions must hold: the destination is the *parent's*
        # Packages directory, which only makes sense for <arch>/debug.
        if repo.name != "debug" or repo.parent.name not in BINARY_ARCHES:
            continue
        _log.info(f"Migrating {repo}")
        migrate_one_repo(repo, repo.parent / "Packages", dry_run=args.dry_run)


def repos(dirs: t.Sequence[os.PathLike]) -> t.Iterator[Path]:
    """
    Yield every repo directory found beneath each entry of `dirs`.

    A repo directory is the parent of any path named `repodata` found by a
    recursive glob of the tree.
    """
    for tree_root in dirs:
        yield from (
            marker.parent for marker in Path(tree_root).glob("**/repodata")
        )


def get_args(argv):
    """
    Build the command-line parser and parse `argv`.

    Args:
        argv: The full argument vector; element 0 (the program name) is
            ignored.

    Returns:
        The parsed namespace with `dirs`, `actions`, and `dry_run`.  When no
        action flag was given, `actions` is set to every known action.
    """
    every_action = ["source", "binary", "debug"]
    # Help text for the per-action flags, keyed by action name.
    action_help = {
        "source": "Migrate source RPMs",
        "binary": "Migrate binary RPMs",
        "debug": "Migrate debuginfo and debugsource RPMs",
    }

    parser = ArgumentParser(
        description=__doc__, formatter_class=RawDescriptionHelpFormatter
    )
    parser.add_argument("dirs", nargs="*", help="Directories to migrate")
    # Each action flag appends its own name to the shared "actions" list.
    for action_name in every_action:
        parser.add_argument(
            f"--{action_name}",
            action="append_const",
            dest="actions",
            const=action_name,
            help=action_help[action_name],
        )
    parser.add_argument(
        "--all",
        action="store_const",
        dest="actions",
        const=every_action,
        help="Run all migrations (default)",
    )
    parser.add_argument(
        "-n",
        "--dry-run",
        action="store_true",
        help="Only show what would be done, do not migrate",
    )
    parser.set_defaults(actions=[], dirs=[])

    parsed = parser.parse_args(argv[1:])
    # No explicit selection means "do everything".
    if not parsed.actions:
        parsed.actions = every_action
    return parsed


def main(argv=None):
    """
    Script entry point: parse the arguments, then run each selected
    migration in the fixed order source, binary, debug.

    Args:
        argv: Argument vector including the program name; falls back to
            sys.argv when not given (or empty).

    Returns:
        0, for use as the process exit status.
    """
    args = get_args(argv or sys.argv)
    # Ordered (action name, handler) pairs; the order here is the run order.
    migrations = (
        ("source", migrate_source),
        ("binary", migrate_binary),
        ("debug", migrate_debug),
    )
    for action, migrate in migrations:
        if action in args.actions:
            migrate(args)

    return 0


# When run as a script: log everything (DEBUG and up) as bare messages and
# exit with the status returned by main().
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG, format="%(message)s")
    sys.exit(main())