#!/bin/sh
""":" .

exec python "$0" "$@"
"""

# MIT License
#
# Copyright (c) 2021 Takahiro Ueda
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import print_function

__version__ = "1.0.0"

import argparse
import contextlib
import copy
import math
import os
import re
import subprocess
import sys

# `typing` may be missing on the oldest supported interpreters; in that case
# fall back to a plain flag so the rest of the module still imports.
try:
    from typing import TYPE_CHECKING
except ImportError:
    TYPE_CHECKING = False

if TYPE_CHECKING:
    # These names are referenced only from `# type:` comments, so they are
    # imported solely when a static type checker processes the file.
    from typing import (
        Any,
        Dict,
        Iterator,
        List,
        Optional,
        Sequence,
        TextIO,
        Tuple,
        Union,
        overload,
    )

    from typing_extensions import Literal
else:

    def overload(f):  # noqa: D103
        # Runtime stand-in for `typing.overload`: the decorated stub is
        # dropped (the name is bound to None) until the real implementation
        # that follows the stubs rebinds it.
        return None


__doc__ = """\
Generate form.set suited for the local machine.

Example
-------
$ formset.py -o
$ tform `formset.py -f` calcdia.frm
$ minos `formset.py -m` minos.file

Python versions
---------------
2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9
"""


if not hasattr(subprocess, "check_output"):
    # Python 2.6 (with a back-ported argparse) has no
    # `subprocess.check_output`; provide a minimal replacement and install it
    # under the standard name.
    def check_output(*popenargs, **kwargs):  # type: ignore[no-untyped-def]
        """Run a command and return whatever it wrote to stdout.

        The arguments are forwarded to `subprocess.Popen`. A non-zero exit
        status raises `subprocess.CalledProcessError`.
        """
        if "stdout" in kwargs:  # pragma: no cover
            raise ValueError("stdout argument not allowed, " "it will be overridden.")
        child = subprocess.Popen(  # type: ignore[call-overload]  # noqa: E501,S603
            stdout=subprocess.PIPE, *popenargs, **kwargs
        )
        captured = child.communicate()[0]
        status = child.poll()
        if not status:
            return captured
        # Report the command in the same form `Popen` received it.
        command = kwargs.get("args")
        if command is None:
            command = popenargs[0]
        # `output` keyword is not available in 2.6.
        raise subprocess.CalledProcessError(status, command)

    subprocess.check_output = check_output


@contextlib.contextmanager
def open_w_or_stdout(filename=None):
    # type: (Optional[str]) -> Iterator[TextIO]
    """Context manager for a file or stdout.

    Without `filename` (None or empty) the manager yields `sys.stdout` and
    leaves it open. With `filename` it yields a temporary file located next
    to the target; when the `with` body finishes normally the data is flushed
    to disk and the temporary file is renamed onto `filename`, so readers
    never observe a half-written file.

    If the body (or the final flush) raises, the temporary file is deleted,
    `filename` is left untouched and the exception propagates.
    """
    if not filename:
        yield sys.stdout
        return

    # See https://stackoverflow.com/a/2333979.
    tmpfilename = "{0}.tmp{1}".format(filename, os.getpid())
    f = open(tmpfilename, "w")
    try:
        try:
            yield f
            f.flush()
            os.fsync(f.fileno())
        finally:
            # Always release the handle, also when flushing failed.
            f.close()
    except BaseException:
        # Never publish partial output. Removal is best effort: the original
        # error is the one worth reporting.
        try:
            os.remove(tmpfilename)
        except OSError:
            pass
        raise
    os.rename(tmpfilename, filename)


def round_down(x, n):
    # type: (int, int) -> int
    """Return `x` lowered to a multiple of `n` (floor division semantics)."""
    multiples = x // n
    return multiples * n


def round_up(x, n):
    # type: (int, int) -> int
    """Return `x` raised to a multiple of `n`; exact multiples are kept."""
    # Adding n - 1 first turns the floor division into a ceiling division.
    padded = x + (n - 1)
    return padded // n * n


def metric_prefix(s):
    # type: (str) -> int
    """Translate a metric prefix into its decimal multiplier.

    Surrounding whitespace and letter case are ignored. The empty prefix
    means 1; "k", "m", "g" and "t" mean successive powers of 1000. Any other
    text raises ValueError quoting the prefix exactly as it was given.
    """
    multipliers = {
        "": 1,
        "k": 1000,
        "m": 1000 ** 2,
        "g": 1000 ** 3,
        "t": 1000 ** 4,
    }
    factor = multipliers.get(s.strip().lower())
    if factor is None:
        raise ValueError("unknown metric prefix: {0}".format(s))
    return factor


def parse_number(s):
    # type: (str) -> int
    """Parse a string as a number with a possible metric prefix.

    The text may end in one of the suffixes k, m, g or t (either case), which
    multiplies the value by the matching power of 1000. Plain integers are
    converted exactly; anything else is read as a float, scaled and truncated
    towards zero.

    Raises ValueError for text that is not a number and for values that have
    no integer equivalent (infinity, NaN, out-of-range exponents).
    """
    scale = 1
    m = re.match(r"(.*)([kmgtKMGT])$", s)
    if m:
        s = m.group(1)
        scale = metric_prefix(m.group(2))
    try:
        # Exact path: avoids float rounding for integers beyond 2**53.
        return int(s) * scale
    except ValueError:
        pass
    try:
        # May raise ValueError for bad `s`.
        return int(float(s) * scale)
    except OverflowError:
        # int(inf) raises OverflowError; callers only expect ValueError.
        raise ValueError("number out of range: {0}".format(s))


# Typing-only stub: with `tostring` literally True the result is a string.
@overload
def round_human_readable(x, up, tostring):  # noqa: D103
    # type: (int, bool, Literal[True]) -> str
    pass


# Typing-only stub: with `tostring` literally False the result is an integer.
@overload  # noqa: F811
def round_human_readable(x, up, tostring):  # noqa: D103, F811
    # type: (int, bool, Literal[False]) -> int
    pass


def round_human_readable(x, up, tostring):  # noqa: F811
    # type: (int, bool, bool) -> Union[int, str]
    """Round off `x` within a human readable form.

    `x` is rounded to three significant figures, upwards when `up` is true
    and downwards otherwise. With `tostring` the result is rendered using the
    largest of the suffixes K, M, G, T (powers of 1000) that represents the
    rounded value exactly, or as plain digits when none fits; without
    `tostring` the rounded integer itself is returned.

    Values with at most three digits are returned unchanged, and zero is
    returned as ``0`` / ``"0"``.
    """
    if x == 0:
        # log10(0) is undefined, and zero must not be rendered as "0T".
        return "0" if tostring else 0
    # Take 3 significant figures. For |x| < 1000 the step would be 1 or a
    # fraction; rounding is then skipped so the value stays an exact integer.
    exponent = int(math.floor(math.log10(abs(x)))) - 2
    if exponent > 0:
        round_off = round_up if up else round_down
        x = round_off(x, 10 ** exponent)
    # Find a good suffix which doesn't change the value.
    for power, suffix in ((4, "T"), (3, "G"), (2, "M"), (1, "K")):
        unit = 1000 ** power
        if x % unit == 0:
            return "{0}{1}".format(x // unit, suffix) if tostring else x
    return str(x) if tostring else x


class classproperty(property):  # noqa: N801
    """Decorator to make a property of a class.

    The wrapped getter receives the class (not an instance) and is evaluated
    on every attribute access, whether the attribute is looked up on the
    class itself or on one of its instances.
    """

    def __get__(self, cls, owner=None):
        # type: (Any, Optional[type]) -> Any
        """Getter.

        `cls` is the instance the attribute was accessed through (None for
        access on the class) and `owner` is the class. The result is the
        value returned by the getter called with the owning class.
        """
        if owner is None:
            # The descriptor protocol allows the owner to be omitted when an
            # instance is supplied; derive it from the instance.
            owner = type(cls)
        return self.fget(owner)  # type: ignore[misc]


class SystemInfo(object):
    """System information.

    Facts about the local machine, obtained by running ``lscpu`` and
    ``free -b``. Each command is run at most once: its parsed output is
    cached on the class and reused by later queries.
    """

    _cpu_info = None  # type: Optional[Dict[str, str]]
    _mem_info = None  # type: Optional[Dict[str, List[str]]]

    # When true, a note is written to stderr before each external command.
    verbose = False

    @classproperty
    def number_of_nodes(cls):  # noqa: N805
        # type: () -> int
        """Return the number of nodes (1 when lscpu reports no NUMA count)."""
        info = cls._get_cpu_info()
        if "NUMA node(s)" in info:
            return int(info["NUMA node(s)"])
        else:
            return 1

    @classproperty
    def number_of_cpus(cls):  # noqa: N805
        # type: () -> int
        """Return the number of cpus."""
        info = cls._get_cpu_info()
        return int(info["CPU(s)"])

    @classproperty
    def number_of_physical_cores(cls):  # noqa: N805
        # type: () -> int
        """Return the number of physical cores (sockets x cores per socket)."""
        info = cls._get_cpu_info()
        return int(info["Socket(s)"]) * int(info["Core(s) per socket"])

    @classproperty
    def total_memory(cls):  # noqa: N805
        # type: () -> int
        """Return the total physical memory in bytes."""
        info = cls._get_mem_info()
        # First column of the "Mem:" row printed by `free -b`.
        return int(info["Mem"][0])

    @staticmethod
    def _split_fields(text):
        # type: (str) -> List[Tuple[str, str]]
        """Split "key: value" lines into stripped (key, value) pairs.

        Each line is split at its first colon, so a value may itself contain
        colons. Lines without any colon (headers, blank lines) are skipped.
        """
        pairs = []
        for line in text.strip().split("\n"):
            key, sep, value = line.partition(":")
            if sep:
                pairs.append((key.strip(), value.strip()))
        return pairs

    @classmethod
    def _get_cpu_info(cls):
        # type: () -> Dict[str, str]
        """Return the fields printed by ``lscpu``, running it on first use."""
        if cls._cpu_info is None:
            if cls.verbose:
                sys.stderr.write("running lscpu...\n")
            # The child gets an environment holding only LANG=C, so the field
            # names looked up above are the untranslated ones.
            info = subprocess.check_output(  # noqa: S603,S607
                ["lscpu"], env={"LANG": "C"}
            ).decode("utf-8")
            cls._cpu_info = dict(cls._split_fields(info))
        return cls._cpu_info

    @classmethod
    def _get_mem_info(cls):
        # type: () -> Dict[str, List[str]]
        """Return the rows printed by ``free -b``, running it on first use.

        Each row label (the text before the colon) maps to the list of
        whitespace-separated columns that follow it.
        """
        if cls._mem_info is None:
            if cls.verbose:
                sys.stderr.write("running free...\n")
            info = subprocess.check_output(  # noqa: S603,S607
                ["free", "-b"], env={"LANG": "C"}
            ).decode("utf-8")
            cls._mem_info = dict(
                (key, value.split()) for (key, value) in cls._split_fields(info)
            )
        return cls._mem_info


class Setup(object):
    """Setup parameters.

    Every attribute whose name does not start with an underscore is a setup
    parameter and is reported by `items()`. Attributes starting with an
    underscore are internal: the target version and the byte sizes used by
    the memory estimate in `calc()`.
    """

    def __init__(self, target):
        # type: (Tuple[int, int, int]) -> None
        """Construct a set of setup parameters.

        `target` is the version (major, minor, patch) whose default values
        are loaded.
        """
        self._target = target  # the target version (major, minor, patch).

        # v4.2.0
        # We take "WORDSIZE32" (64-bit) values.

        self.compresssize = 90000
        self.filepatches = 256
        self.hidesize = 0
        self.largepatches = 256
        self.largesize = 50000000
        self.maxtermsize = 40000  # 64-bit
        self.numstorecaches = 4
        self.scratchsize = 50000000
        self.sizestorecache = 32768
        self.smallextension = 20000000
        self.smallsize = 10000000
        self.sortiosize = 100000
        self.termsinsmall = 100000
        self.threadbucketsize = 500
        self.threads = -1  # form
        self.threadscratchoutsize = 2500000
        self.threadscratchsize = 100000
        self.workspace = 40000000  # 64-bit

        self.bracketindexsize = 200000
        self.constindex = 128
        self.continuationlines = 15
        self.functionlevels = 30
        self.maxnumbersize = 200
        self.maxwildcards = 100
        self.parentheses = 100
        self.processbucketsize = 1000
        self.subfilepatches = 64
        self.sublargepatches = 64
        self.sublargesize = 4000000
        self.subsmallextension = 800000
        self.subsmallsize = 500000
        self.subsortiosize = 32768
        self.subtermsinsmall = 10000

        # 64-bit
        self._ptrsize = 8
        self._possize = 8
        self._wordsize = 4

        if self._target >= (4, 2, 1):
            # v4.2.1
            # We take "WITHPTHREADS" (TFORM) values.
            self.largesize = 1500000000
            self.scratchsize = 500000000
            self.smallextension = 600000000
            self.smallsize = 300000000
            self.sortiosize = 200000
            self.termsinsmall = 3000000

    def items(self):
        # type: () -> Tuple[Tuple[str, int], ...]
        """Return pairs of parameters and values, sorted by parameter name.

        Internal attributes (leading underscore) are left out.
        """
        items = [(k, v) for (k, v) in self.__dict__.items() if k[0] != "_"]
        items.sort()
        return tuple(items)

    def __str__(self):
        # type: () -> str
        """Return the string representation.

        The text shows the estimated memory usage and the parameters as
        `calc()` would adjust them. The estimate is computed on a copy, so
        formatting the object does not modify it.
        """
        snapshot = self.copy()
        mem = snapshot.calc()
        params = ["{0}: {1}".format(k, v) for (k, v) in snapshot.items()]
        return "<Setup: {0} bytes, {1}>".format(mem, ", ".join(params))

    def copy(self):
        # type: () -> Setup
        """Return a shallow copy."""
        return copy.copy(self)

    def calc(self):
        # type: () -> int
        """Return an estimation of memory usage.

        NOTE: this method modifies the object. Before the estimate is summed
        up, parameters are raised in place to the minimum values demanded by
        the constraints below.
        """
        self.maxtermsize = max(self.maxtermsize, 200)

        self.compresssize = max(
            self.compresssize, 2 * self.maxtermsize * self._wordsize
        )
        self.sortiosize = max(self.sortiosize, self.maxtermsize * self._wordsize)

        # The strange factor WordSize**2 is used in the FORM source...
        self.scratchsize = max(
            self.scratchsize, 4 * self.maxtermsize * self._wordsize ** 2
        )
        if self.hidesize > 0:
            self.hidesize = max(
                self.hidesize, 4 * self.maxtermsize * self._wordsize ** 2
            )

        self.threadscratchsize = max(
            self.threadscratchsize, 4 * self.maxtermsize * self._wordsize ** 2
        )
        self.threadscratchoutsize = max(
            self.threadscratchoutsize, 4 * self.maxtermsize * self._wordsize ** 2
        )

        # constraints in RecalcSetups()

        self.filepatches = max(self.filepatches, self.threads)

        self.termsinsmall = round_up(self.termsinsmall, 16)

        numberofblocksinsort = 10
        minimumnumberofterms = 10
        n = numberofblocksinsort * minimumnumberofterms
        if self.threads >= 0:
            minbufsize = self.threads * (1 + n) * self.maxtermsize * self._wordsize
            if self.largesize + self.smallextension < minbufsize:
                self.largesize = minbufsize - self.smallextension

        # constraints in AllocSort()

        self.filepatches = max(self.filepatches, 4)

        self.smallsize = max(self.smallsize, 16 * self.maxtermsize * self._wordsize)

        self.smallextension = max(self.smallextension, self.smallsize * 3 // 2)

        if self.largesize > 0:
            self.largesize = max(self.largesize, 2 * self.smallsize)

        compinc = 2
        minbufsize = self.filepatches * (
            self.sortiosize + (compinc + 2 * self.maxtermsize) * self._wordsize
        )
        if self.largesize + self.smallextension < minbufsize:
            if self.largesize == 0:
                self.smallextension = minbufsize
            else:
                self.largesize = minbufsize - self.smallextension

        iotry = (
            (
                (self.largesize + self.smallextension)
                // self.filepatches
                // self._wordsize
            )
            - 2 * self.maxtermsize
            - compinc
        )  # in words
        self.sortiosize = max(self.sortiosize, iotry)  # bytes vs. words??

        # Compute the memory usage.

        mem = 0
        # Two scratch buffers plus the hide buffer, which counts as another
        # scratch buffer when no explicit hide size is set.
        mem += self.scratchsize * 2 + (
            self.hidesize if self.hidesize > 0 else self.scratchsize
        )
        mem += self.workspace * self._wordsize
        mem += (self.compresssize + 10) * self._wordsize
        mem += (
            self.largesize
            + self.smallextension
            + 3 * self.termsinsmall * self._ptrsize
            + self.sortiosize
        )

        storecachesize = self._possize * 2 * self._ptrsize + self._wordsize
        # ignore the padding
        storecachesize += self.sizestorecache
        mem += storecachesize * self.numstorecaches

        # Additional per-thread allocations, counted only with worker threads.
        if self.threads >= 1:
            mem += (
                self.threadscratchoutsize + self.threadscratchsize * 2
            ) * self.threads
            mem += self.workspace * self._wordsize * self.threads
            mem += (self.compresssize + 10) * self._wordsize * self.threads

            # Each thread gets sort buffers sized from an equal share of the
            # large/small buffers and of the file patches.
            mem += (
                self._thread_alloc_sort(
                    self.largesize // self.threads,
                    self.smallsize // self.threads,
                    self.smallextension // self.threads,
                    self.termsinsmall,
                    self.largepatches,
                    self.filepatches // self.threads,
                    self.sortiosize,
                )
                * self.threads
            )

            mem += storecachesize * self.numstorecaches * self.threads

            sizethreadbuckets = (
                (self.threadbucketsize + 1) * self.maxtermsize + 2
            ) * self._wordsize
            if self.threadbucketsize >= 250:
                sizethreadbuckets //= 4
            elif self.threadbucketsize >= 90:
                sizethreadbuckets //= 3
            elif self.threadbucketsize >= 40:
                sizethreadbuckets //= 2
            sizethreadbuckets //= self._wordsize
            mem += (
                (
                    2 * sizethreadbuckets * self._wordsize
                    + (self.threadbucketsize + 1) * self._possize
                )
                * 2
                * self.threads
            )
            if self.threads >= 3:
                mem += (
                    self.workspace * self._wordsize // 8
                    + 2 * self.maxtermsize * self._wordsize
                ) * (self.threads - 2)

        return mem

    def _thread_alloc_sort(
        self,
        largesize,
        smallsize,
        smallextension,
        termsinsmall,
        largepatches,
        filepatches,
        sortiosize,
    ):
        # type: (int, int, int, int, int, int, int) -> int
        """Return the estimated size of the sort buffers of one thread.

        The arguments are the per-thread values; they are adjusted with the
        same AllocSort() constraints that `calc()` applies to the main
        buffers, without modifying the object. `largepatches` is accepted
        but does not enter the estimate.
        """
        filepatches = max(filepatches, 4)

        smallsize = max(smallsize, 16 * self.maxtermsize * self._wordsize)

        smallextension = max(smallextension, smallsize * 3 // 2)

        if largesize > 0:
            largesize = max(largesize, 2 * smallsize)

        compinc = 2
        minbufsize = filepatches * (
            sortiosize + (compinc + 2 * self.maxtermsize) * self._wordsize
        )
        if largesize + smallextension < minbufsize:
            if largesize == 0:
                smallextension = minbufsize
            else:
                largesize = minbufsize - smallextension

        iotry = (
            ((largesize + smallextension) // filepatches // self._wordsize)
            - 2 * self.maxtermsize
            - compinc
        )  # in words
        sortiosize = max(sortiosize, iotry)  # bytes vs. words??

        return (
            largesize + smallextension + 3 * termsinsmall * self._ptrsize + sortiosize
        )

    def scale(self, total_memory, lowest_scale=0.0, human_readable=False):
        # type: (int, float, bool) -> Setup
        """
        Scale to the given memory usage.

        Search for a scaling of the given setup parameters that
        results in the requested total memory usage, and return
        the rescaled setup object. If the requested memory usage
        is too low to be reached, return the parameters with the
        lowest possible usage (scaled to lowest_scale).

        With `human_readable`, the usage compared against the request is
        first rounded up to three significant figures. The object itself is
        not modified; the search works on copies.
        """
        sp0 = self.copy()
        # Presumably increasing MaxTermSize requires increasing WorkSpace, too.
        sp0.workspace = max(sp0.workspace, sp0.maxtermsize * 250)

        def f(x):
            # type: (float) -> Tuple[int, Setup]
            # Return (usage - total_memory, setup) for the scale factor `x`;
            # a negative first element means the setup fits into the budget.
            # Hopefully monotonically increasing.
            sp = sp0.copy()
            sp.smallsize = int(sp.smallsize * x)
            sp.largesize = int(sp.largesize * x)
            sp.termsinsmall = int(sp.termsinsmall * x)
            sp.scratchsize = int(sp.scratchsize * x)
            m = sp.calc()
            if human_readable:
                m = round_human_readable(m, True, False)
            return (-(total_memory - m), sp)

        miny, minsp = f(lowest_scale)
        if miny >= 0:
            # Even the smallest scaling does not fit below the budget.
            return minsp
        # Optimize the memory usage by bisection.
        max_iteration = 50
        x1 = 1.0
        x2 = None  # type: Optional[float]
        y1 = f(x1)[0]
        y2 = None  # type: Optional[int]
        for _i in range(max_iteration):
            if x2 is None:
                # No bracket yet: double or halve x1 until the sign changes.
                if y1 < 0:
                    x = x1 * 2.0
                    y = f(x)[0]
                    if y > 0:
                        x2 = x
                        y2 = y
                    else:
                        x1 = x
                        y1 = y
                else:
                    x = x1 * 0.5
                    y = f(x)[0]
                    if y < 0:
                        x2 = x1
                        y2 = y1
                        x1 = x
                        y1 = y
                    else:
                        x1 = x
                        y1 = y
            else:
                # Bracket known: x1 fits (y1 < 0), x2 does not; halve it.
                x = (x1 + x2) * 0.5
                y = f(x)[0]
                if y < 0:
                    x1 = x
                    y1 = y
                else:
                    x2 = x
                    y2 = y
            if x2 is not None:
                # Invariants of the bracket (explicit raises instead of
                # `assert`, so they are also checked under `python -O`).
                if not (y2 is not None):
                    raise AssertionError()
                if not (x1 < x2):
                    raise AssertionError()
                if not (y1 < y2):
                    raise AssertionError()
        # x1 is the lower end of the bracket.
        return f(x1)[1]


def main(args=None):
    # type: (Optional[Sequence[str]]) -> None
    """Entry point.

    Parse the command line (`args` defaults to ``sys.argv[1:]``), determine
    the CPU and memory budget of one job, scale the setup parameters to that
    budget and print the requested result: tform options (``--form``), minos
    options (``--minos``), the estimated memory usage (``--usage``) or, by
    default, the setup file contents (to stdout or to the ``--output`` file).

    Invalid arguments are reported through the argument parser, which exits
    the process.
    """
    if args is None:
        args = sys.argv[1:]

    # See https://bugs.python.org/issue22240, but the workaround given in that issue
    # gives a wrong result for console scripts.
    prog = os.path.basename(sys.argv[0])
    if prog == "__main__.py":
        prog = __name__.split(".")[-1]

    # Parse the command line arguments.
    parser = argparse.ArgumentParser(
        prog=prog,
        usage=("%(prog)s [options] [--] " "[par=val].. [par+=int].. [par*=float].."),
        epilog=(
            "On non-Linux systems, the number of physical CPUs and memory "
            "available on the machine may be not automatically detected. "
            "In such a case, one cannot use the default parameters "
            "depending on those values and needs to explicitly specify "
            "--ncpus, --total-cpus and --total-memory."
        ),
        add_help=False,
    )
    parser.add_argument(
        "-h",
        "--help",
        action="store_const",
        const=True,
        help="show this help message and exit",
    )
    parser.add_argument(
        "-o",
        "--output",
        action="store",
        nargs="?",
        const="form.set",
        help=("output to FILE (default: no (stdout), " "FILE=form.set)"),
        metavar="FILE",
    )
    parser.add_argument(
        "-f",
        "--form",
        action="store_const",
        const=True,
        help="print tform options (e.g., -w4) and exit",
    )
    parser.add_argument(
        "-m",
        "--minos",
        action="store_const",
        const=True,
        help="print minos options (e.g., -m2x4) and exit",
    )
    parser.add_argument(
        "-u",
        "--usage",
        action="store_const",
        const=True,
        help="print expected initial memory usage and exit",
    )
    parser.add_argument(
        "-H",
        "--human-readable",
        action="store_const",
        const=True,
        help=("adjust to human-readable numbers " "(e.g., 1K, 23M, 456G)"),
    )
    parser.add_argument(
        "-1",
        "--one",
        action="store_const",
        const=-1,
        dest="ncpus",
        help="use cpus in a node on the machine (default)",
    )
    parser.add_argument(
        "--full",
        action="store_const",
        const=-99999,
        dest="ncpus",
        help="use cpus in all nodes on the machine",
    )
    parser.add_argument(
        "-n", "--ncpus", action="store", type=int, help="use N cpus", metavar="N"
    )
    parser.add_argument(
        "-p",
        "--percentage",
        action="store",
        default=75.0,
        type=float,
        help=("percentage of initial memory usage " "(default: 75.0)"),
        metavar="N",
    )
    parser.add_argument(
        "-t",
        "--target",
        action="store",
        default="4.2.1",
        type=str,
        help="target version of FORM (default: 4.2.1)",
        metavar="VER",
    )
    parser.add_argument(
        "--total-cpus",
        action="store",
        type=int,
        help="specify the total cpus on the machine",
        metavar="N",
    )
    parser.add_argument(
        "--total-memory",
        action="store",
        help="specify the total memory on the machine",
        metavar="N",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_const", const=True, help="verbose output"
    )
    parser.add_argument("args", nargs="*", help=argparse.SUPPRESS)
    opts = parser.parse_args(args=args)
    pars = {}

    # Help message. Handled before anything else so that `--help` also works
    # on machines where the system information cannot be obtained.
    if opts.help:
        parser.print_help()
        return

    # NOTE: when all of `--ncpus`, `--total-cpus` and `--total-memory` are
    # specified, we don't need to access the system information.

    if opts.verbose:
        SystemInfo.verbose = True

    if opts.total_cpus:
        total_cpus = opts.total_cpus
    else:
        total_cpus = SystemInfo.number_of_physical_cores

    if opts.total_memory:
        try:
            total_memory = parse_number(opts.total_memory)
        except (ValueError, OverflowError):
            parser.error(
                "non-integer value for total memory: {0}".format(opts.total_memory)
            )
    else:
        total_memory = SystemInfo.total_memory

    # Number of CPUs.
    if opts.ncpus is not None:
        ncpus = opts.ncpus
    else:
        # Use 1 node for each job by default.
        ncpus = -1
    if ncpus < 0:
        # Use (-ncpus) nodes.
        ncpus = -ncpus * (total_cpus // SystemInfo.number_of_nodes)
    ncpus = max(ncpus, 1)
    ncpus = min(ncpus, total_cpus)

    # Target version.
    target_input = opts.target.split(".")
    if len(target_input) > 3 or any(not x.isdigit() for x in target_input):
        parser.error("invalid target version given: {0}".format(opts.target))
    if len(target_input) == 3:
        target = (int(target_input[0]), int(target_input[1]), int(target_input[2]))
    elif len(target_input) == 2:
        target = (int(target_input[0]), int(target_input[1]), 0)
    else:
        target = (int(target_input[0]), 0, 0)

    # Setup parameter in the arguments.
    sp = Setup(target)
    sp.threads = ncpus if ncpus >= 2 else -1

    for a in opts.args:
        m = re.match(r"([a-zA-Z][a-zA-Z0-9]*)([+*]?)=(.*)", a)
        if m:
            par = m.group(1).lower()
            ope = m.group(2)
            val = m.group(3)
            if par in sp.__dict__:
                # Known parameter.
                if ope == "" or ope == "+":
                    # We have par=val or par+=int.
                    try:
                        val = parse_number(val)
                    except (ValueError, OverflowError):
                        parser.error("non-integer value for parameter: {0}".format(a))
                    if ope == "":
                        setattr(sp, par, val)
                    else:
                        setattr(sp, par, getattr(sp, par) + val)
                    continue
                else:
                    # We have par*=float.
                    try:
                        factor = float(val)
                    except ValueError:
                        parser.error("non-float value for parameter: {0}".format(a))
                    try:
                        scaled = int(getattr(sp, par) * factor)
                    except (ValueError, OverflowError):
                        # int() rejects NaN and infinity.
                        parser.error("non-finite value for parameter: {0}".format(a))
                    setattr(sp, par, scaled)
                    continue
            elif ope == "":
                # Unknown parameter given by par=val. Add it to the dictionary.
                pars[par] = val
                continue
        parser.error("unrecognized argument: {0}".format(a))

    # Our resource.
    cpus = max(sp.threads, 1)
    memory = int(total_memory * opts.percentage / 100.0 * cpus / total_cpus)

    # For --form option.
    if opts.form:
        print("-w{0}".format(cpus))
        return

    # For --minos option.
    if opts.minos:
        print("-m{0}x{1}".format(total_cpus // cpus, cpus))
        return

    sp = sp.scale(memory, human_readable=opts.human_readable)

    # Final memory usage we've found.
    memory_usage = sp.calc()

    if memory_usage > memory:
        shortage = memory_usage - memory
        parser.exit(
            -1,
            ("failed to find parameters: {0} bytes shortage\n").format(
                round_human_readable(shortage, True, True)
                if opts.human_readable
                else str(shortage)
            ),
        )

    # For --usage option.
    if opts.usage:
        if opts.human_readable:
            memory_usage_str = round_human_readable(memory_usage, True, True)
        else:
            memory_usage_str = str(memory_usage)
        print(memory_usage_str)
        return

    # Output.
    with open_w_or_stdout(opts.output) as fi:

        def round_memory(m):
            # type: (int) -> Union[int, str]
            return round_human_readable(m, False, True) if opts.human_readable else m

        # Header comment recording how the file was generated. The arguments
        # echoed are the ones actually parsed, which differ from sys.argv
        # when `main` is called with an explicit argument list.
        print(
            (
                "# {0}{1} (cpu: {2}, mem: {3}; "
                "total cpu: {4}, total mem: {5}; {6}x{7})"
            ).format(
                parser.prog + " " + __version__,
                (" " if args else "") + " ".join(args),
                cpus,
                round_memory(memory),
                total_cpus,
                round_memory(total_memory),
                total_cpus // cpus,
                cpus,
            ),
            file=fi,
        )

        sp0 = Setup(target)  # default value
        dic0 = dict(sp0.items())
        for k, v in sp.items():
            if k == "threads":
                # 'threads N' doesn't work, must be given by tform option -wN.
                continue
            if v == dic0[k]:
                # Don't write when same as the default value.
                continue
            if opts.human_readable:
                v_str = round_human_readable(v, False, True)
            else:
                v_str = str(v)
            print("{0} {1}".format(k, v_str), file=fi)
        # Parameters unknown to this script are passed through verbatim.
        for k, v in pars.items():
            print("{0} {1}".format(k, v), file=fi)


# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()