#!/usr/bin/env python3

import signal

# Put both signals back on their default disposition before anything else is
# imported: a closed pipe (e.g. the pager quitting) or Ctrl-C should simply end
# the process instead of surfacing as a Python exception with a traceback.
for _sig in (signal.SIGPIPE, signal.SIGINT):
    signal.signal(_sig, signal.SIG_DFL)

import sys
import difflib
from os import get_terminal_size, environ

# XXX: ydiff is typically used like `git diff | ydiff | less`, so both stdin
# and stdout are redirected and asking them for the terminal size fails with
# "inappropriate ioctl for device".  stderr (fd 2) usually still points at the
# terminal, so query that one instead.
#
# YDIFF_WIDTH, when set to a non-empty value, overrides the detected width; a
# value that is not an integer still raises ValueError so that a bad setting
# is noticed rather than silently ignored.
try:
    terminal_width = int(environ.get("YDIFF_WIDTH") or get_terminal_size(2)[0])
except OSError:
    # stderr is not a terminal either (redirected to a file, cron job, CI...):
    # fall back to the conventional 80 columns instead of crashing at start-up.
    terminal_width = 80

# SGR (Select Graphic Rendition) escape sequences used for all coloring.
COLOR_RESET = "\x1b[0m"     # drop every active attribute
COLOR_REVERSE = "\x1b[7m"   # swap foreground and background
COLOR_PLAIN = "\x1b[22m"    # normal intensity (neither bold nor faint)
COLOR_RED = "\x1b[31m"
COLOR_GREEN = "\x1b[32m"
COLOR_YELLOW = "\x1b[33m"
COLOR_CYAN = "\x1b[36m"
COLOR_GRAY = "\x1b[37m"     # SGR 37, shown as white / light gray

# Separator emitted before every hunk: a gray dotted rule spanning the whole
# terminal width.  Note that it carries no trailing newline.
hunk_meta_display = f"{COLOR_GRAY}{'┈' * terminal_width}{COLOR_RESET}"


def _sgr_length(text, start):
    """Return the length of the SGR color sequence at text[start:], or 0.

    A sequence is recognised as ESC, "[", any run of digits / ";" / ":" and a
    terminating "m".  Anything else (a lone ESC, another kind of escape) is
    reported as "no sequence here".
    """
    if not text.startswith("\x1b[", start):
        return 0
    size = len(text)
    end = start + 2
    while end < size and text[end] in "0123456789;:":
        end += 1
    if end < size and text[end] == "m":
        return end - start + 1
    return 0


def strsplit(text, width):
    """Split text after at most `width` visible characters, color-aware.

    Color escape sequences are copied to the first string without counting
    towards `width`; every other character counts as one visible column.
    Sequences that directly follow the last visible character are kept with
    the first string.

    Returns a 3-tuple: (first string, second string, number of visible chars
    in the first string).

    If some color is still active at the splitting point, the first string
    gets the resetting sequence appended and, when there is any text left,
    the second string is prefixed with all active colors.  When nothing is
    left the second string is empty, so callers that loop "until the rest is
    empty" always terminate.

    With width <= 0 no visible character is consumed.
    """
    pieces = []        # fragments making up the first string
    found_colors = ""  # colors seen since the last reset
    chars_cnt = 0
    pos = 0
    size = len(text)

    while pos < size:
        # First of all, check whether a color sequence starts here.
        seq_len = _sgr_length(text, pos) if text[pos] == "\x1b" else 0
        if seq_len:
            color = text[pos:pos + seq_len]
            if color == COLOR_RESET:
                found_colors = ""
            else:
                found_colors += color
            pieces.append(color)
            pos += seq_len
            continue

        # A visible character: take it, unless the first string is already
        # wide enough.
        if chars_cnt >= width:
            break
        pieces.append(text[pos])
        chars_cnt += 1
        pos += 1

    first = "".join(pieces)
    second = text[pos:]

    if found_colors:
        # Close the colors on the first part, and re-open them on the rest,
        # but only if there is a rest: a second string made of nothing but a
        # color prefix would never become empty.
        first += COLOR_RESET
        if second:
            second = found_colors + second

    return first, second, chars_cnt


class Hunk:
    """A single hunk of a unified diff: its addresses plus its body lines."""

    def __init__(self, hunk_headers, old_addr, new_addr):
        self._hunk_headers = hunk_headers  # lines seen just before the hunk
        self._old_addr = old_addr  # tuple (start, offset)
        self._new_addr = new_addr  # tuple (start, offset)
        self._hunk_list = []  # list of tuple (attr, line)

    def append(self, hunk_line):
        """Add one body line, given as a 2-element tuple (attr, text).

        attr is '-' for an old line, '+' for a new line and ' ' for a line
        common to both sides.
        """
        self._hunk_list.append(hunk_line)

    def mdiff(self):
        r"""Run difflib._mdiff() over the old and the new side of the hunk.

        The result is an iterator of tuples:
            (from line tuple, to line tuple, boolean flag)

        from/to line tuple -- (line num, line text)
            line num -- integer or None (to indicate a context separation)
            line text -- original line text with these markers inserted:
                '\0+' -- start of added text
                '\0-' -- start of deleted text
                '\0^' -- start of changed text
                '\1'  -- end of added/deleted/changed text

        boolean flag -- None indicates context separation, True indicates
            that either the "from" or the "to" line contains a change,
            otherwise False.
        """
        old_side = self._get_old_text()
        new_side = self._get_new_text()
        return difflib._mdiff(old_side, new_side)

    def _lines_without(self, skipped_attr):
        # Texts of all body lines whose attr differs from skipped_attr.
        return [text for attr, text in self._hunk_list if attr != skipped_attr]

    def _get_old_text(self):
        # Old side = everything that is not an added line.
        return self._lines_without("+")

    def _get_new_text(self):
        # New side = everything that is not a removed line.
        return self._lines_without("-")

    def is_completed(self):
        """Tell whether both sides hold as many lines as the addresses say."""
        return (
            self._old_addr[1] == len(self._get_old_text())
            and self._new_addr[1] == len(self._get_new_text())
        )


class UnifiedDiff(object):
    """One file's worth of a unified diff, plus helpers to classify lines.

    headers  -- list of lines that preceded the diff (defaults to [])
    old_path -- the "--- ..." line, or None
    new_path -- the "+++ ..." line, or None
    hunks    -- list of hunks (defaults to [])

    The is_*() / parse_*() methods only look at the line they are given; they
    accept lines with or without their trailing newline.
    """

    def __init__(self, headers=None, old_path=None, new_path=None, hunks=None):
        self._headers = headers or []
        self._old_path = old_path or None
        self._new_path = new_path or None
        self._hunks = hunks or []

    def is_old_path(self, line):
        """True for a "--- <path>" line."""
        return line.startswith("--- ")

    def is_new_path(self, line):
        """True for a "+++ <path>" line."""
        return line.startswith("+++ ")

    def is_hunk_meta(self, line):
        """True for a hunk address line such as "@@ -3,7 +3,6 @@"."""
        # The closing " @@" cannot start before index 8: that is where it
        # sits in the shortest possible form, "@@ -1 +1 @@".
        return (
            line.startswith("@@ -")
            and line.find(" @@") >= 8
        )

    def parse_hunk_meta(self, hunk_meta):
        """Return ((old start, old count), (new start, new count)).

        A side written without a count ("-1" or "+1") has a count of 1.
        Raises ValueError when an address is not numeric.
        """
        # @@ -3,7 +3,6 @@   ->   ["@@", "-3,7", "+3,6", "@@", ...]
        fields = hunk_meta.split()
        return self._parse_addr(fields[1]), self._parse_addr(fields[2])

    @staticmethod
    def _parse_addr(field):
        # "-3,7" -> (3, 7); "+1" -> (1, 1).  The leading sign is dropped.
        start, sep, count = field[1:].partition(",")
        return int(start), int(count) if sep else 1

    def parse_hunk_line(self, line):
        """Split a hunk body line into (attr, text), attr being its first char."""
        return line[0], line[1:]

    def is_old(self, line):
        """True for a removed line ("-...") that is not a "--- " path line."""
        return (
            line.startswith("-")
            and not self.is_old_path(line)
        )

    def is_new(self, line):
        """True for an added line ("+...") that is not a "+++ " path line."""
        return line.startswith("+") and not self.is_new_path(line)

    def is_common(self, line):
        """True for a context line, i.e. one starting with a space."""
        return line.startswith(" ")

    def is_eof(self, line):
        """True for the "no newline at end of ..." marker line."""
        # \ No newline at end of file
        # \ No newline at end of property
        return line.startswith(r"\ No newline at end of")

    def is_only_in_dir(self, line):
        """True for an "Only in <dir>: <file>" line."""
        return line.startswith("Only in ")

    def is_binary_differ(self, line):
        """True for a "Binary files <a> and <b> differ" line."""
        # Lines read from a stream still carry their line ending, so strip it
        # before looking at the last word; otherwise the test never matches.
        return line.startswith("Binary files") and line.rstrip().endswith("differ")


class DiffParser(object):
    """Turns a stream of diff text lines into UnifiedDiff objects."""

    def __init__(self, stream):
        self._stream = stream

    def get_diff_generator(self):
        """Parse all diff lines, yielding UnifiedDiff objects one by one."""
        current = UnifiedDiff()
        pending = []  # unrecognized lines, waiting to become someone's headers

        def ready(candidate):
            # A diff is worth yielding once it has both paths and some hunk.
            return candidate._old_path and candidate._new_path and candidate._hunks

        for raw in self._stream:
            if current.is_old_path(raw):
                hunks = current._hunks
                if hunks and not hunks[-1].is_completed():
                    # The running hunk still expects lines, so this is really
                    # a removed line whose text happens to start with '-- '.
                    hunks[-1].append(current.parse_hunk_line(raw))
                else:
                    # No hunk yet, or the last one is complete: a new diff
                    # starts here.  Hand out the previous one if it is usable.
                    if ready(current):
                        yield current
                    current = UnifiedDiff(pending, raw, None, None)
                    pending = []
                continue

            if current.is_new_path(raw) and current._old_path:
                if current._new_path:
                    # The path is already known, so this is an added line
                    # whose text happens to start with '++ '.
                    current._hunks[-1].append(current.parse_hunk_line(raw))
                else:
                    current._new_path = raw
                continue

            if current.is_hunk_meta(raw):
                old_addr, new_addr = current.parse_hunk_meta(raw)
                current._hunks.append(Hunk(pending, old_addr, new_addr))
                pending = []
                continue

            is_body_line = (
                current._hunks
                and not pending
                and (
                    current.is_old(raw)
                    or current.is_new(raw)
                    or current.is_common(raw)
                )
            )
            if is_body_line:
                current._hunks[-1].append(current.parse_hunk_line(raw))
                continue

            if current.is_eof(raw):
                # The "no newline at end of ..." marker is dropped.
                continue

            if current.is_only_in_dir(raw) or current.is_binary_differ(raw):
                # 'Only in foo:' and 'Binary files ... differ' count as diffs
                # of their own: yield the running diff first (when it is
                # completely constructed), then a headers-only diff holding
                # this line.
                if ready(current):
                    yield current
                pending.append(raw)
                yield UnifiedDiff(pending, None, None, None)
                pending = []
                current = UnifiedDiff()
                continue

            # Every other line is kept as a header, either of the next diff
            # or of the next hunk.
            pending.append(raw)

        # Validate and yield the last diff if it was not yielded yet.
        if current._old_path:
            assert current._new_path is not None
            if current._hunks:
                assert len(current._hunks[-1]._hunk_list) > 0
            yield current

        if pending:
            # Tolerate dangling headers: yield a UnifiedDiff object carrying
            # nothing but those header lines.
            yield UnifiedDiff(pending, None, None, None)


class DiffMarker(object):
    """Renders UnifiedDiff objects as colored, side-by-side terminal text."""

    def markup_side_by_side(self, diff):
        """Yield the colored side-by-side rendering of `diff`, piece by piece.

        Yields, in order: the diff headers (cyan), the old and new path lines
        (yellow, only when both are set), and then for every hunk its own
        headers, the hunk separator and the paired old/new lines.  Lines that
        do not fit into one column are wrapped onto continuation rows which
        carry no line number.
        """
        def _fit_with_marker_mix(text):
            """Translate the mdiff markers inside `text` into color
            sequences: reverse red for deleted, reverse green for added and
            reverse yellow for changed text.
            """
            out = [COLOR_PLAIN]
            pos = 0
            size = len(text)
            while pos < size:
                if text.startswith("\x00-", pos):
                    out.append(f'{COLOR_REVERSE}{COLOR_RED}')
                    pos += 2
                elif text.startswith("\x00+", pos):
                    out.append(f'{COLOR_REVERSE}{COLOR_GREEN}')
                    pos += 2
                elif text.startswith("\x00^", pos):
                    out.append(f'{COLOR_REVERSE}{COLOR_YELLOW}')
                    pos += 2
                elif text[pos] == "\x01":
                    # End of a marked span.  When it is the very last char
                    # the final reset below is enough.
                    if pos + 1 < size:
                        out.append(f'{COLOR_RESET}{COLOR_PLAIN}')
                    pos += 1
                else:
                    # FIXME: utf-8 wchar might break the rule here, e.g.
                    # u'\u554a' takes double width of a single letter, also
                    # this depends on your terminal font.  I guess audience of
                    # this tool never put that kind of symbol in their code :-)
                    out.append(text[pos])
                    pos += 1

            return "".join(out) + COLOR_RESET

        # Set up number width from the last hunk (the diff may have no hunk
        # at all, in which case both maxima are 0).
        try:
            start, offset = diff._hunks[-1]._old_addr
            max1 = start + offset - 1
            start, offset = diff._hunks[-1]._new_addr
            max2 = start + offset - 1
        except IndexError:
            max1 = max2 = 0

        num_width = max(len(str(max1)), len(str(max2)))

        # Each line is like 'nnn TEXT nnn TEXT\n', so width is half of
        # terminal size minus the line number columns and 3 separating spaces.
        # Never go below one column: with a width of 0 (very narrow terminal
        # or tiny YDIFF_WIDTH) the wrapping loop below would make no progress
        # and spin forever.
        width = max(1, (terminal_width - num_width * 2 - 3) // 2)

        for line in diff._headers:
            yield f"{COLOR_CYAN}{line}{COLOR_RESET}"

        if diff._old_path is not None and diff._new_path is not None:
            yield f"{COLOR_YELLOW}{diff._old_path}{COLOR_RESET}"
            yield f"{COLOR_YELLOW}{diff._new_path}{COLOR_RESET}"

        for hunk in diff._hunks:
            for hunk_header in hunk._hunk_headers:
                yield f"{COLOR_CYAN}{hunk_header}{COLOR_RESET}"

            yield hunk_meta_display

            for old, new, changed in hunk.mdiff():
                # mdiff numbers lines from 1 within the hunk; shift them to
                # the real line numbers.  A missing side gets no number.
                if old[0]:
                    left_num = str(hunk._old_addr[0] + int(old[0]) - 1)
                else:
                    left_num = " "

                if new[0]:
                    right_num = str(hunk._new_addr[0] + int(new[0]) - 1)
                else:
                    right_num = " "

                # Tabs become 8 spaces, line endings are dropped.
                left = old[1].replace("\t", " " * 8).replace("\n", "").replace("\r", "")
                right = new[1].replace("\t", " " * 8).replace("\n", "").replace("\r", "")

                if changed:
                    if not old[0]:
                        # Purely added line: plain green, markers stripped.
                        left = ""
                        right = right.rstrip("\x01")
                        if right.startswith("\x00+"):
                            right = right[2:]
                        right = f"{COLOR_GREEN}{right}{COLOR_RESET}"
                    elif not new[0]:
                        # Purely removed line: plain red, markers stripped.
                        left = left.rstrip("\x01")
                        if left.startswith("\x00-"):
                            left = left[2:]
                        left = f"{COLOR_RED}{left}{COLOR_RESET}"
                        right = ""
                    else:
                        # Modified line: highlight the differing spans.
                        left = _fit_with_marker_mix(left)
                        right = _fit_with_marker_mix(right)
                else:
                    right = f"{COLOR_RESET}{right}"

                # Need to wrap long lines, so here we'll iterate,
                # shaving off `width` chars from both left and right
                # strings, until both are empty. Also, line number needs to
                # be printed only for the first part.
                lncur = left_num
                rncur = right_num
                while left or right:
                    # Split both left and right lines, preserving escaping
                    # sequences correctly.
                    lcur, left, llen = strsplit(left, width)
                    rcur, right, rlen = strsplit(right, width)

                    # Pad left line with spaces if needed.  This is done by
                    # hand because format-spec padding would also count the
                    # invisible escape sequences contained in lcur.
                    if llen < width:
                        lcur += " " * (width - llen)

                    yield f"{COLOR_GRAY}{lncur:>{num_width}}{COLOR_RESET} {lcur} {COLOR_GRAY}{rncur:>{num_width}}{COLOR_RESET} {rcur}\n"

                    # Clean line numbers for further iterations
                    lncur = ""
                    rncur = ""


# Diffs routinely contain bytes that cannot be decoded with the locale
# encoding (binary-ish content, files in legacy encodings).  With the default
# strict error handling the text-mode reader would abort in the middle of the
# diff with UnicodeDecodeError, so switch stdin to "replace" before the first
# read.  The getattr guard keeps this a no-op when stdin is an object that has
# no reconfigure() method.
_reconfigure_stdin = getattr(sys.stdin, "reconfigure", None)
if _reconfigure_stdin is not None:
    _reconfigure_stdin(errors="replace")

# Parse stdin diff by diff and write the rendering out as it is produced.
for diff in DiffParser(sys.stdin).get_diff_generator():
    for line in DiffMarker().markup_side_by_side(diff):
        # Written through the binary layer, so output is always UTF-8.
        sys.stdout.buffer.write(line.encode())