import builtins
import difflib
import inspect
import os


# Hack that patches the built-in `open` function so that data files can also
# be found relative to this script's directory, allowing the assignment to be
# done in places other than the server.

def find_filename(filename):
    if os.path.exists(filename):
        return filename
    path = os.path.dirname(inspect.getfile(inspect.currentframe()))
    path = os.path.join(path, os.path.basename(filename))
    if os.path.exists(path):
        return path
    return filename


old_open = builtins.open


def new_open(filename, mode='r', buffering=-1, encoding=None, errors=None,
             newline=None, closefd=True, opener=None):
    return old_open(find_filename(filename), mode, buffering, encoding,
                    errors, newline, closefd, opener)


builtins.open = new_open


# Lab code proper

def _diff(gold_tokens, pred_tokens):
    """Iterator over pairs describing the longest differing subsequences
    within `gold_tokens` and `pred_tokens`.
    """
    matcher = difflib.SequenceMatcher(None, gold_tokens, pred_tokens)
    a_lo = b_lo = 0
    for a_hi, b_hi, n in matcher.get_matching_blocks():
        if a_lo < a_hi or b_lo < b_hi:
            yield gold_tokens[a_lo:a_hi], pred_tokens[b_lo:b_hi]
        a_lo = a_hi + n
        b_lo = b_hi + n


def diff(gold_tokens, pred_tokens):
    """Return a list of pairs describing the longest differing subsequences
    within `gold_tokens` and `pred_tokens`.
    """
    return list(_diff(gold_tokens, pred_tokens))


def _n_matches(gold_tokens, pred_tokens):
    """Return the number of elements that match between `gold_tokens` and
    `pred_tokens`.
    """
    matcher = difflib.SequenceMatcher(None, gold_tokens, pred_tokens)
    return sum(match.size for match in matcher.get_matching_blocks())


def n_errors(gold_tokens, pred_tokens):
    """Return the number of errors in the tokenization given by
    `pred_tokens`, relative to the gold-standard tokenization given by
    `gold_tokens`.
    """
    return (len(gold_tokens) + len(pred_tokens)
            - 2 * _n_matches(gold_tokens, pred_tokens))


def precision(gold_tokens, pred_tokens):
    """Return the precision of the tokenization given by `pred_tokens`,
    relative to the gold-standard tokenization given by `gold_tokens`.
    """
    n_pred_tokens = len(pred_tokens)
    n_matches = _n_matches(gold_tokens, pred_tokens)
    return n_matches / n_pred_tokens if n_pred_tokens > 0 else float('NaN')


def recall(gold_tokens, pred_tokens):
    """Return the recall of the tokenization given by `pred_tokens`,
    relative to the gold-standard tokenization given by `gold_tokens`.
    """
    n_gold_tokens = len(gold_tokens)
    n_matches = _n_matches(gold_tokens, pred_tokens)
    return n_matches / n_gold_tokens if n_gold_tokens > 0 else float('NaN')
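

# Illustrative usage sketch (not part of the original lab code). The token
# lists below are made-up examples, assuming tokens are compared as plain
# strings; they only show how the evaluation helpers fit together.
if __name__ == '__main__':
    gold = ['I', 'ca', "n't", 'wait', '.']   # hypothetical gold tokenization
    pred = ['I', 'can', "'t", 'wait', '.']   # hypothetical predicted tokenization

    # Each pair contains the differing stretch of gold vs. predicted tokens.
    print(diff(gold, pred))        # [(['ca', "n't"], ['can', "'t"])]

    # 2 gold tokens missed + 2 spurious predicted tokens = 4 errors.
    print(n_errors(gold, pred))    # 4

    # 3 of 5 predicted tokens match, 3 of 5 gold tokens are recovered.
    print(precision(gold, pred))   # 0.6
    print(recall(gold, pred))      # 0.6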