#very basic, one seq per line only, no pysam for max portability
#doesn't care if sequences are in revcomp or forward order
import sys

fasta1 = sys.argv[1]
fasta2 = sys.argv[2]

revcomp = lambda x: ''.join([{'A':'T','C':'G','G':'C','T':'A'}[B] for B in x][::-1])
def normalize(seq):
    rev = revcomp(seq)
    return min(rev,seq)

def read_seqs(fasta):
    seqs = set()
    for  line in open(fasta):
        if line[0] == '>': continue
        seqs.add(normalize(line.strip()))
    return seqs

s1 = read_seqs(fasta1)
s2 = read_seqs(fasta2)

if s1 == s2:
    sys.exit(0)

if len(s1) == len(s2) and len(s1) == 1:
    # special case, e.g. for read50 and genome10k, let's see if one is included in the other
    seq1= s1.copy().pop()
    seq2= s2.copy().pop()
    if seq1 in seq2 or seq2 in seq1:
        print("one seq is perfectly included in the other")
    else:
        print("BAD: one sequence doesn't exactly match the other")

print("NOT EQUAL: %d sequence(s) in %s not in %s" % (len(s1.difference(s2)), fasta1, fasta2))
sys.exit(1)