# -*- coding: UTF-8 -*-
# with open(r"D:\data_file\test.fa","r") as input_file:
#     seq = ""
#     header = input_file.readline().strip()[1:]
#     for line in input_file:
#         line = line.strip()
#         if line[0] != ">":
#             seq = seq + line
#         else:
#             print header
#             print seq
#             header = line[1:]
#             seq  = ""
#     print header
#     print seq

def read_fasta(file_path=""):
    """
    Lazily parse a FASTA file and yield its records one at a time.

    Lines that come before the first header line (a line starting with
    '>') are skipped.  A file without any header line yields nothing.

    Parameters:
        file_path: path of the FASTA file to read.

    Yields:
        (title, sequence) tuples.  ``title`` is the header line without
        the leading '>' and without trailing whitespace.  ``sequence`` is
        the record's remaining lines joined together, with trailing
        whitespace, spaces and carriage returns removed; it is "" for a
        header that has no sequence lines.

    Raises:
        IOError: if the file cannot be opened.  Because this function is
            a generator, the error surfaces on the first iteration, not
            at the moment read_fasta() is called.
    """
    try:
        fasta_handle = open(file_path, "r")
    except (IOError, OSError, TypeError, ValueError):
        # Translate only the failures open() itself reports for a bad
        # path argument; a bare ``except`` would also swallow
        # KeyboardInterrupt and SystemExit.
        raise IOError("Your input FASTA file is not right!")

    # The with block guarantees the handle is closed when the records are
    # exhausted, when the consumer stops iterating early (generator
    # close), or when an error propagates.
    with fasta_handle:
        title = None  # None until the first header line has been seen
        chunks = []
        for line in fasta_handle:
            if line.startswith(">"):
                # A new header ends the previous record, if there is one.
                if title is not None:
                    yield title, "".join(chunks).replace(" ", "").replace("\r", "")
                title = line[1:].rstrip()
                chunks = []
            elif title is not None:
                chunks.append(line.rstrip())
            # else: still before the first header -> ignore the line

        # Emit the final record, which has no following header to end it.
        if title is not None:
            yield title, "".join(chunks).replace(" ", "").replace("\r", "")

# Print every record of the test FASTA file: the header on one line and
# the sequence on the next.  The single-argument print(x) form produces
# the same output on Python 2 and Python 3, whereas the bare print
# statement is a SyntaxError on Python 3.
for header, seq in read_fasta(file_path=r"D:\data_file\test.fa"):
    print(header)
    print(seq)

# Load the whole FASTA file into memory as {record title: sequence}.
# If a title occurs more than once, the last record overwrites the
# earlier ones.
hg19_genome = {}
for chr_name, chr_seq in read_fasta(file_path=r"D:/data_file/hg19_only_chromosome.fa"):
    hg19_genome[chr_name] = chr_seq