#include "BCFasta.h"

int check_alpha(char val, string alpha);
/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  Fasta_vector::print_to_fasta
 *  Description:  Appends the alignment to `filename`: a two-line title block
 *                followed by one ">name" line and one sequence line per entry.
 *                names[k] is paired with sequences[k].
 * =====================================================================================
 */
void Fasta_vector::print_to_fasta(const char *filename){

	// ios::app: an existing file is extended, never truncated.
	ofstream out;
	out.open(filename, ios::app);

	out << "\nAmino acid sequences alignment for " << filename << endl
	    << "-----------------------------------------------------------------" << endl;

	size_t entry = 0;
	while(entry < names.size()){
		out << ">" << names[entry] << endl
		    << sequences[entry] << endl;
		++entry;
	}

	out.close();

}


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  set_aminos
 *  Description:  Writes the codon -> one-letter amino acid table into `convert`.
 *                Keys are upper-case DNA triplets; the three stop codons and the
 *                all-gap triplet "---" map to '-'. Entries already present under
 *                the same key are overwritten, other keys are left untouched.
 * =====================================================================================
 */
void set_aminos (map<string, char>& convert)
{
	struct codon_entry {
		const char *codon;
		char        residue;
	};

	static const codon_entry table[] = {
		{"ATG",'M'},
		{"TAA",'-'}, {"TAG",'-'}, {"TGA",'-'}, {"---",'-'},
		{"TGG",'W'},
		{"TTT",'F'}, {"TTC",'F'},
		{"TAT",'Y'}, {"TAC",'Y'},
		{"CAT",'H'}, {"CAC",'H'},
		{"CAA",'Q'}, {"CAG",'Q'},
		{"AAA",'K'}, {"AAG",'K'},
		{"GAT",'D'}, {"GAC",'D'},
		{"GAA",'E'}, {"GAG",'E'},
		{"AAT",'N'}, {"AAC",'N'},
		{"TGT",'C'}, {"TGC",'C'},
		{"ATT",'I'}, {"ATC",'I'}, {"ATA",'I'},
		{"GTA",'V'}, {"GTT",'V'}, {"GTC",'V'}, {"GTG",'V'},
		{"CCA",'P'}, {"CCT",'P'}, {"CCC",'P'}, {"CCG",'P'},
		{"ACA",'T'}, {"ACT",'T'}, {"ACC",'T'}, {"ACG",'T'},
		{"GCA",'A'}, {"GCT",'A'}, {"GCC",'A'}, {"GCG",'A'},
		{"GGA",'G'}, {"GGT",'G'}, {"GGC",'G'}, {"GGG",'G'},
		{"TTA",'L'}, {"TTG",'L'}, {"CTA",'L'}, {"CTT",'L'}, {"CTC",'L'}, {"CTG",'L'},
		{"TCA",'S'}, {"TCT",'S'}, {"TCC",'S'}, {"TCG",'S'}, {"AGT",'S'}, {"AGC",'S'},
		{"AGA",'R'}, {"AGG",'R'}, {"CGA",'R'}, {"CGT",'R'}, {"CGC",'R'}, {"CGG",'R'}
	};

	const unsigned int entries = sizeof(table)/sizeof(table[0]);
	for(unsigned int k=0;k<entries;++k){
		convert[table[k].codon] = table[k].residue;
	}
}		/* -----  end of function set_aminos  ----- */



/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  Fasta_vector::check_for_tree
 *  Description:  Builds the path of the tree file that accompanies an alignment:
 *                "<folder>/<filename up to its first '.'>.tre".
 *                Only the file name is cut at the dot, so folders such as "./data"
 *                or "../run.1" are kept intact. A file name without any dot is used
 *                whole. Nothing is opened or checked on disk here.
 * =====================================================================================
 */
string Fasta_vector::check_for_tree (string& folder, string& filename){

	// find() returns npos when there is no dot; substr(0, npos) is the whole name.
	const string stem = filename.substr(0, filename.find('.'));

	return folder + "/" + stem + ".tre";

}		/* -----  end of function Fasta_vector::check_for_tree  ----- */
/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  Fasta_vector::check_for_pdb
 *  Description:  Derives "<folder>/<filename up to its first '.'>.pdb", stores it in
 *                pdb.filename and tries to load it with Read_pdb. When a structure
 *                was loaded (has_pdb is true afterwards) the reference chain is
 *                chosen with Identify_chain and pdb.surface_num is reset to 0.
 *                Only the file name is cut at the dot, so a folder containing '.'
 *                (e.g. "./data") no longer corrupts the path.
 * =====================================================================================
 */
void Fasta_vector::check_for_pdb (string& folder, string& filename){

	// find() returns npos when there is no dot; substr(0, npos) is the whole name.
	const string stem = filename.substr(0, filename.find('.'));

	pdb.filename = folder + "/" + stem + ".pdb";

	Read_pdb(pdb.filename);
	if(has_pdb){
		Identify_chain();
		pdb.surface_num=0;
	}

}		/* -----  end of function Fasta_vector::check_for_pdb  ----- */



/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  Fasta_vector::get_identity_level
 *  Description:  Stores in `identity` the mean, over all alignment columns, of the
 *                fraction of sequences that carry the most frequent character of
 *                the column (gap characters are counted like any other character).
 *                The number of columns is taken from the first sequence.
 *                An alignment with no sequences or no columns yields 0.0; a
 *                sequence shorter than the first one simply does not contribute
 *                to the columns it lacks.
 * =====================================================================================
 */
void Fasta_vector::get_identity_level (){

	if(sequences.empty() || sequences[0].empty()){
		identity = 0.0;
		return;
	}

	const string::size_type columns = sequences[0].size();
	double total = 0.0;

	for(string::size_type col=0;col<columns;++col){
		map<char, int> counts;
		int top = 0;	// occurrences of the most frequent character so far

		for(unsigned int row=0;row<sequences.size();++row){
			if(col>=sequences[row].size()){
				continue;	// ragged row: nothing in this column
			}
			const int seen = ++counts[sequences[row][col]];
			if(seen>top){
				top=seen;
			}
		}
		total += (double)top/(double)sequences.size();
	}

	identity = total/(double)columns;

}		/* -----  end of function Fasta_vector::get_identity_level  ----- */

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  Fasta_vector::Fasta_vector
 *  Description:  Default constructor. Marks the object as having no associated
 *                PDB structure; no other member is assigned here.
 * =====================================================================================
 */
Fasta_vector::Fasta_vector(){
	this->has_pdb = false;
}

void Fasta_vector::Identify_chain(){

	map<int, map<char, double> > identity;
	for(unsigned int i=0;i<sequences.size();++i){
		identity[i] = compare_seqs(sequences[i]);		
	}

	map<int, map<char, double> >::iterator it;
	map<char, double>::iterator mit;
int larint=0;
	char larchar;
	double lardouble =0.0;
	for(it=identity.begin();it!=identity.end();++it){
		for(mit=it->second.begin();mit!=it->second.end();++mit){

			if(mit->second>=lardouble){
				lardouble = mit->second;
				larchar = mit->first;
				larint = it->first;
			}
		}
	}

	ref_num = larint;
	ref_chain = larchar;
	ref_chan_num = chain_nums[ref_chain];
}



map<char, double> Fasta_vector::compare_seqs(string& seq){

	map<char, double> chain_id;

	for(unsigned int i=0;i<pdb.chains.size();++i){

		double id = 0.0;
		int gaps=0;
		for(unsigned int j=0;j<seq.size();++j){
			if(seq[j]!='-'){
				if(pdb.chains[i].res_map.find(j-gaps+1)!=pdb.chains[i].res_map.end())
				if(pdb.chains[i].aminos[pdb.chains[i].res_map[j-gaps+1]].letter==seq[j]){
					id+=1;
				}

			}else{
				gaps++;
			}

		}
		id/=(double)pdb.chains[i].aminos.size();
		chain_id[pdb.chains[i].name]=id;

	} 

	return chain_id;
}



int Fasta_vector::Get_neighbours(double dist, int col){


	if(has_pdb==false){
		cerr << "Error: there is no associated pdb" << endl;
		exit(-1);
	}

int total=0;
	for(unsigned int i=0;i<pdb.chains.size();++i){

		if(pdb.chains[i].name==ref_chain){
			
			for(unsigned int j=0;j<pdb.chains[i].aminos.size();++j){
				if((signed)j!=col){
					if((unsigned)col<pdb.chains[i].aminos.size())
					if(Distance(pdb.chains[i].aminos[j].mean_pos,pdb.chains[i].aminos[col].mean_pos)<=dist){
						++total;
					}
				}
			}
			//ref =i;		
			break;
		}

	}

return total;


}



void Fasta_vector::get_amino_neighbours(int dist){

	if(!has_pdb){
		cerr << "Error: there is no associated pdb" << endl;
		exit(-1);
	}

	
			for(unsigned int j=0;j<pdb.chains[ref_chan_num].aminos.size();++j){
				int total=0;
				for(unsigned int k=j+1;k<pdb.chains[ref_chan_num].aminos.size();++k){
					if(Distance(pdb.chains[ref_chan_num].aminos[j].mean_pos,pdb.chains[ref_chan_num].aminos[k].mean_pos)<=dist){
					++total;
					}							
				}
				pdb.chains[ref_chan_num].aminos[j].neighbours=total;
				if(total<=6){
					++pdb.surface_num;
				}
			}


}




double Fasta_vector::Get_average_neighbours(double dist){


	if(has_pdb==false){
		cerr << "Error: there is no associated pdb" << endl;
		exit(-1);
	}

int total=0;
int ref=0;
	for(unsigned int i=0;i<pdb.chains.size();++i){

		if(pdb.chains[i].name==ref_chain){
			
			for(unsigned int j=0;j<pdb.chains[i].aminos.size();++j){
				for(unsigned int k=j+1;k<pdb.chains[i].aminos.size();++k){
					if(Distance(pdb.chains[i].aminos[j].mean_pos,pdb.chains[i].aminos[k].mean_pos)<=dist){
					++total;
					}							
				}
			}
			ref =i;		
			break;
		}

	}
int N = pdb.chains[ref].aminos.size();

return (double)total/(double)N;


}


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  Distance
 *  Description:  Euclidean distance between the (x, y, z) coordinates of two atoms.
 *                Squares are computed by multiplication rather than pow(), which
 *                is a general-purpose routine and needlessly slow for exponent 2.
 * =====================================================================================
 */
double Distance(atom i, atom j){
	const double dx = i.x-j.x;
	const double dy = i.y-j.y;
	const double dz = i.z-j.z;
	return sqrt(dx*dx + dy*dy + dz*dz);
}


void Fasta_vector::Read_pdb (string& filename){
	ifstream fil;
	char buffer[2048];


	map<string, char> amin;
	set_aminos(amin);
	fil.open(filename.c_str());
	if(fil.is_open()){

		has_pdb=true;

		chain temp_chain;
		amino temp_amino;

		int res = 1;
		char chain='\0';
		bool firsta=true;
		bool firstc=true;
		while(fil.getline(buffer, 2048)){


			if(buffer[0]=='E' && buffer[1]=='N' && buffer[2]=='D' && buffer[3]=='M' && buffer[4]=='D' && buffer[5]=='L'){
				break;
			}


			if(buffer[0]=='A'&&buffer[1]=='T'&& buffer[2]=='O'&&buffer[3]=='M'){

				string temp;
				temp += buffer;


				string tem(temp, 22, 4);
				atom temp_atom;
				if(firsta==true){
					res=atoi(tem.c_str());
					firsta=false;
				}
				if(res!=atoi(tem.c_str())){
					temp_chain.res_map[res]=temp_chain.aminos.size();
					temp_chain.aminos.push_back(temp_amino);
					temp_amino.atoms.clear();
					res=atoi(tem.c_str());
				}
				temp_amino.entry=atoi(tem.c_str());
				tem.clear();
				tem.insert(tem.begin(), temp.begin()+12, temp.begin()+16);
				temp_atom.name=tem;
				tem.clear();
				tem.insert(tem.begin(), temp.begin()+17, temp.begin()+20);
				temp_amino.name=tem;
				temp_amino.letter=amin[tem];
				tem.clear();
				if(firstc==true){
					chain=temp[21];
					firstc=false;
				}
				if(temp[21]!=chain){
					pdb.chains.push_back(temp_chain);
					temp_chain.aminos.clear();
					temp_chain.res_map.clear();
					temp_chain.clear();
					firstc=true;

				}

				temp_chain.name=temp[21];
				tem.insert(tem.begin(), temp.begin()+22, temp.begin()+26);

				temp_amino.res=atoi(tem.c_str());


				tem.clear();
				tem.insert(tem.begin(), temp.begin()+30, temp.begin()+38);
				temp_atom.x=atof(tem.c_str());	
				tem.clear();
				tem.insert(tem.begin(), temp.begin()+38, temp.begin()+46);
				temp_atom.y=atof(tem.c_str());	
				tem.clear();
				tem.insert(tem.begin(), temp.begin()+46, temp.begin()+54);
				temp_atom.z=atof(tem.c_str());	
				temp_amino.atoms.push_back(temp_atom);	

				//		tem.insert(temp.begin()+5, temp.begin()+ 	


			}else if(buffer[0]=='S'&& buffer[1]=='O'&&buffer[2]=='U'){

			/*	string str, line;
			cerr << "line=" << __LINE__ << endl;
				str += "ORGANISM_COMMON:";
				size_t found;
				line = buffer;
			cerr << "line=" << __LINE__ << endl;
				found = line.find(str);
			cerr << "line=" << __LINE__ << endl;
				if(found!=string::npos){
					vector<string> tok, tok2;
					Tokenize(line, tok, ":");
					Tokenize(tok[1], tok2, ", ;");
					ref_seq=tok2[0];	
					//ref_seq.erase(remove_if(ref_seq.begin(), ref_seq.end(), isspace), ref_seq.end());

					if(ref_seq[0]==' '){
						ref_seq.erase(0,1);

					}


				}
			cerr << "line=" << __LINE__ << endl;
*/

			}

		}

		temp_chain.aminos.push_back(temp_amino);
		pdb.chains.push_back(temp_chain);
		temp_chain.res_map.clear();
		temp_chain.clear();
		

	}else{
		return;
	}






	pdb.calc_mean_pos();

	for(unsigned int i=0;i<pdb.chains.size();++i){
		chain_nums[pdb.chains[i].name]=i;
	}
	fil.close();
}		/* -----  end of function Read_pdb  ----- */

void amino::clear(){
	atoms.clear();
}


void chain::clear(){
	aminos.clear();
	res_map.clear();
}

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  structure::clear
 *  Description:  Discards every chain of the structure, together with its residues,
 *                atoms and residue index. Previously only the atoms and the index
 *                were emptied, which left atom-less residues behind that a later
 *                load would append to.
 * =====================================================================================
 */
void structure::clear(){

	// Destroying the chains releases their aminos, atoms and res_map as well.
	chains.clear();

}

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  amino::calculate_mean_pos
 *  Description:  Sets mean_pos to the arithmetic mean of the x, y and z coordinates
 *                of the residue's atoms. A residue without atoms gets (0, 0, 0)
 *                instead of the NaN a division by zero would produce.
 * =====================================================================================
 */
void amino::calculate_mean_pos(){

	mean_pos.x=0.0;
	mean_pos.y=0.0;
	mean_pos.z=0.0;

	if(atoms.empty()){
		return;
	}

	for(unsigned int i=0;i<atoms.size();++i){
		mean_pos.x+=atoms[i].x;
		mean_pos.y+=atoms[i].y;
		mean_pos.z+=atoms[i].z;
	}

	mean_pos.x/=atoms.size();
	mean_pos.y/=atoms.size();
	mean_pos.z/=atoms.size();

}



/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  structure::calc_mean_pos
 *  Description:  Calls calculate_mean_pos on every residue of every chain.
 * =====================================================================================
 */
void structure::calc_mean_pos()
{
	unsigned int c = 0;
	while(c < chains.size()){
		unsigned int r = 0;
		while(r < chains[c].aminos.size()){
			chains[c].aminos[r].calculate_mean_pos();
			++r;
		}
		++c;
	}
}



/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  Fasta_vector::get_number_of_sequences
 *  Description:  Returns the number of stored sequence names.
 * =====================================================================================
 */
int Fasta_vector::get_number_of_sequences()
{
	const int count = static_cast<int>(names.size());
	return count;
}

void Fasta_vector::clear(){
	filename.clear();
	sequences.clear();
	names.clear();
	Tags.clear();
	chain_nums.clear();
	pdb.clear();
}

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  Fasta_map::clear
 *  Description:  Empties the tags, the sequences and the file name. No other member
 *                is touched.
 * =====================================================================================
 */
void Fasta_map::clear()
{
	Tags.clear();
	sequences.clear();
	filename.clear();
}


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  Fasta_vector::check_valid_alignment
 *  Description:  Validates the alignment against the alphabet named by `Alphabet`:
 *                  "DNA"    -> "ACGT-"
 *                  "AMINO"  -> "ACDEFGHIKLMNPQRSTVWY-"
 *                  "AMINOX" -> "ACDEFGHIKLMNPQRSTVWYX-"
 *                Any other name (including a NULL pointer) is a caller error and
 *                terminates the program. Returns -1 when the sequences differ in
 *                length or check_composition rejects them, 0 otherwise.
 * =====================================================================================
 */
int Fasta_vector::check_valid_alignment(const char* Alphabet){

	// A NULL name is treated like an unknown one instead of crashing in strcmp.
	const string requested = (Alphabet!=NULL) ? Alphabet : "";

	string alpha;
	if(requested=="DNA"){
		alpha = "ACGT-";
	}else if(requested=="AMINO"){
		alpha = "ACDEFGHIKLMNPQRSTVWY-";
	}else if(requested=="AMINOX"){
		alpha = "ACDEFGHIKLMNPQRSTVWYX-";
	}else{
		cerr << "Error: Alphabet other than DNA, AMINO or AMINOX passed to check_valid_alignment" << endl;
		exit(-1);
	}

	if(check_length_conservation()==-1){
		cerr << "Error: problem with alignment length, all sequences are not the same length\n";
		return(-1);
	}

	if(check_composition(alpha, Alphabet)==-1){
		return(-1);
	}

	return 0;

}

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  Fasta_vector::getAveGaps
 *  Description:  Returns the average number of '-' characters per sequence, rounded
 *                up to the next integer. Each sequence is scanned over its own
 *                length, so sequences of unequal length are safe. Returns 0 for an
 *                alignment without sequences.
 * =====================================================================================
 */
int Fasta_vector::getAveGaps(){

	if(sequences.empty()){
		return 0;
	}

	int numGaps=0;
	for(unsigned int i=0;i<sequences.size();++i){
		for(unsigned int j=0;j<sequences[i].size();++j){
			if(sequences[i][j]=='-'){
				++numGaps;
			}
		}
	}

	return static_cast<int>(ceil((double)numGaps/(double)sequences.size()));

}

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  Fasta_vector::check_DNA
 *  Description:  Returns true as soon as any sequence contains a character that is
 *                NOT in "ACGTNX-", and false when every character belongs to that
 *                set (also for an empty alignment).
 *                Note the direction: despite the name, `true` means that a
 *                non-nucleotide character was found.
 * =====================================================================================
 */
bool Fasta_vector::check_DNA()
{
	const string nucleotide_alphabet = "ACGTNX-";

	for(unsigned int s=0;s<sequences.size();++s){
		const string& current = sequences[s];
		for(unsigned int p=0;p<current.length();++p){
			// check_alpha yields non-zero for a character outside the alphabet.
			if(check_alpha(current[p], nucleotide_alphabet)!=0){
				return true;
			}
		}
	}

	return false;
}

/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  Fasta_vector::convert_DNA
 *  Description:  Replaces every sequence by its translation: the sequence is read
 *                in consecutive, non-overlapping triplets from position 0 and each
 *                triplet is looked up in the table built by set_aminos. Triplets
 *                that are not in the table become '-'. Every complete triplet is
 *                translated, including the last one; one or two trailing
 *                characters that do not form a triplet are ignored. Sequences
 *                shorter than three characters translate to an empty string.
 * =====================================================================================
 */
void Fasta_vector::convert_DNA(){

	map<string, char> codon_table;
	set_aminos(codon_table);

	vector<string> proteins;
	proteins.reserve(sequences.size());

	for(unsigned int i=0;i<sequences.size();++i){
		const string& dna = sequences[i];

		string protein;
		protein.reserve(dna.length()/3);

		// "pos+3<=length" cannot underflow, unlike "pos<length-3".
		for(string::size_type pos=0;pos+3<=dna.length();pos+=3){
			const map<string, char>::const_iterator hit = codon_table.find(dna.substr(pos, 3));
			if(hit!=codon_table.end()){
				protein += hit->second;
			}else{
				protein += '-';
			}
		}
		proteins.push_back(protein);
	}

	sequences.assign(proteins.begin(), proteins.end());

}


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  Fasta_vector::check_composition
 *  Description:  When `Alphabet` is "AMINO" or "AMINOX" and check_DNA() reports
 *                false (no character outside the nucleotide set was found), the
 *                sequences are first translated with convert_DNA().
 *                Afterwards every character of every sequence is tested against
 *                `alpha`; the first character outside it is reported on stderr and
 *                -1 is returned. Returns 0 when all characters are accepted.
 * =====================================================================================
 */
int Fasta_vector::check_composition(string alpha, const char* Alphabet){

	const bool protein_requested =
		strcmp(Alphabet, "AMINO")==0 || strcmp(Alphabet, "AMINOX")==0;

	// check_DNA() is only consulted when a protein alphabet was requested.
	if(protein_requested && !check_DNA()){
		convert_DNA();
	}

	for(unsigned int s=0;s<sequences.size();++s){
		const string& current = sequences[s];
		for(unsigned int p=0;p<current.length();++p){

			if(check_alpha(current[p], alpha)==0){
				continue;
			}

			cerr << "Warning: The alignment has a non-alphabet character: " << current[p] << endl;
			cerr << "These will be treated as gaps." << endl;
			return(-1);
		}
	}

	return(0);
}


/* 
 * ===  FUNCTION  ======================================================================
 *         Name:  Fasta_map::check_composition
 *  Description:  Tests every character of every stored sequence against `alpha`.
 *                The first character outside the alphabet is reported on stderr
 *                and -1 is returned; 0 is returned when all characters are
 *                accepted.
 * =====================================================================================
 */
int Fasta_map::check_composition(string alpha){

	map<string, string>::iterator entry = sequences.begin();

	while(entry!=sequences.end()){
		const string& current = entry->second;

		for(unsigned int p=0;p<current.length();++p){

			if(check_alpha(current[p], alpha)==0){
				continue;
			}

			cerr << "Warning: The alignment has a non-alphabet character: " << current[p] << endl;
			cerr << "These will be treated as gaps." << endl;
			return(-1);
		}

		++entry;
	}

	return(0);
}


	/*		TreeTemplate<Node>* Fasta_Newick_map::create_Bionj(const char* model, const char* alphabet){


			DistanceMatrix *DS;
			Alphabet *alpha1 = new NucleicAlphabet(); 
			Alphabet *alpha2 = new ProteicAlphabet();
			DiscreteDistribution * rdist = new ConstantDistribution(1.);
			if(alphabet=="DNA"){

			}else if(alphabet=="AMINO"){

			const ProteicAlphabet * alpha = dynamic_cast<const ProteicAlphabet *>(alpha2);
			SubstitutionModel * model = new JTT92(alpha);
			VectorSequenceContainer *vsc = new VectorSequenceContainer(alpha2);
			map<string, string>::iterator it;
			for(it=sequences.begin();it!=sequences.end();++it){
			vsc->addSequence(Sequence(it->first, it->second, alpha));
			}
			VectorSiteContainer * sites = new VectorSiteContainer(alpha2);
			for(it=sequences.begin();it!=sequences.end();++it){
			const Sequence *myseq = vsc->getSequence(it->first);
			sites->addSequence(*myseq, true);


			}
			SiteContainerTools::changeGapsToUnknownCharacters(*sites);
			DistanceEstimation MyDS(model, rdist, sites, 1, true);
			delete sites;
			delete vsc;
			delete alphabet;
			delete rdist;
			DS = MyDS.getMatrix();

			}else{
			cerr << "Error: Can only use DNA or AMINO alphabets." << endl; 
			exit(-1);
			}
			delete alpha1;
			delete alpha2;

			}


	 */
	/* 
	 * ===  FUNCTION  ======================================================================
	 *         Name:  Fasta_Newick_map::Read_Newick
	 *  Description:  Reads a Newick tree from `filename` and returns it. When the
	 *                reader throws, an error is written to stderr and NULL is
	 *                returned, so callers must check the result.
	 *                The reader lives on the stack and the exception is caught by
	 *                const reference, which avoids slicing a derived exception.
	 *                NOTE(review): the caller presumably owns the returned tree —
	 *                confirm against the Newick reader's documentation.
	 * =====================================================================================
	 */
	TreeTemplate<Node>* Fasta_Newick_map::Read_Newick(const char* filename){

		TreeTemplate<Node> *tree_for = NULL;
		Newick NewickReader(false); //No comment allowed!

		try{
			tree_for = NewickReader.read(filename);
		} catch (const Exception&){
			cerr << "Error: Couldn't read a Newick tree from file " << filename << endl;
		}

		return tree_for;

	}


	/* 
	 * ===  FUNCTION  ======================================================================
	 *         Name:  check_alpha
	 *  Description:  Membership test with an inverted result: returns 0 when `val`
	 *                occurs in `alpha` and 1 when it does not (so a non-zero result
	 *                flags an invalid character).
	 * =====================================================================================
	 */
	int check_alpha(char val, string alpha){

		const bool present = alpha.find(val)!=string::npos;
		return present ? 0 : 1;

	}



	/* 
	 * ===  FUNCTION  ======================================================================
	 *         Name:  Fasta_vector::check_length_conservation
	 *  Description:  Returns 0 when every sequence has the same length as the first
	 *                one. Returns -1, after an error message on stderr, when a
	 *                length differs or when there are no sequences at all.
	 * =====================================================================================
	 */
	int Fasta_vector::check_length_conservation(){

		if(sequences.empty()){
			cerr << "Error: The alignment contains no sequences" << endl;
			return(-1);
		}

		const string::size_type length = sequences[0].length();
		for(unsigned int i=1;i<sequences.size();++i){

			if(sequences[i].length()!=length){
				cerr << "Error: Not all of the sequences are the same length, hence not aligned properly" << endl;
				return(-1);
			}
		}
		return(0);
	}

	/* 
	 * ===  FUNCTION  ======================================================================
	 *         Name:  Fasta_map::check_length_conservation
	 *  Description:  Returns 0 when every stored sequence has the same length as the
	 *                first one in map order. Returns -1, after an error message on
	 *                stderr, when a length differs or when the map is empty.
	 * =====================================================================================
	 */
	int Fasta_map::check_length_conservation(){

		if(sequences.empty()){
			cerr << "Error: The alignment contains no sequences" << endl;
			return(-1);
		}

		map<string, string>::iterator it = sequences.begin();
		const string::size_type length = it->second.length();

		for(++it;it!=sequences.end();++it){

			if(it->second.length()!=length){
				cerr << "Error: Not all of the sequences are the same length, hence not aligned properly" << endl;
				return(-1);
			}
		}
		return(0);
	}


	/* 
	 * ===  FUNCTION  ======================================================================
	 *         Name:  Fasta_map::print_to_fasta
	 *  Description:  Appends the alignment to `filename`: a two-line title block
	 *                followed by one ">name" line and one sequence line per map
	 *                entry, in map (key) order.
	 * =====================================================================================
	 */
	void Fasta_map::print_to_fasta(const char *filename){

		// ios::app: an existing file is extended, never truncated.
		ofstream out;
		out.open(filename, ios::app);

		/*				file1 << "\n\t\t*******************************************************************************************\n\t\t";
						file1 << "* CAPS: Co-Evolution Analysis using Protein Sequences                                     *\n\t\t";
						file1 <<  "* Author: Brian E. Caffrey                                                                 *\n\t\t";
						file1 <<  "* Code for Inter-protein co-evolution clustering: David McNally                                  *\n\t\t";
						file1 <<  "* Evolutionary Genetics and Bioinformatics Laboratory                                    *\n\t\t";
						file1 <<  "* Department of Genetics                                                                 *\n\t\t";
						file1 <<  "* Smurfit Institute of Genetics                                                                  *\n\t\t";
						file1 <<  "* University of Dublin, Trinity College                                                          *\n\t\t";
						file1 <<  "* Mathematical Model: Fares and Travers, Genetics (2006)173: 9 - 23                      *\n\t\t";
						file1 <<  "* Conversion to C and addition of multiple allignment functionality: Brian Caffrey (2010) *\n\t\t";
						file1 <<  "*******************************************************************************************\n";
		 */

		out << "\nAmino acid sequences alignment for " << filename << endl
		    << "-----------------------------------------------------------------" << endl;

		map<string, string>::iterator entry = sequences.begin();
		while(entry!=sequences.end()){
			out << ">" << entry->first << endl
			    << entry->second << endl;
			++entry;
		}

		out.close();

	}




	/* 
	 * ===  FUNCTION  ======================================================================
	 *         Name:  Fasta_map::get_number_of_sequences
	 *  Description:  Returns the number of entries in the sequence map.
	 * =====================================================================================
	 */
	int Fasta_map::get_number_of_sequences()
	{
		const int count = static_cast<int>(sequences.size());
		return count;
	}

	/* 
	 * ===  FUNCTION  ======================================================================
	 *         Name:  Read_Fasta_vector
	 *  Description:  Reads a FASTA file into `fasta`.
	 *                  - fasta.filename receives `fasta_filename`.
	 *                  - Lines before the first '>' line are stored, unchanged, in
	 *                    fasta.Tags.
	 *                  - Each '>' line starts a record. Its name is the rest of the
	 *                    line with carriage returns and leading spaces removed. A
	 *                    name equal to one already stored gets "_<n>" appended,
	 *                    n counting up from 1 over the whole file.
	 *                  - All following lines up to the next '>' line are
	 *                    concatenated (carriage returns removed) into the record's
	 *                    sequence.
	 *                fasta.names[k] belongs to fasta.sequences[k].
	 *                Lines may be of any length. The program terminates with an
	 *                error message when the file cannot be opened or contains no
	 *                '>' line at all.
	 * =====================================================================================
	 */
	void Read_Fasta_vector(const char *fasta_filename, Fasta_vector& fasta){

		ifstream file1(fasta_filename); // read the given file

		if(!file1.is_open()){
			cerr << "Error: Couldn't open file " << fasta_filename << endl;
			exit(-1);
		}

		fasta.filename= fasta_filename;

		string line;
		string sequence;
		bool seen_header=false;
		int d=1;	// suffix handed to the next duplicated name

		while(getline(file1, line)){

			if(line.empty() || line[0]!='>'){

				if(!seen_header){
					// Leading junk before the first record.
					fasta.Tags.push_back(line);
					continue;
				}

				// Drop carriage returns left over from Windows line endings.
				string::size_type cr;
				while((cr=line.find('\r'))!=string::npos){
					line.erase(cr, 1);
				}
				sequence += line;
				continue;
			}

			// A new header closes the record that was being collected.
			if(seen_header){
				fasta.sequences.push_back(sequence);
				sequence.clear();
			}
			seen_header=true;

			string name(line, 1);	// everything after '>'
			string::size_type cr;
			while((cr=name.find('\r'))!=string::npos){
				name.erase(cr, 1);
			}
			// find_first_not_of gives npos for an all-space name: erase everything.
			name.erase(0, name.find_first_not_of(' '));

			for(unsigned int j=0;j<fasta.names.size();++j){
				if(name==fasta.names[j]){
					char suffix[32];
					sprintf(suffix, "_%d", d);
					name += suffix;
					++d;
					break;
				}
			}

			fasta.names.push_back(name);
		}

		if(!seen_header){
			cerr << "Error: No FASTA header line ('>') found in file " << fasta_filename << endl;
			exit(-1);
		}

		// The last record is closed by the end of the file.
		fasta.sequences.push_back(sequence);

		file1.close();

	}


	/* 
	 * ===  FUNCTION  ======================================================================
	 *         Name:  Read_Fasta_map
	 *  Description:  Reads a FASTA file into `fasta`.
	 *                  - fasta.filename receives `fasta_filename`.
	 *                  - Lines before the first '>' line are stored, unchanged, in
	 *                    fasta.Tags.
	 *                  - Each '>' line starts a record. Its name is the rest of the
	 *                    line with carriage returns and leading spaces removed.
	 *                  - All following lines up to the next '>' line are
	 *                    concatenated (carriage returns removed) and stored in
	 *                    fasta.sequences under the record's name. A repeated name
	 *                    overwrites the earlier sequence.
	 *                  - fasta.reference is the name of the first record, exactly
	 *                    as it is used as map key.
	 *                Lines may be of any length. The program terminates with an
	 *                error message when the file cannot be opened or contains no
	 *                '>' line at all.
	 * =====================================================================================
	 */
	void Read_Fasta_map(const char *fasta_filename, Fasta_map& fasta){

		ifstream file1(fasta_filename); // read the given file

		if(!file1.is_open()){
			cerr << "Error: Couldn't open file " << fasta_filename << endl;
			exit(-1);
		}

		fasta.filename= fasta_filename;

		string line;
		string sequence;
		string name;
		bool seen_header=false;

		while(getline(file1, line)){

			if(line.empty() || line[0]!='>'){

				if(!seen_header){
					// Leading junk before the first record.
					fasta.Tags.push_back(line);
					continue;
				}

				// Drop carriage returns left over from Windows line endings.
				string::size_type cr;
				while((cr=line.find('\r'))!=string::npos){
					line.erase(cr, 1);
				}
				sequence += line;
				continue;
			}

			// A new header closes the record that was being collected.
			if(seen_header){
				fasta.sequences[name] = sequence;
				sequence.clear();
			}

			name.assign(line, 1, string::npos);	// everything after '>'
			string::size_type cr;
			while((cr=name.find('\r'))!=string::npos){
				name.erase(cr, 1);
			}
			// find_first_not_of gives npos for an all-space name: erase everything.
			name.erase(0, name.find_first_not_of(' '));

			if(!seen_header){
				// Stored after trimming so that it matches the map key.
				fasta.reference = name;
				seen_header=true;
			}
		}

		if(!seen_header){
			cerr << "Error: No FASTA header line ('>') found in file " << fasta_filename << endl;
			exit(-1);
		}

		// The last record is closed by the end of the file.
		fasta.sequences[name] = sequence;

		file1.close();

	}

