/* log: v33 program didnt output filter sites unless -printInfo was added, noticed by @cursorially. 1may 2020 ngsAdmix32: program now works on osx icpc ngsAdmix32.cpp -lz -lpthread -O3 -o ngsAdmix32 ngsAdmix31: reads gz frequencies icpc ngsAdmix31.cpp -lz -lpthread -O3 -o ngsAdmix31 ngsAdmix30: prints information of the filtered sites icpc ngsAdmix30.cpp -lz -lpthread -O3 -o ngsAdmix30 ngsAdmix29: more speed. DO_MIS fucked icpc ngsAdmix29.cpp -lz -lpthread -O3 -o ngsAdmix29 ngsAdmix26: more speed. No longer estimate likelihoods icpc ngsAdmix26.cpp -lz -lpthread -O3 -o ngsAdmix26domis -DDO_MIS ngsAdmix25: small speed up (expG1 = expG/fstar, expG2 = (2-expG) / ( 1 - fstar)) icpc ngsAdmix25.cpp -lz -lpthread -O3 -o ngsAdmix25 icpc ngsAdmix25.cpp -lz -lpthread -O3 -o ngsAdmix25domis -DDO_MIS ngsAdmix24: clean up. no more genotype calling. CHECK must now be compiled. DO_MIS fixed, nMiss ->minInd icpc ngsAdmix24.cpp -lz -lpthread -O3 -o ngsAdmix24 ngsAdmix23: prints gz Q matrix. dynamic bounderies icpc ngsAdmix23.cpp -lz -lpthread -O3 -o ngsAdmix23 ngsAdmix22: prints gz Q matrix icpc ngsAdmix22and2.cpp -lz -lpthread -O3 -o ngsAdmix22and2 ngsAdmix21: If missing info -maxLike sets GLikes to 0.33 same for -hweLike but probably never used icpc ngsAdmix21.cpp -lz -lpthread -O3 -o ngsAdmix21 ngsAdmix20: minLRT was compared to likelihood - now fixed icpc ngsAdmix20.cpp -lz -lpthread -O3 -o ngsAdmix20 version 19 cutoff for HWE fixed - did the same as HWE without cutoff before. icpc ngsAdmix19.cpp -lz -lpthread -O3 -o ngsAdmix19 still a problem with -DDO_MIS giving -nan likelihoods - not fixed version 18 line er soed cutoff for genotype calling -cut (only works when compiled with -DDO_MIS) call genotypes (maxLike, hweLike) and update freq estimate before filtering - because this is what people will do icpc ngsAdmix18.cpp -lz -lpthread -O3 -o ngsAdmix18 icpc ngsAdmix18.cpp -lz -lpthread -DDO_MIS -O3 -o ngsAdmix18mis //version 17 add option to change the difference tolerance for calling missing //version 16: fox chix simple missing implemented 1) skip missing genotypes icpc ngsAdmix16.cpp -lz -lpthread -DDO_MIS -O3 2) use missing genotypes icpc ngsAdmix16.cpp -lz -lpthread -O3 //not major allert. if hwe genotyep calling. alleles are swiched. You cannot trust frequency output //version 15: Thorfinn fox chix added minmaf filter, added hwe genotype calling added lrt filter added char** keeps indicating if a site/sample has data added nMis which will remove sites with miss>nMissallowed //version 14: anders sux cux after the alpha guess the values are mapped to the domain program for estimating admixture from beagle files. beagle files contains posterior in normal space #introducing squareem icpc -O3 -o ngsAdmix ngsAdmix14.cpp -lz -lpthread ngsAdmix13: Use random seed. can set seed in options */ //optimazation parameteres for maf estimation #define MAF_START 0.3 #define MAF_ITER 20 #include #include #include #include #include #include #include #include #include #include #include #include //This is taken from here: //http://blog.albertarmea.com/post/47089939939/using-pthread-barrier-on-mac-os-x #ifdef __APPLE__ #ifndef PTHREAD_BARRIER_H_ #define PTHREAD_BARRIER_H_ #include #include typedef int pthread_barrierattr_t; typedef struct { pthread_mutex_t mutex; pthread_cond_t cond; int count; int tripCount; } pthread_barrier_t; int pthread_barrier_init(pthread_barrier_t *barrier, const pthread_barrierattr_t *attr, unsigned int count) { if(count == 0) { errno = EINVAL; return -1; } if(pthread_mutex_init(&barrier->mutex, 0) < 0) { return -1; } if(pthread_cond_init(&barrier->cond, 0) < 0) { pthread_mutex_destroy(&barrier->mutex); return -1; } barrier->tripCount = count; barrier->count = 0; return 0; } int pthread_barrier_destroy(pthread_barrier_t *barrier) { pthread_cond_destroy(&barrier->cond); pthread_mutex_destroy(&barrier->mutex); return 0; } int pthread_barrier_wait(pthread_barrier_t *barrier) { pthread_mutex_lock(&barrier->mutex); ++(barrier->count); if(barrier->count >= barrier->tripCount) { barrier->count = 0; pthread_cond_broadcast(&barrier->cond); pthread_mutex_unlock(&barrier->mutex); return 1; } else { pthread_cond_wait(&barrier->cond, &(barrier->mutex)); pthread_mutex_unlock(&barrier->mutex); return 0; } } #endif // PTHREAD_BARRIER_H_ #endif // __APPLE__ //global stuff below, this is very beautifull char **keeps=NULL; #define LENS 100000 //this is the max number of bytes perline, should make bigger int SIG_COND =1;//if we catch signal then quit program nicely double tol=1e-5; //stopping criteria double errTolMin=1e-9; double errTolStart=0.1; double errTol=errTolStart;//frequencies and admixture coef cannot be less than this or more than 1-this double misTol=0.05; //this struct contains nescerray information needed for threading typedef struct{ int ID; int threadNumber; int nThreads; double **F_1; double **Q_1; double **F; double **Q; int start; int stop; int startI; int stopI; double ***prodA; double ***prodB; double nPop; double **genos; int nInd; int nSites; double lres;//this is the likelihood for a block of data. total likelihood is sum of lres. }pars; //to make life simple we make stuff relating to the threading global pthread_t *threads = NULL; pars * myPars= NULL; //den ovenfor definerede struct type double likeFixedMinor(double p,double *likes,int numInds,char *keepInd){ // should these actually be normed genotype likelihoods? Or not normed? // only used for checking minLrt // returns minus the log of the likelihood double totalLike=0; for(int i=0;i1-accu2)) break; temp_p=p; } if(std::isnan(p)){ fprintf(stderr,"[%s] caught nan will not exit\n",__FUNCTION__); fprintf(stderr,"logLike (3*nInd). nInd=%d\n",numInds); //print_array(stderr,loglike,3*numInds); fprintf(stderr,"keepList (nInd)\n"); //print_array(stderr,keep,numInds); fprintf(stderr,"used logLike (3*length(keep))=%d\n",keepInd); for(int ii=0;0&&ii::quiet_NaN(); // expG2[j][i]= std::numeric_limits::quiet_NaN(); // } // else{ #endif double fpart=0; for(int k=0;k1-errTol) F[s][k] =1-errTol; } } void map2domainQ(double** Q, int nInd, int K){ for(int i=0;i(1-errTol)) Q[i][k] =1-errTol; sum+=Q[i][k]; } for(int k=0;k dumpedFiles; FILE *openFile(const char* a,const char* b){ if(0) fprintf(stderr,"[%s] %s %s",__FUNCTION__,a,b); char *c = new char[strlen(a)+strlen(b)+1]; strcpy(c,a); strcat(c,b); //fprintf(stderr,"\t-> Dumping file: %s\n",c); if(0&&fexists(c)){//ANDERS DAEMON DRAGON HATES THIS fprintf(stderr,"File: %s exists will exist\n",c); fflush(stderr); exit(0); } dumpedFiles.push_back(strdup(c)); FILE *fp = fopen(c,"w"); delete [] c; return fp; } gzFile openFileGz(const char* a,const char* b){ if(0) fprintf(stderr,"[%s] %s %s",__FUNCTION__,a,b); char *c = new char[strlen(a)+strlen(b)+1]; strcpy(c,a); strcat(c,b); // fprintf(stderr,"\t-> Dumping file: %s\n",c); if(0&&fexists(c)){//ANDERS DAEMON DRAGON HATES THIS fprintf(stderr,"File: %s exists will exist\n",c); fflush(stderr); exit(0); } dumpedFiles.push_back(strdup(c)); gzFile fp = gzopen(c,"w"); delete [] c; return fp; } //some struct will all the data from the beagle file typedef struct{ double **genos; char *major; char *minor; char **ids; int nSites; int nInd; char **keeps; //matrix containing 0/1 indicating if data or missing int *keepInd; //keepInd[nSites] this is the number if informative samples float *mafs; }bgl; //utility function for cleaning up out datastruct void dalloc(bgl &b){ for(int i=0;i tmp; while(gzgets(fp,buf,LENS)) tmp.push_back(strdup(buf)); //now we now the number of sites ret.nSites=tmp.size(); ret.major= new char[ret.nSites]; ret.minor= new char[ret.nSites]; ret.ids = new char*[ret.nSites]; ret.genos= new double*[ret.nSites]; //then loop over the vector and parsing every line for(int s=0;SIG_COND&& (s0)){ fprintf(stderr,"The sum of likelihoods for a genotypes must be positive\n"); fprintf(stderr,"individual %d site %d has sum %f\n",i,s,tmpS); exit(0); } } free(tmp[s]); } // Here additional stuff is calculated from the likelihoods // this must be done again while filtering later in main ret.keeps=new char*[ret.nSites]; // array nSites x nInd 0 if missing info ret.keepInd = new int[ret.nSites]; ret.mafs = new float[ret.nSites]; for(int s=0;s1 || Q[i][k]<0){ fprintf(stderr,"[%s] error Q %f\n",function,Q[i][k]); exit(0); } } if(sum>1+errTol || sum < 1- errTol){ fprintf(stderr,"[%s] error sumQ %f\n",function,sum); exit(0); } } for(int j = 0; j < nSites; j++) { for(int k = 0; k < K; k++) { if(F[j][k]<0 || F[j][k] > 1-0){ fprintf(stderr,"[%s] error freq %f\n",function,F[j][k]); exit(0); } } } } int printer =0; //threaded double likelihood(double** Q, double** F,int nSites, int nInd,int K,double **genos){ // F is sites times npop // Q is nInd times npop double prod_sit = 0.0; for(int i = 0; i < nInd; i++) { double prod_ind = 0.0; for(int j = 0; j < nSites; j++) { double *gg = genos[j]+i*3; double freq = 0.0; for(int k = 0; k < K; k++) freq += (F[j][k])*Q[i][k]; double f =1- freq; double sum = gg[0] * f * f; sum += gg[1]*2*f*(1-f); sum += gg[2]*(1-f)*(1-f); prod_ind += log(sum); } prod_sit += prod_ind; } #ifdef CHECK checkFQ(F,Q,nSites,nInd,K,__FUNCTION__); #endif return prod_sit; } // log likelihood for single site // fills it directly into the structure void likelihood_thd(double** Q, double** F,int nSites,int startI, int stopI,int K,double **genos,double &prod_sit){ // F is sites times npop // Q is nInd times npop prod_sit =0; for(int i = startI; i < stopI; i++) { double prod_ind = 0.0; for(int j = 0; j < nSites; j++) { double *gg = genos[j]+i*3; double freq = 0.0; for(int k = 0; k < K; k++) freq += (F[j][k])*Q[i][k]; double f =1- freq; double sum = gg[0] * f * f; sum += gg[1]*2*f*(1-f); sum += gg[2]*(1-f)*(1-f); prod_ind += log(sum); //collect all site for a single individual } prod_sit += prod_ind; } //checkFQ(F,Q,nSites,nInd,K,__FUNCTION__); } void *lkWrap(void *a){ pars *p = (pars *)a; likelihood_thd(p->Q,p->F,p->nSites,p->startI,p->stopI,p->nPop,p->genos,p->lres); return NULL; } // total log likelihood double like_tsk(double **Q,double **F,int nThreads){ for(int i=0;i1-errTol) F_1[s][k] =1-errTol; } for(int j = startI; j < stopI; j++) for(int k = 0; k < K; k++) { if(Q_1[j][k](1-errTol)) Q_1[j][k] =1-errTol; } } } void *emWrap(void *a){ pars *p = (pars *)a; emSQ_threads(p->Q,p->F,p->start,p->stop,p->nInd,p->nPop,p->genos,p->F_1,p->Q_1,p->nSites,p->prodA,p->prodB,p->startI,p->stopI,p->threadNumber,p->nThreads); return NULL; } void em_threadStart(double **Q,double **Q_new,double **F,double **F_new,int nThreads){ for(int t=0;t(1-errTol)) Q_new[i][k] =1-errTol; sum+=Q_new[i][k]; } for(int k=0;k 0.01){ #ifdef CHECK // fprintf(stderr,"em Q2 F2 \n"); #endif em(*Q_new, *F_new, d.nSites, d.nInd, nPop,d.genos,F_tmp,Q_tmp); #ifdef CHECK checkFQ(F_tmp,Q_tmp,d.nSites,d.nInd,nPop,"bad em\n"); #endif // double ** tmp; // Q_new = T_tmp // *Q_new = Q_tmp; // *F_new = F_tmp; std::swap(*Q_new,Q_tmp); std::swap(*F_new,F_tmp); } double lnew =0; #ifdef DO_MIS lnew = -likelihood(*Q_new, *F_new, d.nSites, d.nInd, nPop,d.genos); if ( (lnew > lold + objfnInc)) { fprintf(stderr,"bad guess in %s\n", __FUNCTION__); // *Q_new = Q_em2; // *F_new =F_em2; std::swap(*Q_new,Q_em2); std::swap(*F_new,F_em2); lnew = -likelihood(*Q_new, *F_new, d.nSites, d.nInd, nPop,d.genos); if (alpha == stepMax) stepMax = std::max(stepMax0, stepMax/mstep); } #endif if (alpha == stepMax) { stepMax = mstep*stepMax; } #ifdef CHECK checkFQ(*F_new,*Q_new,d.nSites,d.nInd,nPop,"bad F new\n"); #endif lold=lnew; return 1; } //returnval =1 continue, returnval =0, convergence has been achieved int emAccelThread(const bgl &d,int nPop,double **F,double **Q,double ***F_new,double ***Q_new,double &lold,int nThreads){ //maybe these should be usersettable? double stepMin =1; double stepMax0 = 1; static double stepMax=stepMax0; double mstep=4; double objfnInc=1; //we make these huge structures static such that we just allocate them the first time static double **F_em1 =NULL; static double **Q_em1 =NULL; static double **F_diff1 =NULL; static double **Q_diff1 =NULL; static double **F_em2 =NULL; static double **Q_em2 =NULL; static double **F_diff2 =NULL; static double **Q_diff2 =NULL; static double **F_diff3 =NULL; static double **Q_diff3 =NULL; static double **F_tmp =NULL; static double **Q_tmp =NULL; if(F_em1==NULL){ F_em1 =allocDouble(d.nSites,nPop); Q_em1 =allocDouble(d.nInd,nPop); F_diff1 =allocDouble(d.nSites,nPop); Q_diff1 =allocDouble(d.nInd,nPop); F_em2 =allocDouble(d.nSites,nPop); Q_em2 =allocDouble(d.nInd,nPop); F_diff2 =allocDouble(d.nSites,nPop); Q_diff2 =allocDouble(d.nInd,nPop); F_diff3 =allocDouble(d.nSites,nPop); Q_diff3 =allocDouble(d.nInd,nPop); F_tmp =allocDouble(d.nSites,nPop); Q_tmp =allocDouble(d.nInd,nPop); } if(F==NULL){ dalloc(F_em1,d.nSites); dalloc(F_em2,d.nSites); dalloc(F_diff1,d.nSites); dalloc(F_diff2,d.nSites); dalloc(F_diff3,d.nSites); dalloc(F_tmp,d.nSites); dalloc(Q_em1,d.nInd); dalloc(Q_em2,d.nInd); dalloc(Q_diff1,d.nInd); dalloc(Q_diff2,d.nInd); dalloc(Q_diff3,d.nInd); dalloc(Q_tmp,d.nInd); return 0; } em_threadStart(Q,Q_em1,F,F_em1,nThreads); minus(F_em1,F,d.nSites,nPop,F_diff1); minus(Q_em1,Q,d.nInd,nPop,Q_diff1 ); double sr2 = sumSquare(F_diff1,d.nSites,nPop)+sumSquare(Q_diff1,d.nInd,nPop); if(sqrt(sr2)1-1e-8){ // fprintf(stderr,"Fnew: %f F: %f Fem1: %f Fem2: %f alpha: %f diff1: %f diff3: %f\n",(*F_new)[i][j],F[i][j],F_em1[i][j],F_em2[i][j],alpha,F_diff1[i][j],F_diff3[i][j]); //(*F_new)[i][j]=F_em2[i][j]; //} // } map2domainF(*F_new,0,d.nSites,nPop); for(size_t i=0;i1-1e-8) // fprintf(stderr,"(*Q_new)[i][j]: %f\n",(*Q_new)[i][j]); } } map2domainQ(*Q_new,d.nInd,nPop); //fprintf(stderr,"lik %f\n",-likelihood(*Q_new, *F_new, d.nSites, d.nInd, nPop,d.genos)); // checkFQ(*F_new,*Q_new,d.nSites,d.nInd,nPop,__FUNCTION__); if (fabs(alpha - 1) > 0.01){ em_threadStart(*Q_new,Q_tmp,*F_new,F_tmp,nThreads); //*Q_new= Q_tmp; //*F_new = F_tmp; std::swap(*Q_new,Q_tmp); std::swap(*F_new,F_tmp); } // double lnew = -likelihood(Q_new, F_new, d.nSites, d.nInd, nPop,d.genos); double lnew = 1; #ifdef DO_MIS lnew = -like_tsk(*Q_new, *F_new,nThreads); if ( (lnew > lold + objfnInc)) { //*Q_new= Q_em2; // *F_new = F_em2; fprintf(stderr,"bad guess in %s\n", __FUNCTION__); std::swap(*Q_new,Q_em2); std::swap(*F_new,F_em2); // lnew = -likelihood(Q_new, F_new, d.nSites, d.nInd, nPop,d.genos); lnew = -like_tsk(*Q_new, *F_new, nThreads); if ((alpha - stepMax) > -0.001) stepMax = std::max(stepMax0, stepMax/mstep); } #endif if ((alpha - stepMax) > -0.001) { stepMax = mstep*stepMax; } //fprintf(stderr,"alpha %f stepMax %f\n",alpha,stepMax); lold=lnew; return 1; } void info(){ fprintf(stderr,"Arguments:\n"); fprintf(stderr,"\t-likes Beagle likelihood filename\n"); fprintf(stderr,"\t-K Number of ancestral populations\n"); fprintf(stderr,"Optional:\n"); fprintf(stderr,"\t-fname Ancestral population frequencies\n"); fprintf(stderr,"\t-qname Admixture proportions\n"); fprintf(stderr,"\t-outfiles Prefix for output files\n"); fprintf(stderr,"\t-printInfo print ID and mean maf for the SNPs that were analysed,\n\t along with sites retained for analysiss\n"); fprintf(stderr,"Setup:\n"); fprintf(stderr,"\t-seed Seed for initial guess in EM\n"); fprintf(stderr,"\t-P Number of threads\n"); fprintf(stderr,"\t-method If 0 no acceleration of EM algorithm\n"); fprintf(stderr,"\t-misTol Tolerance for considering site as missing\n"); fprintf(stderr,"Stop chriteria:\n"); fprintf(stderr,"\t-tolLike50 Loglikelihood difference in 50 iterations\n"); fprintf(stderr,"\t-tol Tolerance for convergence\n"); fprintf(stderr,"\t-dymBound Use dymamic boundaries (1: yes (default) 0: no)\n"); fprintf(stderr,"\t-maxiter Maximum number of EM iterations\n"); fprintf(stderr,"Filtering\n"); fprintf(stderr,"\t-minMaf Minimum minor allele frequency\n"); fprintf(stderr,"\t-minLrt Minimum likelihood ratio value for maf>0\n"); fprintf(stderr,"\t-minInd Minumum number of informative individuals\n"); exit(0); } int VERBOSE =1; void handler(int s) { if(VERBOSE) fprintf(stderr,"Caught SIGNAL: %d will try to exit nicely (no more threads are created, we will wait for the current threads to finish)\n",s); VERBOSE=0; SIG_COND=0; } float calcThres(double **d1,double **d2, int x,int y){ // finds the largest difference between 2 arrays // arrays has dimention x times y float diff=0; for(int i=0;idiff) diff=fabs(d1[i][j]-d2[i][j]); } return diff; } int whichMax(double *g){ // equality signs such that when some are equal // the lowest genotype is preferred if(g[0]>=g[1]&&g[0]>=g[2]) return 0; if(g[1]>g[0]&&g[1]>=g[2]) return 1; else return 2; } void printLikes(bgl &d){ // to write likelihoods for debugging for(int s=0;sminMaf&&d.mafs[s]<1-minMaf){ d.genos[posi] = d.genos[s]; d.major[posi] = d.major[s]; d.minor[posi] = d.minor[s]; d.ids[posi] = d.ids[s]; d.keeps[posi] = d.keeps[s]; d.keepInd[posi] = d.keepInd[s]; d.mafs[posi] = d.mafs[s]; posi++; }else{ // fprintf(stderr,"skippping\n"); } } d.nSites=posi; } //void modLikesMiss(bgl &d,int minInd){ void filterMiss(bgl &d,int minInd){ // fprintf(stderr,"WARNING filtering mis=%d \n",minInd); int posi =0; for(int s=0;sminInd){ d.genos[posi] = d.genos[s]; d.major[posi] = d.major[s]; d.minor[posi] = d.minor[s]; d.ids[posi] = d.ids[s]; d.keeps[posi] = d.keeps[s]; d.keepInd[posi] = d.keepInd[s]; d.mafs[posi] = d.mafs[s]; posi++; }else{ // fprintf(stderr,"skippping\n"); } } d.nSites=posi; } //void modLikesMinLrt(bgl &d,float minLrt){ void filterMinLrt(bgl &d,float minLrt){ // fprintf(stderr,"WARNING filtering minlrt=%f \n",minLrt); int posi =0; for(int s=0;sminLrt){ d.genos[posi] = d.genos[s]; d.major[posi] = d.major[s]; d.minor[posi] = d.minor[s]; d.ids[posi] = d.ids[s]; d.keeps[posi] = d.keeps[s]; d.keepInd[posi] = d.keepInd[s]; d.mafs[posi] = d.mafs[s]; posi++; }else{ // fprintf(stderr,"skippping\n"); } } d.nSites=posi; } int main(int argc, char **argv){ if(argc==1){// if no arguments, print info on program info(); return 0; } //below for catching ctrl+c, and dumping files struct sigaction sa; sigemptyset (&sa.sa_mask); sa.sa_flags = 0; sa.sa_handler = handler; sigaction(SIGPIPE, &sa, 0); sigaction(SIGINT, &sa, 0); //initial values int dymBound = 0; int maxIter = 2000; int method = 1; int minInd = 0; int printInfo = 0; float minMaf =0.05; float minLrt =0; const char* lname = NULL; const char* fname = NULL; const char* qname = NULL; const char* outfiles = NULL; int nPop = 3; int seed =time(NULL); int nThreads = 1; float tolLike50=0.1; // reading arguments argv++; while(*argv){ if(strcmp(*argv,"-likes")==0 || strcmp(*argv,"-l")==0) lname=*++argv; else if(strcmp(*argv,"-K")==0) nPop=atoi(*++argv); // to read start values from output from previous run else if(strcmp(*argv,"-fname")==0 || strcmp(*argv,"-f")==0) fname=*++argv; else if(strcmp(*argv,"-qname")==0 || strcmp(*argv,"-q")==0) qname=*++argv; // prefix for output files else if(strcmp(*argv,"-outfiles")==0 || strcmp(*argv,"-o")==0) outfiles=*++argv; // settings: seed, threads and if method==0 not accelerated else if(strcmp(*argv,"-seed")==0||strcmp(*argv,"-s")==0) seed=atoi(*++argv); else if(strcmp(*argv,"-P")==0) nThreads=atoi(*++argv); else if(strcmp(*argv,"-printInfo")==0) printInfo=atoi(*++argv); else if(strcmp(*argv,"-method")==0 || strcmp(*argv,"-m")==0) method=atoi(*++argv); // different stop chriteria else if(strcmp(*argv,"-tolLike50")==0||strcmp(*argv,"-lt50")==0) tolLike50=atof(*++argv); else if(strcmp(*argv,"-tol")==0||strcmp(*argv,"-t")==0) tol=atof(*++argv); else if(strcmp(*argv,"-maxiter")==0 || strcmp(*argv,"-i")==0) maxIter=atoi(*++argv); // different filterings else if(strcmp(*argv,"-misTol")==0 || strcmp(*argv,"-mt")==0) misTol=atof(*++argv); else if(strcmp(*argv,"-minMaf")==0||strcmp(*argv,"-maf")==0) minMaf=atof(*++argv); else if(strcmp(*argv,"-minLrt")==0||strcmp(*argv,"-lrt")==0) minLrt=atof(*++argv); else if(strcmp(*argv,"-minInd")==0||strcmp(*argv,"-mis")==0) minInd=atoi(*++argv); // different genotype callers else if(strcmp(*argv,"-dymBound")==0) dymBound=atoi(*++argv); else{ fprintf(stderr,"Unknown arg:%s\n",*argv); info(); return 0; } ++argv; } if(lname==NULL){ fprintf(stderr,"Please supply beagle file: -likes"); info(); } if(outfiles==NULL){ fprintf(stderr,"Will use beagle fname as prefix for output\n"); outfiles=lname; } FILE *flog=openFile(outfiles,".log"); FILE *ffilter=NULL; if(printInfo) ffilter = openFile(outfiles,".filter"); fprintf(stderr,"Input: lname=%s nPop=%d, fname=%s qname=%s outfiles=%s\n",lname,nPop,fname,qname,outfiles); fprintf(stderr,"Setup: seed=%d nThreads=%d method=%d\n",seed,nThreads,method); fprintf(stderr,"Convergence: maxIter=%d tol=%f tolLike50=%f dymBound=%d\n",maxIter,tol,tolLike50,dymBound); fprintf(stderr,"Filters: misTol=%f minMaf=%f minLrt=%f minInd=%d\n",misTol,minMaf,minLrt,minInd); fprintf(flog,"Input: lname=%s nPop=%d, fname=%s qname=%s outfiles=%s\n",lname,nPop,fname,qname,outfiles); fprintf(flog,"Setup: seed=%d nThreads=%d method=%d\n",seed,nThreads,method); fprintf(flog,"Convergence: maxIter=%d tol=%f tolLike50=%f dymBound=%d\n",maxIter,tol,tolLike50,dymBound); fprintf(flog,"Filters: misTol=%f minMaf=%f minLrt=%f minInd=%d\n",misTol,minMaf,minLrt,minInd); if(dymBound==0){ errTolStart = errTolMin; errTol = errTolMin; } clock_t t=clock();//how long time does the run take time_t t2=time(NULL); bgl d=readBeagle(lname); fprintf(stderr,"Input file has dim: nsites=%d nind=%d\n",d.nSites,d.nInd); fprintf(flog,"Input file has dim: nsites=%d nind=%d\n",d.nSites,d.nInd); // filter sites if(minMaf!=0.0) filterMinMaf(d,minMaf); if(minLrt!=0.0) filterMinLrt(d,minLrt); if(minInd!=0) filterMiss(d,minInd); if(printInfo) printKeepSites(d,ffilter); #ifdef DO_MIS keeps = d.keeps; #endif // printLikes(d); fprintf(stderr,"Input file has dim (AFTER filtering): nsites=%d nind=%d\n",d.nSites,d.nInd); fprintf(flog,"Input file has dim (AFTER filtering): nsites=%d nind=%d\n",d.nSites,d.nInd); fflush(stderr); //set seed srand(seed); //unknown parameters double **F =allocDouble(d.nSites,nPop); double **Q =allocDouble(d.nInd,nPop); double **F_new =allocDouble(d.nSites,nPop); double **Q_new =allocDouble(d.nInd,nPop); //get start values if(fname==NULL){ for(int j=0;jerrTolMin){ errTol=errTol/5; if(errTolerrTolMin){ errTol=errTol/5; if(errTolerrTolMin){ errTol=errTol/10; if(errTol1) emAccelThread(d,0,NULL,NULL,NULL,NULL,nop,0); dalloc(F_new,d.nSites); dalloc(Q_new,d.nInd); dalloc(d); delete [] threads; delete [] myPars; if(nThreads==1){ em(NULL, NULL, 0, 0, 0,NULL,NULL,NULL); } for(int j = 0; j < d.nSites; j++) delete[] F[j]; delete[] F; for(int i = 0; i < d.nInd; i++) delete [] Q[i]; delete[] Q; for(int i=0;1&&i Dumpedfiles are: %s\n",dumpedFiles[i]); free(dumpedFiles[i]); } fprintf(stderr, "\t[ALL done] cpu-time used = %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); fprintf(stderr, "\t[ALL done] walltime used = %.2f sec\n", (float)(time(NULL) - t2)); // print to log file if(flog){ fprintf(flog, "\t[ALL done] cpu-time used = %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); fprintf(flog, "\t[ALL done] walltime used = %.2f sec\n", (float)(time(NULL) - t2)); fprintf(flog,"best like=%f after %d iterations\n",lold,nit); fclose(flog); } if(ffilter) fclose(ffilter); return 0; }