using ArgParse, Distributed

"""
    parse_cli(args)

Build the ABC-MK command-line interface and parse `args` with it.

The interface declares four commands (`rates`, `parse_data`, `summaries`,
`inference`), each with its own argument table. The value returned by
`ArgParse.parse_args` is passed through unchanged: a `Dict` holding the selected
command name under `"%COMMAND%"` and that command's arguments under the command
name.
"""
function parse_cli(args)
    settings = ArgParseSettings("ABC-MK CLI")

    @add_arg_table! settings begin
        # CMD 1
        "rates", "R"
            help = "Function to solve fixation and polymorphic rates analitically. The function will create N random models from prior values. Use the arguments to defined the input range for each parameter.\nIf rho and/or theta are set to nothing, the function will input random values given the range 0.0005:0.0005:0.01. Otherwise you can fix the values.\nIf gL is set to nothing, the function will not account the role of the weakly selected alleles in the estimation.\nThe function returns a HDF5 file containing models solved and rates. The rates will be used to compute summary statistics required at ABC.\nPlease check the documentation to get more info about models parameters or detailed arguments description https://jmurga.github.io/Analytical.jl/dev/cli/ to check model "
            action = :command
        # CMD 2
        "parse_data", "P"
            help = "Function to parse polymorphic and divergence data from Uricchio et. al (2019) and Murga-Moreno et al (2019). Please input a path to create a new analysis folder. You can filter the dataset using a file containing a list of Ensembl IDs. The function returns files containing raw polymorphic and divergence data, parsed SFS and parsed divegence required to estimate summary statistics. Please check the documentation to get more info https://jmurga.github.io/Analytical.jl/dev/cli/"
            action = :command
        # CMD 3
        "summaries", "S"
            help = "Function to estimate summary statistics from a file of precomputed rates and the SFS and divergence files found in the analysis folder. Bootstrap replicas can be requested. Please check the documentation to get more info https://jmurga.github.io/Analytical.jl/dev/cli/"
            action = :command
        # CMD 4
        "inference", "I"
            help = "ABCreg inference. The function returns posterior distributions from ABC inference. Each posterior file contains information about alpha_w, alpha_s, alpha, gamNeg and shape parameter. The number of posterior distributions will depend on the number of bootstrap replicas. Check the documentation to get more info https://jmurga.github.io/Analytical.jl/dev/cli"
            action = :command
    end

    add_arg_table!(settings["rates"],
        ["--pop_size"], Dict(
            :help => "Population size",
            :arg_type => Int64,
            :default => 1000
        ),
        ["--sample_size"], Dict(
            :help => "Sample size",
            :arg_type => Int64,
            :required => true
        ),
        ["--dac"], Dict(
            :help => "Derived Allele Count",
            :arg_type => String,
            :required => true
        ),
        ["--gam_neg"], Dict(
            :help => "Selection coefficient for deleterious alleles",
            :arg_type => String,
            :required => true
        ),
        ["--positive_strong"], Dict(
            :help => "Selection coefficient for strongly beneficial alleles",
            :arg_type => String,
            :required => true
        ),
        ["--positive_weak"], Dict(
            :help => "Selection coefficient for weakly beneficial alleles (use 'nothing' to ignore weakly selected alleles)",
            :arg_type => String,
            :required => true
        ),
        ["--shape"], Dict(
            :help => "Shape value modeling Gamma distribution for deleterious alleles",
            :arg_type => Float64,
            :default => 0.184
        ),
        ["--rho"], Dict(
            :help => "Recombination rate (0 leaves the value unset)",
            :arg_type => Float64,
            :default => 0.001
        ),
        ["--theta"], Dict(
            :help => "Mutation rate on coding locus (0 leaves the value unset)",
            :arg_type => Float64,
            :default => 0.001
        ),
        ["--solutions"], Dict(
            :help => "Number of random models to solve",
            :arg_type => Int64,
            :default => 100_000
        ),
        ["--output"], Dict(
            :help => "Output file",
            :arg_type => String,
            :default => "rates.jld2"
        ),
        ["--scheduler"], Dict(
            :help => "Select scheduler manager",
            :arg_type => String,
            :default => "local"
        ),
        ["--nthreads"], Dict(
            :help => "Select number of threads to parallelize",
            :arg_type => Int64,
            :default => 1
        ),
    )

    add_arg_table!(settings["parse_data"],
        "folder", Dict(
            :help => "Path of the analysis folder to create",
            :required => true,
            :arg_type => String
        ),
        ["--dataset", "-d"], Dict(
            :help => "Dataset to download and parse: tgp, or a name containing zi or ral",
            :default => "tgp",
            :arg_type => String
        ),
        # No default: the parsed value is `nothing` when the option is not given.
        ["--gene_list", "-g"], Dict(
            :help => "File containing a list of Ensembl IDs used to filter the dataset",
            :arg_type => String
        ),
        # No default: the parsed value is `nothing` when the option is not given.
        ["--bins", "-b"], Dict(
            :help => "Value forwarded as the bins argument of the SFS parser (omit or 0 to disable)",
            :arg_type => Int64
        )
    )

    add_arg_table!(settings["summaries"],
        "folder", Dict(
            :help => "Folder path containing SFS and divergence files to run the analysis",
            :required => true,
            :arg_type => String
        ),
        ["--rates"], Dict(
            :help => "H5 file containing precomputed rates",
            :required => true,
            :arg_type => String
        ),
        ["--sample_size"], Dict(
            :help => "Sample size",
            :required => true,
            :arg_type => Int64
        ),
        ["--dac"], Dict(
            :help => "Derived allele count",
            :required => true,
            :arg_type => String
        ),
        ["--summstat_size"], Dict(
            :help => "Define number of summary estatistic to perform ABC",
            :required => true,
            :arg_type => Int64
        ),
        ["--bootstrap"], Dict(
            :help => "Allow bootstrap following polyDFE manual",
            :arg_type => Bool,
            :default => false
        ),
        ["--replicas"], Dict(
            :help => "Number of bootstrap replicas",
            :arg_type => Int64,
            :default => 1
        ),
        ["--nthreads"], Dict(
            :help => "Select number of threads to parallelize",
            :arg_type => Int64,
            :default => 1
        ),
        ["--scheduler"], Dict(
            :help => "Select scheduler manager",
            :arg_type => String,
            :default => "local"
        )
    )

    add_arg_table!(settings["inference"],
        "folder", Dict(
            :help => "Folder path containing SFS and divergence files to run the analysis",
            :required => true,
            :arg_type => String
        ),
        ["--S"], Dict(
            :help => "Define number of summary estatistic to perform ABC",
            :required => true,
            :arg_type => Int64
        ),
        ["--tol"], Dict(
            :help => "Tolerance",
            :required => true,
            :arg_type => Float64
        ),
        ["--abcreg"], Dict(
            :help => "ABCreg static binary",
            :required => true,
            :arg_type => String
        ),
        ["--nthreads"], Dict(
            :help => "Select number of threads to parallelize",
            :arg_type => Int64,
            :default => 1
        ),
        ["--scheduler"], Dict(
            :help => "Select scheduler manager",
            :arg_type => String,
            :default => "local"
        )
    )

    # Parse the arguments that were handed in rather than the global ARGS.
    return parse_args(args, settings)
end

"""
    parse_bounds(text)

Parse a `"lo:hi"` string into a `UnitRange{Int}`.

Throws an `ArgumentError` when `text` does not hold exactly two `:`-separated
integers.
"""
function parse_bounds(text)
    parts = split(text, ":")
    length(parts) == 2 ||
        throw(ArgumentError("expected a range written as lo:hi, got \"$text\""))
    lo, hi = parse.(Int, parts)
    return lo:hi
end

"""
    parse_dac(text)

Parse a comma-separated list of integers into a `Vector{Int}`.
"""
parse_dac(text) = parse.(Int, split(text, ","))

"""
    launch_workers(scheduler, nthreads)

Add `nthreads` worker processes. `"slurm"` and `"htcondor"` load ClusterManagers
on demand and use its launchers; any other value adds local workers.
"""
function launch_workers(scheduler, nthreads)
    # ClusterManagers is loaded through @eval so that it is only required when a
    # cluster scheduler is actually requested.
    if scheduler == "slurm"
        @eval using ClusterManagers
        @eval addprocs_slurm($nthreads)
    elseif scheduler == "htcondor"
        @eval using ClusterManagers
        @eval addprocs_htc($nthreads)
    else
        addprocs(nthreads)
    end
    return nothing
end

"""
    release_workers()

Remove every worker process that is currently attached, if any.
"""
function release_workers()
    # With no workers attached, `workers()` is `[1]`, which must not be removed.
    nprocs() > 1 && rmprocs(workers())
    return nothing
end

"""
    dataset_layout(dataset)

Return `(sample_size, isolines)` for a lowercase dataset name, or throw an
`ArgumentError` when the name is not one of the supported datasets.
"""
function dataset_layout(dataset)
    dataset == "tgp" && return (661, false)
    occursin("zi", dataset) && return (154, true)
    occursin("ral", dataset) && return (160, true)
    throw(ArgumentError(
        "unsupported dataset \"$dataset\": expected tgp, or a name containing zi or ral",
    ))
end

"""
    run_parse_data(opts)

Create the analysis folder, download the requested dataset into it, parse it
with `Analytical.parse_sfs` and write `sfs.tsv` and `div.tsv` into the folder.
"""
function run_parse_data(opts)
    dataset = lowercase(opts["dataset"])
    # Validate before touching the file system or the network.
    sample_size, isolines = dataset_layout(dataset)

    # The heavy packages are loaded lazily; everything that touches them has to
    # go through @eval so that it runs after they are loaded.
    @eval using Analytical, DataFrames, CSV

    folder = opts["folder"]
    mkpath(folder)

    data = joinpath(folder, dataset * ".txt")
    download(
        "https://raw.githubusercontent.com/jmurga/Analytical.jl/master/data/" *
        dataset * ".txt",
        data,
    )

    gene_list = opts["gene_list"]
    if isnothing(gene_list)
        @eval gList = nothing
    else
        @eval gList = CSV.read($gene_list, DataFrame, header=false) |> Array
    end

    bins = opts["bins"]
    bins_size = (isnothing(bins) || bins == 0) ? nothing : bins

    # `isolines` is only passed for the datasets that request it, so the
    # parser's own default applies to tgp.
    if isolines
        @eval α, sfs, divergence = Analytical.parse_sfs(sample_size=$sample_size, data=$data, gene_list=gList, bins=$bins_size, isolines=true)
    else
        @eval α, sfs, divergence = Analytical.parse_sfs(sample_size=$sample_size, data=$data, gene_list=gList, bins=$bins_size)
    end

    # Writing data to folder
    sName = joinpath(folder, "sfs.tsv")
    dName = joinpath(folder, "div.tsv")
    @eval CSV.write($sName, DataFrame(sfs, :auto), delim='\t', header=false)
    @eval CSV.write($dName, DataFrame(divergence', :auto), delim='\t', header=false)
    return nothing
end

"""
    run_rates(opts)

Start the workers, build the model parameters and call `Analytical.rates` with
the ranges and values given on the command line.

`--positive_weak nothing` (or `false`) passes `gL = nothing`; a `--rho` or
`--theta` of 0 passes `nothing` for that value.
"""
function run_rates(opts)
    gamNeg = parse_bounds(opts["gam_neg"])
    gH = parse_bounds(opts["positive_strong"])

    weak_text = opts["positive_weak"]
    gL = lowercase(strip(weak_text)) in ("nothing", "false") ? nothing : parse_bounds(weak_text)

    dac = parse_dac(opts["dac"])

    # The options are Float64, so 0 is the only way to leave them unset.
    rho = iszero(opts["rho"]) ? nothing : opts["rho"]
    theta = iszero(opts["theta"]) ? nothing : opts["theta"]

    scheduler = opts["scheduler"]
    ne = opts["pop_size"]
    samples = opts["sample_size"]
    shape = opts["shape"]
    solutions = opts["solutions"]
    output = opts["output"]

    launch_workers(scheduler, opts["nthreads"])
    try
        @eval @everywhere using Analytical, ParallelUtilities
        @eval adap = Analytical.parameters(N=$ne, n=$samples, dac=$dac, al=$shape)
        @eval cnv = Analytical.binomial_dict()
        @eval Analytical.binomOp!(adap, cnv.bn)
        @eval Analytical.rates(param=adap, convoluted_samples=cnv, gH=$gH, gL=$gL, gamNeg=$gamNeg, iterations=$solutions, rho=$rho, theta=$theta, shape=adap.al, output=$output, scheduler=$scheduler)
    finally
        # Give the workers back even when the computation fails.
        release_workers()
    end
    return nothing
end

"""
    run_summaries(opts)

Start the workers and call `Analytical.summary_statistics` with the rates file,
the analysis folder and the bootstrap settings given on the command line.
"""
function run_summaries(opts)
    samples = opts["sample_size"]
    dac = parse_dac(opts["dac"])
    bootstrap = opts["bootstrap"]
    replicas = opts["replicas"]
    rates_file = opts["rates"]
    summstat_size = opts["summstat_size"]
    folder = opts["folder"]

    launch_workers(opts["scheduler"], opts["nthreads"])
    try
        @eval using JLD2, DataFrames, CSV, ProgressMeter
        @eval @everywhere using Analytical, ParallelUtilities
        @eval adap = Analytical.parameters(n=$samples, dac=$dac)
        @eval summstat = Analytical.summary_statistics(param=adap, h5_file=$rates_file, analysis_folder=$folder, summstat_size=$summstat_size, replicas=$replicas, bootstrap=$bootstrap)
    finally
        release_workers()
    end
    return nothing
end

"""
    run_inference(opts)

Start the workers and call `Analytical.ABCreg` on the analysis folder.
"""
function run_inference(opts)
    folder = opts["folder"]
    S = opts["S"]
    tol = opts["tol"]
    abcreg = opts["abcreg"]

    launch_workers(opts["scheduler"], opts["nthreads"])
    try
        @eval @everywhere using Analytical, ParallelUtilities
        @eval Analytical.ABCreg(analysis_folder=$folder, S=$S, tol=$tol, abcreg=$abcreg)
    finally
        release_workers()
    end
    return nothing
end

"""
    run_command(cli)

Run the command selected in the dictionary returned by `parse_cli`.
"""
function run_command(cli)
    # ArgParse stores the selected command name under "%COMMAND%" and that
    # command's own arguments under the command name.
    cmd = cli["%COMMAND%"]
    opts = cli[cmd]

    if cmd == "parse_data"
        run_parse_data(opts)
    elseif cmd == "rates"
        run_rates(opts)
    elseif cmd == "summaries"
        run_summaries(opts)
    elseif cmd == "inference"
        run_inference(opts)
    end
    return nothing
end

cli = parse_cli(ARGS)
run_command(cli)