#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Wed Jan 6 10:56:17 2021 @author: Tobias Andermann (tobiasandermann88@gmail.com) """ import numpy as np import pandas as pd import math import pickle import csv import os, sys from collections import Counter from urllib.request import urlopen from io import StringIO import subprocess import datetime import warnings from scipy.optimize import curve_fit import matplotlib.pyplot as plt from matplotlib.backends.backend_pdf import PdfPages class get_iucn_data(): def __init__(self, target_species_list, outdir, reference_group, reference_rank = None, iucn_key = None, no_online_sync = False, from_file = False): self._reference_group = reference_group self._reference_rank = reference_rank self._target_species_list = target_species_list self._outdir = outdir self._iucn_key = iucn_key self._no_online_sync = no_online_sync self._from_file = from_file self.run() def run(self): # get user input taxon_reference_group = self._reference_group reference_rank = self._reference_rank species_list = self._target_species_list load_from_file = self._from_file outdir = self._outdir iucn_key = self._iucn_key avoid_precompiled_iucn_data = self._no_online_sync # get iucn history of reference group iucn_history_file = get_iucn_history(reference_group=taxon_reference_group, reference_rank=reference_rank, iucn_key=iucn_key, avoid_precompiled_iucn_data=avoid_precompiled_iucn_data, outdir=outdir) self._iucn_history_file = iucn_history_file # get most recent status for each taxon in target species list extant_taxa_current_status = get_most_recent_status_target_species(species_list=species_list, iucn_history_file=iucn_history_file, iucn_key=iucn_key, load_from_file = load_from_file, outdir=outdir) self._species_data = extant_taxa_current_status # get info about possibly extinct taxa possibly_extinct_taxa = get_possibly_extinct_iucn_info(iucn_history_file, outdir=outdir) self._possibly_extinct_taxa = possibly_extinct_taxa class transition_rates(): def __init__(self, species_iucn_status, iucn_history_file, outdir, extinction_probs_mode=0, possibly_extinct_list=[], species_specific_regression=False, rate_samples=100, n_gen=100000, burnin=1000, seed=None, load_from_file=False): self._species_data = species_iucn_status self._iucn_history = iucn_history_file self._outdir = outdir self._extinction_probs_mode = extinction_probs_mode self._possibly_extinct_list = possibly_extinct_list self._species_specific_regression = species_specific_regression self._rate_samples = rate_samples self._n_gen = n_gen self._burnin = burnin self._seed = seed self._load_from_file = load_from_file self.run() def run(self): # get user input___________________________________________________________ iucn_history = self._iucn_history outdir = self._outdir try: extinction_probs_mode = int(self._extinction_probs_mode) except: print('\nInvalid extinction_probs_mode provided. Please choose between the currenlty available options 0 or 1') quit() possibly_extinct_list = self._possibly_extinct_list n_rep = int(self._rate_samples) n_gen = int(self._n_gen) burnin = int(self._burnin) if not os.path.exists(outdir): os.makedirs(outdir) seed = self._seed try: self._random_seed = False seed = int(seed) except: seed = np.random.randint(999999999) self._random_seed = True self._seed = seed np.random.seed(seed) np.savetxt(os.path.join(outdir,'starting_seed_transition_rates.txt'),np.array([seed]),fmt='%i') # get input data (either from file or from memory) if self._load_from_file: species_data_input = pd.read_csv(self._species_data,sep='\t',header=None) else: species_data_input = self._species_data species_data_input = species_data_input.dropna() invalid_status_taxa = species_data_input[~species_data_input.iloc[:,1].isin(['LC','NT','VU','EN','CR','DD','NE'])] if len(invalid_status_taxa)>0: print('\nFound invalid IUCN statuses:',list(invalid_status_taxa[1].values),'\n\nMake sure that the second column of your --species_data input contains the current IUCN status of your target species, which must be one of the following valid extant statuses: LC, NT, VU, EN, CR, DD, NE') # if this effects only a minority of taxa, continue after removing these if len(invalid_status_taxa)/len(species_data_input) < 0.5: print('\nAutomatically dropping the following taxa because of invalid IUCN status information:', list(invalid_status_taxa[0].values)) species_data_input = species_data_input[species_data_input.iloc[:,1].isin(['LC','NT','VU','EN','CR','DD','NE'])] else: quit('\nPlease fix your species_data input file. Check presence of current IUCN status information and column order.') # get the list of species species_list = species_data_input.iloc[:,0].values.astype(str) # replace underscores in species name in case they are present species_list = np.array([i.replace('_',' ') for i in species_list]) # Check if all species names are binomial for species in species_list: if len(species.split(' ')) != 2: print('ERROR','*'*50,'\nABORTED: All provided species names provided under --species_data flag must be binomial! Found non binomial name:\n%s\n'%species,'*'*50) quit() # get the current IUCN status of all species current_status = species_data_input.iloc[:,1].values.astype(str) # get GL data if provided gl_data_available = False if species_data_input.shape[1] > 2: gl_matrix = species_data_input.iloc[:,2:].values gl_data_available = True if self._load_from_file: possibly_extinct_list = pd.read_csv(possibly_extinct_list,sep='\t') else: possibly_extinct_list = pd.DataFrame(possibly_extinct_list) #__________________________________________________________________________ # process the IUCN history data____________________________________________ # extinction prob mode 0: remove all currently extinct taxa if extinction_probs_mode == 0: status_change_coutn_df,years_in_each_category = evaluate_iucn_history(self._iucn_history,possibly_extinct_list=possibly_extinct_list,exclude_extinct=True,outdir=self._outdir) # extinciton prob mode 1: remove only taxa that have been extinct all along, keeping those that have recorded transition to extinct within time frame elif extinction_probs_mode == 1: status_change_coutn_df,years_in_each_category = evaluate_iucn_history(self._iucn_history,possibly_extinct_list=possibly_extinct_list,exclude_extinct=False,outdir=self._outdir) #__________________________________________________________________________ # run MCMCs to sample transition rates_____________________________________ if not self._random_seed: print('Running MCMC with user-set starting seed %i ...'%seed) else: print('Running MCMC with randomely generated starting seed %i ...'%seed) sampled_rates_list = [pd.DataFrame(columns = ['status_change'] + ['rate_%i' % i for i in np.arange(0,n_rep)])] for status_a in status_change_coutn_df.columns: row = status_change_coutn_df.loc[status_a] for status_b in row.index.values: if not status_a == status_b: count = row[status_b] total_time = years_in_each_category[status_a] rates = sample_rate_mcmc(count, total_time, n_samples = n_rep, n_gen = n_gen, burnin = burnin) sampled_rates_list.append(pd.DataFrame(data=np.matrix(['%s->%s' % (status_a,status_b)] + list(rates)), columns = ['status_change']+ ['rate_%i' % i for i in np.arange(0, n_rep)])) sampled_rates_df = pd.concat(sampled_rates_list) sampled_rates_df[['rate_%i' % i for i in np.arange(0, n_rep)]] = sampled_rates_df[['rate_%i' % i for i in np.arange(0, n_rep)]].apply(pd.to_numeric) sampled_rates_df.to_csv(os.path.join(outdir,'sampled_status_change_rates.txt'),sep='\t',index=False,float_format='%.8f') print('Sampled %i rates from MCMC posterior for each transition type.'%n_rep) #__________________________________________________________________________ # if mode 0, calculate extinction probabilities for EN and CR with GL data_________________________ if extinction_probs_mode == 0: # calculate yearly extinction risks for categories EN and CR if gl_data_available: dims = gl_matrix.shape[1] en_risks = [] for gl_array in gl_matrix: if dims == 1: gl_array = np.array(gl_array) #replace all nan values with the standard en extinction risk en_risks_species = p_e_year(np.minimum(np.maximum([20]*len(gl_array),5*gl_array),100),0.2) n_nan = len(en_risks_species[en_risks_species!=en_risks_species]) en_risks_species[en_risks_species!=en_risks_species] = [p_e_year(20,0.2)]*n_nan en_risks.append(en_risks_species) en_risks = np.array(en_risks) else: print('Warning: No generation length (GL) data found. Extinction risks for status EN and CR are calculated without using GL data.') dims = 1 en_risks = np.array([[p_e_year(20,0.2)]]*len(species_list)) en_risks_df = make_empty_rate_df(species_list,dims,'EN') en_risks_df.iloc[:,1:] = en_risks en_risks_df.to_csv(os.path.join(outdir,'en_extinction_risks_all_species.txt'),sep='\t',index=False, float_format='%.12f') if gl_data_available: dims = gl_matrix.shape[1] cr_risks = [] for gl_array in gl_matrix: if dims == 1: gl_array = np.array(gl_array) #replace all nan values with the standard en extinction risk cr_risks_species = p_e_year(np.minimum(np.maximum([10]*len(gl_array),3*gl_array),100),0.5) n_nan = len(cr_risks_species[cr_risks_species!=cr_risks_species]) cr_risks_species[cr_risks_species!=cr_risks_species] = [p_e_year(10,0.5)]*n_nan cr_risks.append(cr_risks_species) cr_risks = np.array(cr_risks) else: dims = 1 cr_risks = np.array([[p_e_year(10,0.5)]]*len(species_list)) cr_risks_df = make_empty_rate_df(species_list,dims,'CR') cr_risks_df.iloc[:,1:] = cr_risks cr_risks_df.to_csv(os.path.join(outdir,'cr_extinction_risks_all_species.txt'),sep='\t',index=False, float_format='%.12f') if self._species_specific_regression: # make regression for all other categories based on EN and CR risks print('Fitting species-specific regression function to determine LC, NT, and VU extinction probabilities ...') vu_risks_df = make_empty_rate_df(species_list,dims,'VU') nt_risks_df = make_empty_rate_df(species_list,dims,'NT') lc_risks_df = make_empty_rate_df(species_list,dims,'LC') for i,species in enumerate(cr_risks_df.species.values): en_risks = en_risks_df.iloc[i,1:].values cr_risks = cr_risks_df.iloc[i,1:].values vu_risks = [] nt_risks = [] lc_risks = [] for j,_ in enumerate(en_risks): en_prob = en_risks[j] cr_prob = cr_risks[j] x = [4.,5.] y = [en_prob,cr_prob] # fitting the power function to the 2 data points of each species (EN and CR risk) with warnings.catch_warnings(): # this is to avoid printing the warning from curve_fit when trying to fit function to only 2 points: "OptimizeWarning: Covariance of the parameters could not be estimated" warnings.filterwarnings("ignore") a_b = curve_fit(power_function,x,y); # extracting the values for a and b from the curve fit function a = a_b[0][0] b = a_b[0][1] # get values for LC, NT, and VU p_year_LC = power_function(1,a,b) p_year_NT = power_function(2,a,b) p_year_VU = power_function(3,a,b) vu_risks.append(p_year_VU) nt_risks.append(p_year_NT) lc_risks.append(p_year_LC) vu_risks_df.iloc[vu_risks_df[vu_risks_df.species == species].index.values[0],1:] = np.array(vu_risks) nt_risks_df.iloc[nt_risks_df[nt_risks_df.species == species].index.values[0],1:] = np.array(nt_risks) lc_risks_df.iloc[lc_risks_df[lc_risks_df.species == species].index.values[0],1:] = np.array(lc_risks) vu_risks_df.to_csv(os.path.join(outdir,'vu_extinction_risks_all_species.txt'),sep='\t',index=False, float_format='%.12f') nt_risks_df.to_csv(os.path.join(outdir,'nt_extinction_risks_all_species.txt'),sep='\t',index=False, float_format='%.12f') lc_risks_df.to_csv(os.path.join(outdir,'lc_extinction_risks_all_species.txt'),sep='\t',index=False, float_format='%.12f') #__________________________________________________________________________ # populate q-matrices______________________________________________________ print("\nPopulating species-specific q-matrices ...") sampled_rates_df.index = sampled_rates_df.status_change.values if extinction_probs_mode == 0: transition_rates = sampled_rates_df.iloc[:,1:] # randomely sample cr and en extinction probs to be used in q-matrices. if n_rep <= dims: sample_columns = np.random.choice(np.arange(dims),size=n_rep,replace=False) # since there are only as many cr and en p(ex) estimates as there are provided GL values, we may have to resample some (but make sure all are present at least once) else: sample_columns1 = np.random.choice(np.arange(dims),size=dims,replace=False) sample_columns2 = np.random.choice(np.arange(dims),size=(n_rep-dims),replace=True) sample_columns = np.concatenate([sample_columns1,sample_columns2]) # get the corresponding en and cr ex-risk columns cr_risks_selection = cr_risks_df.iloc[:,1:].values[:,sample_columns] en_risks_selection = en_risks_df.iloc[:,1:].values[:,sample_columns] # smae for the vu, nt, and lc cats if species_specific_regression is activated if self._species_specific_regression: vu_risks_selection = vu_risks_df.iloc[:,1:].values[:,sample_columns] nt_risks_selection = nt_risks_df.iloc[:,1:].values[:,sample_columns] lc_risks_selection = lc_risks_df.iloc[:,1:].values[:,sample_columns] elif extinction_probs_mode == 1: target_keys = [i for i in sampled_rates_df.status_change.values if i[-2:] == 'EX'] ex_probs = sampled_rates_df[sampled_rates_df.status_change.isin(target_keys)].iloc[:-1,1:].values.T transition_rates = sampled_rates_df[~sampled_rates_df.status_change.isin(target_keys)].iloc[:30,1:] for i in np.arange(n_rep): rates_i = transition_rates.iloc[:,i] sys.stdout.write('\rProgress: %i %%'%int(((i+1)/n_rep)*100)) # for each rep (i), create list of q-matrices, 1 for each species if extinction_probs_mode == 0: cr_risks_rep = cr_risks_selection[:,i] en_risks_rep = en_risks_selection[:,i] if self._species_specific_regression: vu_risks_rep = vu_risks_selection[:,i] nt_risks_rep = nt_risks_selection[:,i] lc_risks_rep = lc_risks_selection[:,i] q_matrix_list_i = [] for j,__ in enumerate(species_list): en_risk = en_risks_rep[j] cr_risk = cr_risks_rep[j] if self._species_specific_regression: lc_nt_vu = [lc_risks_rep[j],nt_risks_rep[j],vu_risks_rep[j]] else: lc_nt_vu = [0.000000155728,0.000041551152,0.001053050310] status_specific_p_e = np.array(lc_nt_vu+[en_risk,cr_risk]) # These values are the category specific probabilities of extinction per year calculated from IUCN definition of each category q_matrix = qmatrix(rates_i, status_specific_p_e) q_matrix_list_i.append([q_matrix]) elif extinction_probs_mode == 1: q_matrix_list_i = [] status_specific_p_e = ex_probs[i] q_matrix = qmatrix(rates_i, status_specific_p_e) q_matrix_list_i = [] for spec in species_list: q_matrix_list_i.append([q_matrix]) q_matrix_list_i_copy = q_matrix_list_i.copy() if i == 0: qmatrix_list_dict = dict(zip(list(species_list),q_matrix_list_i_copy)).copy() else: update_dict = [qmatrix_list_dict[species].append(q_matrix_list_i_copy[i][0]) for i, species in enumerate(list(species_list))] print('\n') #__________________________________________________________________________ # get transition rates for DD______________________________________________ dd_changes = [] dd_rates = [] for row_id,change_type in enumerate(transition_rates.index.values): states = change_type.split('->') if states[0] == 'DD': dd_changes.append('-'.join(states)) rates = transition_rates[transition_rates.index==change_type].values dd_rates.append(rates[0]) dd_probs = dd_rates/sum(np.array(dd_rates)) #__________________________________________________________________________ # Finally write all the compiled info to a pickle file_____________________ species_specific_data = [[species,current_status[i],qmatrix_list_dict[species]]for i,species in enumerate(species_list)] final_output_data = [species_specific_data,dd_probs] simdata_outfile = os.path.join(outdir,'simulation_input_data.pkl') save_obj(final_output_data,simdata_outfile) self._simulation_input_data = final_output_data self._simdata_outfile = simdata_outfile class run_sim(): def __init__(self, input_data, outdir, n_years=100, n_sim=10000, status_change=1, conservation_increase_factor=1, threat_increase_factor=1, model_unknown_as_lc=0, until_n_taxa_extinct=0, plot_diversity_trajectory=1, plot_status_trajectories=1, plot_histograms=0, plot_status_piechart=1, seed=None, load_from_file=False, save_future_status_array = False, save_status_through_time = False): self._input_data = input_data self._outdir = outdir self._n_years = n_years self._n_sim = n_sim self._status_change = status_change self._conservation_increase_factor = conservation_increase_factor self._threat_increase_factor = threat_increase_factor self._model_unknown_as_lc = model_unknown_as_lc self._until_n_taxa_extinct = until_n_taxa_extinct self._plot_diversity_trajectory = plot_diversity_trajectory self._plot_histograms = plot_histograms self._plot_status_trajectories = plot_status_trajectories self._plot_status_piechart = plot_status_piechart self._seed = seed self._load_from_file = load_from_file self._save_future_status_array = save_future_status_array self._save_status_through_time = save_status_through_time self.run() def run(self): # get user input___________________________________________________________ seed = self._seed try: random_seed = int(seed) print('Simulating with user-set starting seed %i.'%random_seed) except: random_seed = np.random.randint(999999999) print('Simulating with randomely generated starting seed %i.'%random_seed) np.random.seed(random_seed) self._seed = random_seed colors = {'LC':'#60C659','NT':'#CCE226','VU':'#F9E814','EN':'#FC7F3F','CR':'#D81E05','EX':'#000000','DD':'#D1D1C6'} outdir = self._outdir n_years = int(self._n_years) n_sim = int(self._n_sim) n_extinct_taxa = int(self._until_n_taxa_extinct) allow_status_change = int(self._status_change) conservation_increase_factor = int(self._conservation_increase_factor) threat_increase_factor = int(self._threat_increase_factor) plot_diversity_trajectory = int(self._plot_diversity_trajectory) plot_histograms = int(self._plot_histograms) model_unknown_as_lc = int(self._model_unknown_as_lc) plot_status_trajectories = int(self._plot_status_trajectories) plot_status_piechart = int(self._plot_status_piechart) if not os.path.exists(outdir): os.makedirs(outdir) np.savetxt(os.path.join(outdir,'starting_seed_future_sim.txt'),np.array([random_seed]),fmt='%i') if self._load_from_file: infile = self._input_data input_data = load_obj(infile) else: input_data = self._input_data species_input_data, dd_probs = input_data species_list = np.array([i[0] for i in species_input_data]) current_status_list = np.array([i[1] for i in species_input_data]) q_matrix_list = [i[2] for i in species_input_data] n_rates = dd_probs.shape[1] #__________________________________________________________________________ # modify q-matrices, if set by user________________________________________ final_qmatrix_list = [] q_matrix_list_copy = np.array(q_matrix_list).copy() for q_matrix_list_i in q_matrix_list_copy: q_matrix_list_temp = [] for q_matrix in q_matrix_list_i: if conservation_increase_factor != 1: indeces_lower_triangle = np.tril_indices(q_matrix.shape[0],-1) q_matrix[indeces_lower_triangle] = q_matrix[indeces_lower_triangle] * conservation_increase_factor np.fill_diagonal(q_matrix,0) np.fill_diagonal(q_matrix, -np.sum(q_matrix,axis=1)) if threat_increase_factor != 1: indeces_upper_triangle = np.triu_indices(q_matrix.shape[0],1) q_matrix[indeces_upper_triangle] = q_matrix[indeces_upper_triangle] * threat_increase_factor np.fill_diagonal(q_matrix,0) np.fill_diagonal(q_matrix, -np.sum(q_matrix,axis=1)) q_matrix_list_temp.append(q_matrix) final_qmatrix_list.append(q_matrix_list_temp) # turn into a dict with the n qmatrices for each species final_qmatrix_dict = dict(zip(species_list,final_qmatrix_list)) #__________________________________________________________________________ # if more n_rep are set than there are q-matrices, resample________________ if n_sim <= n_rates: sample_columns = np.random.choice(np.arange(n_rates),size=n_sim,replace=False) # since there are only as many cr and en p(ex) estimates as there are provided GL values, we may have to resample some (but make sure all are present at least once) else: sample_columns1 = np.random.choice(np.arange(n_rates),size=n_rates,replace=False) sample_columns2 = np.random.choice(np.arange(n_rates),size=(n_sim-n_rates),replace=True) sample_columns = np.concatenate([sample_columns1,sample_columns2]) #__________________________________________________________________________ # run simulations__________________________________________________________ delta_t = n_years model_ne_as_dd = False dynamic_qmatrix = True if model_ne_as_dd: current_status_list[current_status_list=='NE'] = 'DD' if model_unknown_as_lc: print('\nSetting all DD and NE species to LC.') all_lc=True else: all_lc=False if allow_status_change: status_change=True else: print('\nNot simulating future status changes!') status_change=False print('\nStarting simulations ...') diversity_through_time,te_array,status_through_time,time_until_n_extinctions_list = run_multi_sim(n_sim, delta_t, species_list, current_status_list, dd_probs, final_qmatrix_dict, sample_columns, outdir, all_lc=all_lc, status_change=status_change, dynamic_qmatrix=dynamic_qmatrix, n_extinct_taxa=n_extinct_taxa, save_future_status_array=self._save_future_status_array, save_status_through_time=self._save_status_through_time) # summarize simulation results sim_species_list = te_array[:,0].copy() ext_date_data = te_array[:,1:].copy() self._ext_date_data = ext_date_data extinction_occs = np.array([len(row[~np.isnan(list(row))]) for row in ext_date_data]) extinction_prob = extinction_occs/ext_date_data.shape[1] self._extinction_prob = extinction_prob # produce output file for status distribution through time mean_status_through_time = np.mean(status_through_time,axis=2) year = np.arange(delta_t+1).astype(int) status_df_data = np.round(np.vstack([year,mean_status_through_time])).astype(int) status_df = pd.DataFrame(data = status_df_data.T,columns=['year','LC','NT','VU','EN','CR','EX']) self._status_through_time_mean = status_df #self._status_through_time = status_through_time status_df.to_csv(os.path.join(outdir,'status_distribution_through_time.txt'),sep='\t',index=False) self._simulated_extinctions_through_time = status_through_time[-1] np.savetxt(os.path.join(outdir,'simulated_extinctions_array.txt'),status_through_time[-1],fmt='%i') extinction_times = pd.DataFrame(data=ext_date_data,index=sim_species_list) self._extinction_times = extinction_times extinction_times.to_csv(os.path.join(outdir,'te_all_species.txt'),sep='\t',header=False,index=True) #__________________________________________________________________________ # print extinction probabilities over simulated time frame for each species column_names = ['species','simulated_ext_prob_in_%i_years'%delta_t] extinction_prob_df = pd.DataFrame(np.array([sim_species_list,extinction_prob]).T,columns=column_names) extinction_prob_df[column_names[1:]] = extinction_prob_df[column_names[1:]].astype(float) extinction_prob_df.to_csv(os.path.join(outdir,'extinction_probs_over_next_%i_years.txt'%delta_t),sep='\t',index=False,float_format='%.8f') #__________________________________________________________________________ # if target_species: # posterior = get_rate_estimate_posterior(ext_date_data[0],n_years,0,sim_species_list,n_gen=n_gen,burnin=burnin) # np.savetxt('/Users/tobias/GitHub/iucn_predictions/doc/figures/Figure_2/figure_data/posterior_samples/%s_gl_no_status_change.txt'%target_species.replace(' ','_'),posterior,fmt='%.8f') # print('\nPrinted posterior') #__________________________________________________________________________ if n_extinct_taxa: np.savetxt(os.path.join(outdir,'time_until_%i_extinctions.txt'%n_extinct_taxa),time_until_n_extinctions_list,fmt='%.2f') self._time_until_n_extinctions = time_until_n_extinctions_list fig = plt.figure() plt.hist(time_until_n_extinctions_list) plt.xlabel('Time in years') plt.ylabel('N') plt.title('Simulated years until %i extinctions (%i simulations)'%(n_extinct_taxa,n_sim)) fig.savefig(os.path.join(outdir,'time_until_%i_extinctions.pdf'%n_extinct_taxa),bbox_inches='tight', dpi = 500) #__________________________________________________________________________ # plot diversity trajectory of species list________________________________ #colors = ["#9a002e","#df4a3d","#fecd5f","#5cd368","#916200"] # define time axis time_axis = np.array(range(len(diversity_through_time[0]))) fig = plt.figure() y_values = np.mean(diversity_through_time, axis =0) plt.plot(time_axis,y_values,color="#b80033", label='accounting for GL') # get upper and lower confidence interval boundaries min_hpd, max_hpd = np.array([calcHPD(i,0.95) for i in diversity_through_time.T]).T mean_min_max = np.vstack([y_values,min_hpd,max_hpd]) self._future_div_mean_min_max = mean_min_max if plot_diversity_trajectory: np.savetxt(os.path.join(outdir,'future_diversity_trajectory.txt'),mean_min_max,fmt='%.2f') plt.fill_between(time_axis, min_hpd, max_hpd, color="#b80033", alpha=0.2) #plt.legend() plt.ylabel('Total diversity') plt.xlabel('Years from present') ax = plt.gca() ax1 = ax.twinx() # Set the limits of the new axis from the original axis limits ax1.set_ylim(ax.get_ylim()) current_diversity = diversity_through_time[0,0] plt.yticks([np.mean(diversity_through_time[:,-1])],[int(current_diversity-np.mean(diversity_through_time[:,-1]))]) #plt.xticks(modified_q_matrix.year[::10],modified_q_matrix.year[::10]) plt.ylabel('Lost species') plt.tight_layout() fig.savefig(os.path.join(outdir,'future_diversity_trajectory.pdf'),bbox_inches='tight', dpi = 500) #__________________________________________________________________________ #__________________________________________________________________________ if plot_status_trajectories: # color palette #colors = ["#227a00","#a5c279","#f3d248","#6956cb","#79262a","#e34349"] # define time axis time_axis = np.array(range(len(diversity_through_time[0]))) # plot results def plot_mean_and_interval(div,color,label,fig): plt.plot(time_axis,np.mean(div,axis=0),color=color,label=label); min_hpd, max_hpd = np.array([calcHPD(i,0.95) for i in div.T]).T plt.fill_between(time_axis, min_hpd, max_hpd, color=color, alpha=0.2); return fig fig = plt.figure(figsize=(10,10)) plot_mean_and_interval(status_through_time[0,:,:].T,colors['LC'],'LC',fig) plot_mean_and_interval(status_through_time[1,:,:].T,colors['NT'],'NT',fig) plot_mean_and_interval(status_through_time[2,:,:].T,colors['VU'],'VU',fig) plot_mean_and_interval(status_through_time[3,:,:].T,colors['EN'],'EN',fig) plot_mean_and_interval(status_through_time[4,:,:].T,colors['CR'],'CR',fig) plot_mean_and_interval(status_through_time[5,:,:].T,colors['EX'],'EX',fig) # add title, legend and axis-labels plt.legend(loc='best',fancybox=True) plt.title('Diversity trajectory IUCN categories - status change') #10x higher conservation plt.ylabel('Number species in category') plt.xlabel('Years from present') ax = plt.gca() ax1 = ax.twinx() # Set the limits of the new axis from the original axis limits ax1.set_ylim(ax.get_ylim()) # annotate final counts with labels right_ticks = [int(np.round(np.mean(status_through_time[i,-1,:]))) for i in range(status_through_time.shape[0])] plt.yticks(right_ticks,right_ticks) #plt.xticks(modified_q_matrix.year[::10],modified_q_matrix.year[::10]) plt.tight_layout() fig.savefig(os.path.join(outdir,'future_status_trajectory.pdf'),bbox_inches='tight', dpi = 500) #__________________________________________________________________________ #__________________________________________________________________________ if plot_status_piechart: statuses, counts = np.unique(current_status_list,return_counts=True) init_status_dict = dict(zip(statuses, counts)) init_status_dict['EX'] = 0 iucn_status_code = {0:'LC', 1:'NT', 2:'VU', 3:'EN', 4:'CR', 5:'EX', 6:'DD'} status_count_list = [] status_order = [] for status_id in np.arange(status_through_time.shape[0]+1): status = iucn_status_code[status_id] if status in init_status_dict.keys(): pre_dd_modeling_count = init_status_dict[status] else: pre_dd_modeling_count = 0 if not status == 'DD': present_status_count = int(np.round(np.mean(status_through_time[status_id][0]))) final_status_count = int(np.round(np.mean(status_through_time[status_id][-1]))) else: present_status_count = 0 final_status_count = 0 status_count_list.append([pre_dd_modeling_count,present_status_count,final_status_count]) status_order.append(status) status_count_list = np.array(status_count_list).T pie_colors = np.array([colors[i] for i in status_order]) #colors = np.array(["#227a00","#a5c279","#f3d248","#6956cb","#79262a","#b80033",'black']) labels = np.array(['LC', 'NT', 'VU', 'EN', 'CR', 'EX', 'DD']) def func(pct, allvals): absolute = int(np.round((pct/100.*np.sum(allvals)))) return "{:d}".format(absolute) fig, axs = plt.subplots(1, 3,figsize=(12,10)) # status distribution beginning wedges, texts, autotexts =axs[1].pie(status_count_list[1][status_count_list[1] >0], colors= pie_colors[status_count_list[1] >0], autopct=lambda pct: func(pct, status_count_list[1][status_count_list[1] >0]), shadow=False,textprops=dict(color="w")) # status distribution end wedges, texts, autotexts =axs[2].pie(status_count_list[2][status_count_list[2] >0], colors= pie_colors[status_count_list[2] >0], autopct=lambda pct: func(pct, status_count_list[2][status_count_list[2] >0]), shadow=False,textprops=dict(color="w")) ext = wedges[-1] # status distribution pre-dd wedges, texts, autotexts =axs[0].pie(status_count_list[0][status_count_list[0] >0], colors= pie_colors[status_count_list[0] >0], autopct=lambda pct: func(pct, status_count_list[0][status_count_list[0] >0]), shadow=False,textprops=dict(color="w")) axs[0].set_title('Current (including DD)') axs[1].set_title('Current (DD corrected)') axs[2].set_title('Final (%i years)'%delta_t) final_labels = list(labels[status_count_list[0] >0]) + ['EX'] plt.legend(wedges+[ext], final_labels,title="IUCN status\n(N=%i sp.)"%status_count_list[2].sum(),loc="center left",bbox_to_anchor=(1, 0, 0.5, 1)) fig.savefig(os.path.join(outdir,'status_pie_chart.pdf'),bbox_inches='tight', dpi = 500) #__________________________________________________________________________ #__________________________________________________________________________ if plot_histograms: # plot histograms of extinction times with PdfPages(os.path.join(outdir,'extinction_time_histograms.pdf')) as pdf: for i,species in enumerate(te_array[:,0]): sys.stdout.write('\rPlotting extinction histogram for species %i/%i'%(i+1,len(te_array[:,0]))) plt.figure() species_te_array = te_array[:,1:][i] not_na_values = species_te_array[~np.isnan(list(species_te_array))] heights, bins = np.histogram(not_na_values,np.arange(0,delta_t+10,10)) percent = heights/n_sim plt.bar(bins[:-1],percent,width=10, align="edge") plt.ylim(0,0.5) #survival_prob = 1-sum(percent) #if survival_prob >= 0.5: # text_color = 'green' #else: # text_color = 'red' ax = plt.gca() #plt.text(0.05, 0.7, 'survival probability: %.2f'%survival_prob,color=text_color, horizontalalignment='left',verticalalignment='baseline', transform=ax.transAxes) # annotate last bar if ax.patches[-1].get_height() > 0: ax.text(ax.patches[-1].get_x()+3, np.round(ax.patches[-1].get_height()+0.001,4), '**', fontsize=12, color='black') plt.title('%s - Extinct in %i years: %i/%i'%(species,delta_t,sum(heights),n_sim)) plt.xlabel('Years from present') plt.ylabel('Fraction of simulations') plt.tight_layout() pdf.savefig() # saves the current figure into a pdf page plt.close() print('\n') def evaluate_iucn_history(iucn_history_file,possibly_extinct_list=[],exclude_extinct=False,outdir=False): # process the IUCN history data stat_time_df = process_iucn_history(iucn_history_file) # if possibly_extinct_list provided, read that list and set the status for those taxa to extinct, starting at provided year if len(possibly_extinct_list) > 0: master_stat_time_df = set_taxa_as_extinct(stat_time_df,possibly_extinct_list) else: master_stat_time_df = stat_time_df.copy() # extract most recent valid status for each taxon valid_status_dict,most_recent_status_dict,status_series,taxon_series = extract_valid_statuses(master_stat_time_df) # if exclude_extinct remove all currently extinct taxa if exclude_extinct: ext_indices = np.array([num for num,i in enumerate(most_recent_status_dict.keys()) if most_recent_status_dict[i] == 'EX']) master_stat_time_df = master_stat_time_df.drop(ext_indices) master_stat_time_df.index = np.arange(len(master_stat_time_df)) # replace any occurrence of 'EX' as a past status with NaN to avoid problems with counting types of transitions (treating these assessments as invalid) master_stat_time_df.replace('EX', np.nan,inplace=True) # else remove only taxa that have been extinct all along, keeping those that have recorded transition to extinct within time frame else: ext_indices = np.array([num for num,i in enumerate(master_stat_time_df.iloc[:,1:].values.astype(str)) if 'EX' in np.unique(i) and len(np.unique(i))==2]) master_stat_time_df = master_stat_time_df.drop(ext_indices) master_stat_time_df.index = np.arange(len(master_stat_time_df)) # extract most recent valid status for each taxon valid_status_dict,most_recent_status_dict,status_series,taxon_series = extract_valid_statuses(master_stat_time_df) # count current status distribution unique, counts = np.unique(status_series, return_counts=True) print('\nCurrent IUCN status distribution in reference group:',dict(zip(unique, counts))) # count how often each status change occurs change_type_dict = count_status_changes(master_stat_time_df,valid_status_dict) #print('Summing up years spend in each category ...') years_in_each_category = get_years_spent_in_each_category(master_stat_time_df,valid_status_dict) if exclude_extinct: status_change_coutn_df = pd.DataFrame(data=np.zeros([6,6]).astype(int),index = ['LC','NT','VU','EN','CR','DD'],columns=['LC','NT','VU','EN','CR','DD']) else: status_change_coutn_df = pd.DataFrame(data=np.zeros([7,7]).astype(int),index = ['LC','NT','VU','EN','CR','DD','EX'],columns=['LC','NT','VU','EN','CR','DD','EX']) for status_change in change_type_dict.keys(): states = status_change.split('->') original_state = states[0] new_state = states[1] count = change_type_dict[status_change] status_change_coutn_df.loc[original_state,new_state] = count if outdir: # write IUCN history df to file master_stat_time_df.to_csv(os.path.join(outdir,'formatted_iucn_history_reference_taxa.txt'),sep='\t') # write the status change data to file final_years_count_array = np.array([list(years_in_each_category.keys()),list(years_in_each_category.values())]).T np.savetxt(os.path.join(outdir,'years_spent_in_each_category.txt'),final_years_count_array,fmt='%s\t%s') change_type_dict_array = np.array([list(change_type_dict.keys()),list(change_type_dict.values())]).T np.savetxt(os.path.join(outdir,'change_type_dict.txt'),change_type_dict_array,fmt='%s\t%s') status_change_coutn_df.to_csv(os.path.join(outdir,'status_change_counts.txt'),sep='\t',index=True) print('Counted the following transition occurrences in IUCN history of reference group:') print(status_change_coutn_df) return(status_change_coutn_df,years_in_each_category) def estimate_extinction_rates(extinction_dates,max_t,outdir,n_gen=100000,burnin=1000,plot_posterior=False,seed=None,load_from_file=False): if not os.path.exists(outdir): os.makedirs(outdir) if load_from_file: te_df = pd.read_csv(extinction_dates,sep='\t',header=None,index_col=0) else: te_df = pd.DataFrame(extinction_dates) sim_species_list = list(te_df.index.values) ext_date_data = te_df.values.copy() # estimate extinction rates scaled by year print('\nRunning %i MCMCs to estimate species-specific extinction rates from simulation output...'%len(sim_species_list),flush=True) #ext_date_data = ext_date_data[:10,:] if plot_posterior: with PdfPages(os.path.join(outdir,'posterior_ext_rate_histograms.pdf')) as pdf: sampled_rates = np.array([get_rate_estimate(species_values,max_t,i,sim_species_list,plot_posterior=plot_posterior,pdf=pdf,n_gen=n_gen,burnin=burnin) for i,species_values in enumerate(ext_date_data)]) else: sampled_rates = np.array([get_rate_estimate(species_values,max_t,i,sim_species_list,plot_posterior=plot_posterior,pdf=0,n_gen=n_gen,burnin=burnin) for i,species_values in enumerate(ext_date_data)]) # get overall extincion rate for whole group all_species_values = np.reshape(ext_date_data,[ext_date_data.shape[0]*ext_date_data.shape[1],]) overall_average_list_item = ['All taxa combined'] i = 0 overall_rate_estimates = get_rate_estimate(all_species_values,max_t,i,sim_species_list,plot_posterior=False,pdf=0,n_gen=n_gen,burnin=burnin,suppress_info=True) # add the average to the species specific values sim_species_list = overall_average_list_item+sim_species_list sampled_rates = np.vstack([overall_rate_estimates,sampled_rates]) # export extinction stats to file column_names = ['species','rate_e_mean','rate_e_lower','rate_e_upper'] extinction_prob_df = pd.DataFrame(np.array([sim_species_list,sampled_rates[:,0],sampled_rates[:,1],sampled_rates[:,2]]).T,columns=column_names) extinction_prob_df[column_names[1:]] = extinction_prob_df[column_names[1:]].astype(float) extinction_prob_df.to_csv(os.path.join(outdir,'extinction_rates.txt'),sep='\t',index=False,float_format='%.8f') print('\n') return(extinction_prob_df) def get_iucn_history(reference_group=None,reference_rank=None,iucn_key=None,avoid_precompiled_iucn_data=False,outdir=''): # create the r-scripts to be used later on: write_r_scripts(outdir,script_id = 'history') if reference_group: taxon_reference_groups = reference_group.split(',') else: print('No reference group provided. Provide the name of a taxonomic group as reference_group, e.g. Mammalia') quit() if reference_rank: reference_ranks = reference_rank.split(',') if not os.path.exists(outdir): os.makedirs(outdir) # get IUCN history_________________________________________________________ precompiled_taxon_groups = [] precompiled_taxon_group_files = [] if not avoid_precompiled_iucn_data: for taxon_group in taxon_reference_groups: try: # look for precompiled files online url = 'https://raw.githubusercontent.com/tobiashofmann88/iucn_extinction_simulator/master/data/precompiled/iucn_history/%s_iucn_history.txt'%taxon_group.upper() urlpath =urlopen(url) string = urlpath.read().decode('utf-8') string_input = StringIO(string) ref_group_data = pd.read_csv(string_input, sep="\t") hist_outfile = os.path.join(outdir,os.path.basename(url)) ref_group_data.to_csv(hist_outfile,sep='\t',index=False) precompiled_taxon_groups.append(str.lower(taxon_group)) precompiled_taxon_group_files.append(hist_outfile) except: pass iucn_history_files = [] for i,taxon_group in enumerate(taxon_reference_groups): if str.lower(taxon_group) in precompiled_taxon_groups and not avoid_precompiled_iucn_data: print('Using precompiled IUCN history data for %s.'%taxon_group) iucn_history_files.append([file for file in precompiled_taxon_group_files if os.path.basename(file).startswith(str.upper(taxon_group))][0]) else: print('Fetching IUCN history using rredlist') rank = reference_ranks[i] if rank == 'genus': iucn_cmd = ['Rscript',os.path.join(outdir,'rscripts/get_iucn_status_data_and_species_list.r'), str.capitalize(taxon_group), str.lower(rank), iucn_key, outdir] else: iucn_cmd = ['Rscript',os.path.join(outdir,'rscripts/get_iucn_status_data_and_species_list.r'), str.upper(taxon_group), str.lower(rank), iucn_key, outdir] if not iucn_key: print('ERROR','*'*50,'\nIUCN-KEY ERROR: Need to download IUCN history for specified reference group: %s. Please provide a valid IUCN key (using the --iucn_key flag). Alternatively choose a precompiled reference group (see available groups at github.com/tobiashofmann88/iucn_extinction_simulator/data/precompiled/iucn_history/)'%(taxon_group),'*'*55) quit() #iucn_error_file = os.path.join(outdir,'get_iucn_status_data_and_species_list_error_file.txt') #with open(iucn_error_file, 'w') as err: run_iucn_cmd = subprocess.Popen(iucn_cmd) run_iucn_cmd.wait() iucn_history_files.append(os.path.join(outdir,'%s_iucn_history.txt'%str.upper(taxon_group))) if len(iucn_history_files) > 1: df_previous = pd.DataFrame() outfile_stem = '' for i in iucn_history_files: outfile_stem += os.path.basename(i).split('_')[0]+'_' df_new = pd.read_csv(i,sep='\t') if len(df_previous) > 0: df_previous = pd.concat([df_previous,df_new],sort=False,ignore_index=True) else: df_previous = df_new os.remove(i) df_previous.to_csv(os.path.join(outdir,'%siucn_history.txt'%str.upper(outfile_stem)),sep='\t',index=False) else: outfile_stem = os.path.basename(iucn_history_files[0]).split('_')[0]+'_' # save the file name of the output file for further actions that need to read this file iucn_history_file = os.path.join(outdir,'%siucn_history.txt'%str.upper(outfile_stem)) return(iucn_history_file) def process_iucn_history(iucn_history_file,iucn_start_year=2001,final_year=None): # get current IUCN status all target species_______________________________ # process the IUCN history data if not final_year: current_year = datetime.datetime.now().year else: current_year = final_year master_stat_time_df = pd.DataFrame(columns=['species']+list(np.arange(iucn_start_year,current_year+1).astype(str))) statuses_through_time = pd.read_csv(iucn_history_file, delimiter = '\t') target_columns = [column for column in master_stat_time_df.columns if column in statuses_through_time.columns] master_stat_time_df[target_columns] = statuses_through_time[target_columns] # check if we have sufficient number of species for rate estimation if len(master_stat_time_df) < 1000: print('\n\n%s'%('#'*50),'\nWarning: Only %i species in reference dataset. This may not be sufficient for proper estimation of status transition rates. It is recommended to choose a larger reference group encompassing >1000 species at the least!\n\nContinuing processing IUCN history data of reference group ...\n%s\n\n'%(len(master_stat_time_df),'#'*50)) # treat EW as EX master_stat_time_df.replace('EW', 'EX',inplace=True) # replace occurrences of NR (not recognized) with nan master_stat_time_df.replace('NR', np.nan,inplace=True) # clean and sort df master_stat_time_df = master_stat_time_df.sort_values(by='species') master_stat_time_df = master_stat_time_df.drop_duplicates() master_stat_time_df.index = np.arange(len(master_stat_time_df)) # set most recent status to NE for species without any IUCN status information na_row_indeces = np.where(master_stat_time_df.iloc[:,1:].T.isnull().all().values) for index in na_row_indeces: master_stat_time_df.iloc[index,-1] = 'NE' return(master_stat_time_df) def set_taxa_as_extinct(stat_time_df,pex_data): # provide 2-column df as possibly_extinct_list with species names and supposed year of extinction master_stat_time_df = stat_time_df.copy() pex_species_list = pex_data.iloc[:,0].values.astype(str) pex_year = pex_data.iloc[:,1].values.astype(int) column_names = master_stat_time_df.columns.values row_names = master_stat_time_df.species.values #df_selection = master_stat_time_df[master_stat_time_df.species.isin(pex_species_list)] for i,species in enumerate(pex_species_list): row_index = np.where(row_names==species)[0][0] assessment_year = pex_year[i] column_index = np.where(column_names==str(assessment_year))[0][0] master_stat_time_df.iloc[row_index,column_index:] = 'EX' return(master_stat_time_df) def get_most_recent_status_target_species(species_list=[],iucn_history_file=None,iucn_key=None,load_from_file=False,outdir=''): if iucn_history_file: # process IUCN history file master_stat_time_df = process_iucn_history(iucn_history_file) valid_status_dict_refgroup,most_recent_status_dict_refgroup,status_series_refgroup,taxon_series_refgroup = extract_valid_statuses(master_stat_time_df) provided_history_file = True else: provided_history_file = False # see if we want to only stick with the reference taxa if len(species_list) == 0: gl_data = [] if provided_history_file: joined_df = pd.DataFrame([taxon_series_refgroup,status_series_refgroup]).T else: print('ERROR','*'*50,'\nABORTED: You need to provide either a target species list or a IUCN history file (or both). Found neither.') else: if not load_from_file: species_list_data = pd.DataFrame(species_list) species_list_data.columns = np.arange(len(species_list_data.columns)) # make sure column naming is integers, as in header=None scenario else: # get list of species we want to simulate species_list_data = pd.read_csv(species_list,sep='\t',header=None) species_list_data = species_list_data.drop_duplicates(0) species_list_data = species_list_data.sort_values(0) species_list_data.index = np.arange(len(species_list_data)) species_list = np.unique(species_list_data.iloc[:,0].values.astype(str)) species_list = np.array([i.replace('_',' ') for i in species_list]) # Check if all species names are binomial for species in species_list: if len(species.split(' ')) != 2: print('ERROR','*'*50,'\nABORTED: All provided species names provided under --target_species_list flag must be binomial! Found non binomial name:\n%s\n'%species,'*'*50) quit() try: gl_data = species_list_data.iloc[:,1:].values print('Found generation length data, which will be written to output file for downstream processing.') except: gl_data = [] # see which species are not in reference group and hence need current IUCN status extraction if provided_history_file: remaining_species_wo_iucn_status = sorted(list(set(species_list)-set(taxon_series_refgroup))) fraction_overlap = np.round(1-(len(remaining_species_wo_iucn_status)/len(species_list)),3) if fraction_overlap <= 0.5: print('\n\n%s'%('#'*50),'\nWarning: Only %.3f of target species found in reference group. Is this intended? You may want to reconsider chosing a reference group encompassing more of your target species.\n\nContinuing with downloading current status information for all remaining species...\n%s\n\n'%(fraction_overlap,'#'*50)) else: print('\n\nA fraction of %.3f of the specified target species is present in reference group.\n\n'%fraction_overlap) else: remaining_species_wo_iucn_status = sorted(list(species_list)) tmp_outdir = os.path.join(outdir,'other_files') if not os.path.exists(tmp_outdir): os.makedirs(tmp_outdir) if len(remaining_species_wo_iucn_status) > 0: # write r-script for downloading missing taxa write_r_scripts(outdir,script_id = 'current') # write the missing taxa to file for the r-script species_list_out_file = os.path.join(tmp_outdir,'target_species_not_in_reference_group.txt') np.savetxt(species_list_out_file,remaining_species_wo_iucn_status,fmt='%s') # extract the current status for those missing species print('Extracting current status for target species...') iucn_cmd = ['Rscript',os.path.join(outdir,'rscripts/get_current_iucn_status_missing_species.r'), species_list_out_file, iucn_key, tmp_outdir] #iucn_error_file = os.path.join(iucn_outdir,'get_current_iucn_status_missing_species_error_file.txt') #with open(iucn_error_file, 'w') as err: if not iucn_key: quit('***IUCN-KEY ERROR:*** Trying to download current status for target species from IUCN. Please provide a valid IUCN key (using the --iucn_key flag) to access IUCN data. Alternatively you can turn off this functionality by setting "--target_species_list 0". In that case you need to compile your own current IUCN status list manually. In that case store the status data in a tab-separated format with the header "species current_status".') print('Downloading current IUCN status information for %i target species that are not present in reference group:'%len(remaining_species_wo_iucn_status)) run_iucn_cmd = subprocess.Popen(iucn_cmd) run_iucn_cmd.wait() # get the IUCN data and combine with recent status info from refgroup taxa current_status_target_species_file = os.path.join(tmp_outdir,'current_status_missing_species.txt') current_status_missing_taxa = pd.read_csv(current_status_target_species_file,sep='\t',header=None) # print info which species were not found in IUCN current_status_missing_list = current_status_missing_taxa[1].values.astype(str) nan_taxa = current_status_missing_taxa[0].values[current_status_missing_list=='nan'] if len(nan_taxa) > 0: print('\n\nNo IUCN information found for the following %i species. This could be due to taxonomic issues. Make sure that all species names match with the most recent IUCN taxonomy.\n\nFor now, these species will be coded as Not Evaluated (NE)...\n\n%s\n\n'%(len(nan_taxa),str(nan_taxa))) if provided_history_file: target_reference_taxa = list(set(species_list)-set(current_status_missing_taxa[0].values)) status_series_refgroup = np.array(status_series_refgroup).astype(str) taxon_series_refgroup = np.array(taxon_series_refgroup).astype(str) status_reference_taxa = [status_series_refgroup[taxon_series_refgroup == i][0] for i in target_reference_taxa] current_status_reference_taxa = pd.DataFrame(data = np.array([target_reference_taxa,status_reference_taxa]).T) joined_df = pd.concat([current_status_missing_taxa,current_status_reference_taxa],ignore_index=True).sort_values(by=[0]) else: joined_df = current_status_missing_taxa.sort_values(by=[0]) else: target_reference_taxa = list(species_list) status_series_refgroup = np.array(status_series_refgroup).astype(str) taxon_series_refgroup = np.array(taxon_series_refgroup).astype(str) status_reference_taxa = [status_series_refgroup[taxon_series_refgroup == i][0] for i in target_reference_taxa] joined_df = pd.DataFrame(data = np.array([target_reference_taxa,status_reference_taxa]).T) # fix NE taxa and sort joined_df = joined_df.replace(np.nan,'NE') joined_df.index = np.arange(len(joined_df)) if len(gl_data) > 0: joined_df = pd.concat([joined_df,pd.DataFrame(np.round(gl_data,3))],axis=1) # remove all extinct taxa from future steps ext_boolean = joined_df.iloc[:,1]=='EX' extinct_taxa = joined_df.iloc[ext_boolean.values,:] print('The following taxa listed in your target species list are extinct according to IUCN:\n') print(extinct_taxa.iloc[:,0].values) print('\nThese taxa will be removed from the list for downstream processing.') extant_taxa = joined_df.iloc[~ext_boolean.values,:] extant_taxa.to_csv(os.path.join(outdir,'species_data.txt'),sep='\t',index=False,header=False) return(extant_taxa) def get_possibly_extinct_iucn_info(iucn_history_file,outdir=''): # get info about possibly extinct taxa_____________________________________ url = 'https://raw.githubusercontent.com/tobiashofmann88/iucn_extinction_simulator/master/data/precompiled/pex_taxa/2020_1_RL_Stats_Table_9.txt' urlpath =urlopen(url) string = urlpath.read().decode('utf-8') string_input = StringIO(string) pe_data = pd.read_csv(string_input, sep="\t") # which species are we looking for? reference_taxa = pd.read_csv(iucn_history_file,sep='\t').species.values.astype(str) # extract all these species and write to file reference_taxa_listed_as_pe = pe_data[pe_data['Scientific name'].isin(reference_taxa)] reference_taxa_listed_as_pe = reference_taxa_listed_as_pe.iloc[:,[0,3]] pe_data_outfile = os.path.join(outdir,'possibly_extinct_reference_taxa.txt') reference_taxa_listed_as_pe.to_csv(pe_data_outfile,sep='\t',index=False) return(reference_taxa_listed_as_pe) def get_rate_estimate(ext_time_array,max_t,index,species_list,plot_posterior=0,pdf=0,n_gen = 100000,burnin = 1000,suppress_info=False): if not suppress_info: sys.stdout.write('\rProcessing species: %i/%i '%(index+1,len(species_list))) sys.stdout.flush() ext_time_array_new = ext_time_array.copy() ext_time_array_new[ext_time_array_new!=ext_time_array_new] = max_t ext_time_array_new = ext_time_array_new.astype(float) w_times = np.sum(ext_time_array_new) ext_events = len(ext_time_array_new[ext_time_array_new= np.log(np.random.random()): q = new_q likA = lik if i > burnin and i % 10==0: post_samples.append(q) mean_value = np.mean(post_samples) lower,upper = calcHPD(post_samples,0.95) if plot_posterior: plt.figure() plt.hist(post_samples,100) plt.xlabel('Extinction rate estimates') plt.ylabel('Counts') plt.title(species_list[index]) plt.tight_layout() pdf.savefig() plt.close() #fig.savefig(os.path.join(posterior_plot_dir,'%s.pdf'%species_list[index]),bbox_inches='tight', dpi = 500) return [mean_value,lower,upper] def select_target_species(species,species_list_status,species_list,en_ext_data,cr_ext_data): target_species = species target_index = species_list_status[species_list_status.species==target_species].index.values[0] species_list_status = species_list_status.iloc[target_index,:] species_list = np.array([species_list[target_index]]) en_ext_data = np.array([en_ext_data[target_index]]) cr_ext_data = np.array([cr_ext_data[target_index]]) return pd.DataFrame(species_list_status).T,species_list,en_ext_data,cr_ext_data def get_rate_estimate_posterior(ext_time_array,max_t,index,species_list,n_gen = 100000,burnin = 1000): sys.stdout.write('\rProcessing species: %i/%i '%(index+1,len(species_list))) sys.stdout.flush() ext_time_array_new = ext_time_array.copy() ext_time_array_new[ext_time_array_new!=ext_time_array_new] = max_t ext_time_array_new = ext_time_array_new.astype(float) w_times = np.sum(ext_time_array_new) ext_events = len(ext_time_array_new[ext_time_array_new= np.log(np.random.random()): q = new_q likA = lik if i > burnin and i % 10==0: post_samples.append(q) return post_samples def p_e_year(years,p_e): pe_year = 1-(1-float(p_e))**(1/years) return pe_year def update_multiplier(q,d=1.1): u = np.random.uniform(0,1) l = 2*np.log(d) m = np.exp(l*(u-.5)) new_q = q * m return new_q, np.log(m) def sample_rate_mcmc(count, tot_time, n_samples = 1, n_gen = 100000,burnin = 1000): def get_loglik(count, dT, rate): return np.log(rate)*count - dT*rate post_samples = [] q = 0.01 likA = get_loglik(count,tot_time,q) for i in range(n_gen): new_q, hast = update_multiplier(q) lik = get_loglik(count,tot_time,new_q) if lik-likA + hast >= np.log(np.random.random()): q = new_q likA = lik if i > burnin and i % 10==0: post_samples.append(q) sampled_rates = np.random.choice(post_samples,n_samples,replace=False) return sampled_rates def power_function(x,a,b): # defining the power function y = float(a)*x**float(b) return y def make_empty_rate_df(species_list,rate_columns,status_label): rate_df = pd.DataFrame(np.zeros((len(species_list),rate_columns+1))) rate_df.columns = ['species']+ ['%s_p_ext_%i'%(status_label,i) for i in np.arange(0,rate_columns)] rate_df.species = species_list return rate_df def format_iucn_df(statuses_through_time_file,start_year,current_year): new_df = pd.DataFrame() new_df['species'] = statuses_through_time_file['species'] for number in range(start_year,current_year+1): year = str(number) if year in statuses_through_time_file.columns: pass else: statuses_through_time_file[year] = np.nan new_df[year] = statuses_through_time_file[year] new_df.replace({'NE': np.nan},inplace=True) print('Completed: Adding missing columns and selecting target years.') return new_df def exclude_extinct(iucn_dataframe, status_column_name): new_df = iucn_dataframe.copy() new_df = new_df[(new_df[status_column_name] != 'EW') & (new_df[status_column_name] != 'EX')] # a few odd species have come back from extinction, meaning that even though their current status is neither 'EX' or 'EW' they had 'EX' as a previous status new_df.replace({'EX': 'NaN'},inplace=True) new_df.drop(new_df.columns[[-2, -1]], axis=1, inplace=True) return new_df def count_status_changes(iucn_dataframe,valid_status_dict): change_types = {} for row in iucn_dataframe.iterrows(): species_index = row[0] species_name = iucn_dataframe.species[species_index] current_status = '' if species_name in valid_status_dict: valid_status = list(valid_status_dict[species_name]) for i in range(len(valid_status)): # compile all change types and count current_status = valid_status[i] #this takes care of when we are getting out of the list index for the following steps and when we are at the end of the list anyways if i+1 >= len(valid_status): pass elif valid_status[i+1] != current_status: next_status = valid_status[i+1] change = "%s->%s" %(current_status,next_status) change_types.setdefault(change,[]) change_types[change].append(1) for category in change_types: change_types[category] = sum(change_types[category]) return change_types def get_years_spent_in_each_category(extant_iucn_df,valid_status_dict): species_list = extant_iucn_df.species.values only_year_columns = extant_iucn_df.iloc[:,1:].copy() max_index = only_year_columns.shape[-1] years_in_status = [np.diff(np.append(np.where(only_year_columns[extant_iucn_df.species==species].notna().values[0])[0],max_index)) for species in species_list] status_master_list = np.array([[item for sublist in [[j]*years_in_status[index_i][index_j] for index_j,j in enumerate(valid_status_dict[i])] for item in sublist]+['NA']*(max_index-sum(years_in_status[index_i])) for index_i,i in enumerate(species_list)]) status_master_flat_array = np.concatenate(status_master_list) statuses, counts = np.unique(status_master_flat_array,return_counts=True) years_in_each_category = dict(zip(statuses, counts)) years_in_each_category.pop('NA') return years_in_each_category def replace_iucn_status_with_int(change_types,sum_years): iucn_code = {'LC':0, 'NT':1, 'VU':2, 'EN':3, 'CR':4} new_change_types = {} new_sum_years = {} for element in change_types: states = element.split('->') new_states = [] new_states.append(str(iucn_code[states[0]])) new_states.append(str(iucn_code[states[1]])) new_states = '->'.join(new_states) new_change_types.setdefault(new_states,change_types[element]) for status in sum_years: new_status = iucn_code[status] new_sum_years.setdefault(new_status,sum_years[status]) return new_change_types,new_sum_years def save_obj(obj, file_name): with open(file_name, 'wb') as f: pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) def load_obj(file_name): with open(file_name, 'rb') as f: return pickle.load(f) def qmatrix(rates, status_specific_p_e): Q_matrix = np.zeros((5,6)) iucn_code = {'LC':0, 'NT':1, 'VU':2, 'EN':3, 'CR':4} for row_id,change_type in enumerate(rates.index.values): states = change_type.split('->') if not 'DD' in states: original_state = iucn_code[states[0]] derived_state = iucn_code[states[1]] rate = rates[row_id] Q_matrix[original_state,derived_state] = rate Q_matrix[original_state,5] = status_specific_p_e[original_state] # This fills the diagonal line of the matrix with negative values, so that the line has a sum of 0 np.fill_diagonal(Q_matrix, -np.sum(Q_matrix,axis=1)) np.set_printoptions(precision=8) return Q_matrix def treat_dd_species(iucn_dataframe,change_type_dict,all_lc = False): new_df = iucn_dataframe.copy() #get the change frequencies from DD species to a real category and based on that draw new, valid categories for DD data dd_change_vector = np.zeros(5) status_index_dict = {'LC':0,'NT':1,'VU':2,'EN':3,'CR':4} for key in list(change_type_dict.keys()): if key.startswith('DD'): old_status,new_status = key.split('->') new_status_index = status_index_dict[new_status] new_value = change_type_dict[key] dd_change_vector[new_status_index] = new_value dd_change_probabilities_vector = dd_change_vector/sum(dd_change_vector) #extant_iucn_df.replace('DD', list(status_index_dict.keys())[list(status_index_dict.values()).index(random_choice_P(dd_change_vector)[1])]) dd_coords = np.where(new_df == 'DD') new_draws = np.random.choice(['LC','NT','VU','EN','CR'], size=len(dd_coords[0]), replace=True, p=dd_change_probabilities_vector) if all_lc: new_draws = np.array(['LC']*len(dd_coords[0])) # iucn_dataframe.iloc[dd_coords[0],dd_coords[1]] new_df = np.array(new_df) new_df[new_df=="DD"] = new_draws new_df = pd.DataFrame(new_df) new_df.columns = np.array(list(iucn_dataframe.columns)) return new_df def extract_valid_statuses(formatted_status_through_time_file): taxon_list = list(formatted_status_through_time_file.species.values) #valid_status_matrix = [list(line[line.isin(['NE','EX','EW','DD','CR','EN','VU','NT','LC'])].values) for it,line in formatted_status_through_time_file.iterrows()] df_array = formatted_status_through_time_file.values valid_status_matrix = [list(line[np.isin(line,['DD','EX','CR','EN','VU','NT','LC'])]) for line in df_array] # if taxon has no single valid status in IUCN history, model as NE at present valid_status_matrix = [['NE'] if len(i) == 0 else i for i in valid_status_matrix] valid_status_dict = dict(zip(taxon_list, valid_status_matrix)) current_status_list = [line[-1] for line in valid_status_matrix] most_recent_status_dict = dict(zip(taxon_list, current_status_list)) return valid_status_dict,most_recent_status_dict,current_status_list,taxon_list def random_choice_P(vector): # randomly sample an element of 'vector' based on their values, input needs to be float probDeath=vector/sum(vector) # (larger values have higher prob of being sampled) r=np.random.choice(probDeath, p=probDeath) ind=np.where(probDeath==r)[0][0] return [vector[ind], ind] def round_up(value): try: return math.ceil(float(value)) except ValueError: return value def simulate_extinction_and_status_change(delta_t,list_of_all_current_species_statuses,species_list,outdir,qmatrix_dict,status_change=False,dynamic_qmatrix=True,n_extinct_taxa=0): # write the species name and the index to a separate txt file # this is necessary because two years get lost when summarizing the simulated data into year bins #final_year = final_year+2 if not os.path.exists(outdir): os.makedirs(outdir) iucn_code = {'LC':0, 'NT':1, 'VU':2, 'EN':3, 'CR':4} species_cat_arg = [iucn_code[x] for x in list_of_all_current_species_statuses] species_cat = np.array(species_cat_arg)+0 current_state=species_cat root = -delta_t n_columns = abs(root) status_array = np.tile(np.array([current_state]).transpose(), (1, n_columns)) species_list_log = open("%s/species_list.txt" %outdir, "w") species_list_file=csv.writer(species_list_log, delimiter='\t') #print(q_matrix) extinction_time_list = [] for taxon in range(len(species_list)): species = species_list[taxon] if dynamic_qmatrix: q_matrix = qmatrix_dict[species].copy() else: q_matrix = qmatrix_dict #print(q_matrix) if not status_change: # if the status change is deactivated set all status shift rates in the q-matrix to 0 q_matrix[0:5,0:5] = 0.0 np.fill_diagonal(q_matrix, -np.sum(q_matrix,axis=1)) species_list_file.writerow([taxon,species]) current_state = int(status_array[taxon,0]) D = -np.diagonal(q_matrix) extinction = 'extant' #start at 1 so that the current status will always be protected/kept stable t=0 while t < abs(root): # draw randomely when the next change is going to happen, based on the negative inverse diagonal value of the q-matrix (corrected by scale) delta_t =np.random.exponential(1./(D[current_state])) # fill all columns between t and delta_t with the current_status (gets updated after this) status_array[taxon,np.arange(int(t),min(int(t+delta_t), abs(root)))] = current_state t = min(t+delta_t, abs(root)) # get the q_row of the current status containing all change rates Qrow = q_matrix[current_state]+0 # eliminate the negative value in the diagonal and set to 0 since we don't want to sample this one Qrow[Qrow<0]= 0 #=1-sum(Qrow[Qrow>0]) [rate, ind] = random_choice_P(Qrow) current_state = ind if ind == 5: # go extinct time_of_extinction = delta_t+t if time_of_extinction > abs(root): break else: status_array[taxon,np.arange(int(time_of_extinction), abs(root))] = 5 extinction = time_of_extinction break extinction_time_list.append(extinction) a = np.array(species_list) b = np.array(extinction_time_list) time_until_n_extinctions = None if n_extinct_taxa: ext_taxa = b[b!='extant'].astype(float) if len(ext_taxa) < n_extinct_taxa: print('\n\nWARNING: The --n_extinct_taxa flag is being ignored in this analysis.\n\nCan not find %i extinct taxa in this simulation replicate. You may have to increase the simulated time frame by adjusting the --n_years value.'%n_extinct_taxa) else: # select the n smallest values from the extinction times sorted_times = np.sort(ext_taxa) time_until_n_extinctions = np.max(sorted_times[:n_extinct_taxa]) extinction_array = np.stack((a, b)) list_ex = list(map(round_up, extinction_array[1])) extinctions_per_year = Counter(list_ex) extinctions_per_year = dict(extinctions_per_year) return status_array, extinction_array, extinctions_per_year, time_until_n_extinctions def get_dtt_array_from_extinction_per_year_dict(extinction_dict_sim_out,current_year,final_year): year_bins = np.arange(current_year,final_year+1) ext_count= [extinction_dict_sim_out[i] if i in extinction_dict_sim_out.keys() else 0 for i in year_bins] diversity_through_time = sum(extinction_dict_sim_out.values()) - np.cumsum(ext_count) diversity_through_time return diversity_through_time def run_multi_sim(n_rep, delta_t, species_list, input_status_list, dd_probs, qmatrix_dict, rate_index_list, outdir, all_lc=False, status_change=True, dynamic_qmatrix=True, n_extinct_taxa=0, save_future_status_array=False, save_status_through_time=False): iucn_code = {'LC':0, 'NT':1, 'VU':2, 'EN':3, 'CR':4} extinct_per_year_array = np.zeros([n_rep,delta_t+1]) te_array = np.zeros((len(species_list),n_rep+1)).astype(object) te_array[:,0]=species_list status_through_time_dict = {} status_through_time = np.zeros([6,delta_t+1,n_rep]) time_until_n_extinctions_list = [] all_future_status_arrays = [] for n in range(n_rep): sys.stdout.write('\rRunning simulation rep %i/%i' %(n+1,n_rep)) target_column = rate_index_list[n] # new modeling of DD species every rep current_status_list_new_dd = input_status_list.copy() dd_indices = np.where([current_status_list_new_dd=='DD'])[1] dd_prob_vector = dd_probs.T[target_column] if all_lc: new_draws = np.array(['LC']*len(dd_indices)) else: new_draws = np.random.choice(['LC','NT','VU','EN','CR'], size=len(dd_indices), replace=True, p=dd_prob_vector) current_status_list_new_dd[dd_indices] = new_draws # new modeling of NE species every rep status_count_dict = Counter(input_status_list) counts = np.array([status_count_dict[key] for key in status_count_dict.keys() if key not in ['DD','NE']]) ne_probs = counts/sum(counts) status_array_count = [key for key in status_count_dict.keys() if key not in ['DD','NE']] ne_indices = np.where([current_status_list_new_dd=='NE'])[1] if all_lc: new_draws = np.array(['LC']*len(ne_indices)) else: new_draws = np.random.choice(status_array_count, size=len(ne_indices), replace=True, p=ne_probs) current_status_list_new_dd[ne_indices] = new_draws # sample a different q-matrix for each rep, according to the rate_index_list input if dynamic_qmatrix: q_matrix_list_rep = [qmatrix_dict[species][target_column] for species in species_list] qmatrix_dict_rep = dict(zip(species_list,q_matrix_list_rep)) # the dynamic_qmatrix flag can be turned off to pass only a single q-matrix that will be used for all taxa and sim reps else: qmatrix_dict_rep = qmatrix_dict future_status_array, extinction_array, extinction_dict, time_until_n_extinctions = simulate_extinction_and_status_change(delta_t,current_status_list_new_dd,species_list,outdir,qmatrix_dict_rep,status_change=status_change,dynamic_qmatrix=dynamic_qmatrix,n_extinct_taxa=n_extinct_taxa) if save_future_status_array: # this might become a pretty big object cluddering up the memory all_future_status_arrays.append(future_status_array) time_until_n_extinctions_list.append(time_until_n_extinctions) # diversity_through time array for year in extinction_dict.keys(): if not year == 'extant': pos = int(year) extinct_per_year_array[n,pos] = extinction_dict[year] # status through time array year = 1 for column in future_status_array.T: the_dict = dict(Counter(list(column))) for status in range(6): key = '%s_%s'%(str(year),str(status)) status_through_time_dict.setdefault(key,[]) if status in the_dict.keys(): status_through_time_dict[key].append(the_dict[status]) else: status_through_time_dict[key].append(0) year += 1 status_list = [iucn_code[status] for status in current_status_list_new_dd] counts_present = dict(Counter(status_list)) for i in counts_present.keys(): status_through_time[i,0,n]=counts_present[i] # fill te_array for species in range(len(extinction_array[1])): extinction_date = '' if extinction_array[1][species] == 'extant': extinction_date = np.nan else: extinction_date = np.round(float(extinction_array[1][species]),2) te_array[species,n+1]=extinction_date # finish diversity through time array diversity_through_time = sum(extinction_dict.values()) - np.cumsum(extinct_per_year_array,axis=1) # finish status_through_time_array for key in status_through_time_dict: x = int(key.split('_')[1]) y = int(key.split('_')[0]) status_through_time[x,y,:] = status_through_time_dict[key] # write data-objects to output folder #with open(os.path.join(outdir,'diversity_through_time.pkl'), 'wb') as f: # pickle.dump(diversity_through_time, f, pickle.HIGHEST_PROTOCOL) #with open(os.path.join(outdir,'te_array.pkl'), 'wb') as f: # pickle.dump(te_array, f, pickle.HIGHEST_PROTOCOL) if save_status_through_time: with open(os.path.join(outdir,'status_through_time.pkl'), 'wb') as f: pickle.dump(status_through_time, f, pickle.HIGHEST_PROTOCOL) if save_future_status_array: with open(os.path.join(outdir,'future_status_array_list.pkl'), 'wb') as f: pickle.dump(all_future_status_arrays, f, pickle.HIGHEST_PROTOCOL) return diversity_through_time,te_array,status_through_time,time_until_n_extinctions_list def summarize_future_status_array(future_status_file): future_status_list = load_obj(future_status_file) future_status_list = np.array(future_status_list) status_probs = np.array([np.count_nonzero(future_status_list==i,axis=0)/future_status_list.shape[0] for i in np.arange(future_status_list.max()+1)]) most_probable_status_per_year = np.argmax(status_probs,axis=0)+1 return(status_probs,most_probable_status_per_year) def calcHPD(data, level): assert (0 < level < 1) d = list(data) d.sort() nData = len(data) nIn = int(round(level * nData)) if nIn < 2 : raise RuntimeError("not enough data") i = 0 r = d[i+nIn-1] - d[i] for k in range(len(d) - (nIn - 1)): rk = d[k+nIn-1] - d[k] if rk < r : r = rk i = k assert 0 <= i <= i+nIn-1 < len(d) return (d[i], d[i+nIn-1]) def write_r_scripts(output_folder,script_id = 'both'): script_1_content = """ library(rredlist) args = commandArgs(trailingOnly = TRUE) taxon_group = args[1] group_rank = args[2] iucn_key = args[3] outdir = args[4] exclude_extinct = FALSE # load all IUCN data data = c() for (i in seq(0, 20, 1)){ data = c(data,c(rl_sp(key=iucn_key,page = i))) } # get taxon list, class list and status list from data taxon_list = c() group_list = c() status_list = c() taxon_id = c() for (page in data){ if (length(page) > 1){ target_column = which(startsWith(colnames(page),group_rank)) taxon_list = c(taxon_list,page$scientific_name) group_list = c(group_list,page[,target_column]) status_list = c(status_list,page$category) taxon_id = c(taxon_id,page$taxonid) } } # exclude extinct taxa if needed if (exclude_extinct){ boolean = !grepl('EX|EW',status_list) taxon_list = taxon_list[boolean] group_list = group_list[boolean] status_list = status_list[boolean] taxon_id = taxon_id[boolean] } # remove all non-species level identifications boolean = !grepl('subsp.|ssp.|subpopulation|Subpopulation',taxon_list) taxon_list = taxon_list[boolean] group_list = group_list[boolean] status_list = status_list[boolean] taxon_id = taxon_id[boolean] # select target taxa selected_taxon_list = taxon_list[group_list==taxon_group] selected_ids = taxon_id[group_list==taxon_group] final_sorted_taxon_list = selected_taxon_list #final_taxon_list = as.data.frame(cbind(selected_taxon_list,selected_ids)) #final_sorted_taxon_list = final_taxon_list[order(final_taxon_list$selected_taxon_list),] write.table(sort(final_sorted_taxon_list),file=paste0(outdir,'/',taxon_group,"_species_list.txt"), quote=F,row.names=F,sep=' ',col.names = FALSE) # get historic data __________________________ # create new dataframe with species as first column historic_assessments = selected_taxon_list historic_assessments = as.data.frame(historic_assessments) colnames(historic_assessments) = c('species') # find historic assessments and fill into dataframe log_frequency = 1000 counter = 1 for (i in seq(1, length(selected_taxon_list), 1)){ species = selected_taxon_list[i] species_id = selected_ids[i] print(paste0('Downloading IUCN history: species ',counter, ' of ',length(selected_taxon_list))) #print(species) row_id = which(historic_assessments$species == species) hist_data <- rl_history(id=species_id,key=iucn_key) for (year in hist_data$result$year){ id = which(hist_data$result$year == year) #some species have multiple assignments for some years if (length(hist_data$result$code[id])>1){ historic_assessments[row_id,year] <- hist_data$result$code[id][1] } else{ historic_assessments[row_id,year] <- hist_data$result$code[id] } } if (counter %% log_frequency == 0){ write.table(historic_assessments,file=paste0(outdir,'/',taxon_group,"_iucn_history.txt"), quote=F,row.names=F,sep=' ') } counter = counter+1 } write.table(historic_assessments,file=paste0(outdir,'/',taxon_group,"_iucn_history.txt"), quote=F,row.names=F,sep=' ') #___________________________________ """ script_2_content = """ library(rredlist) args = commandArgs(trailingOnly = TRUE) species_list_file = args[1] iucn_key = args[2] outdir = args[3] data = read.csv(species_list_file,header = FALSE) species_list = data$V1 status_list = c() for (i in 1:length(species_list)){ species = as.character(species_list[i]) print(paste0('Extracting current status for ', species,' (',i,' of ',length(species_list),')')) iucn_info = rl_search(species,key = iucn_key) category = iucn_info$result$category if (is.null(category)){ category = NaN } status_list = c(status_list,category) } species_cat_df = cbind(as.character(species_list),status_list) write.table(species_cat_df,file=paste0(outdir,'/current_status_missing_species.txt'),quote=FALSE,sep = '\t',col.names = FALSE,row.names=FALSE) """ rscript_out = os.path.join(output_folder,'rscripts') if not os.path.exists(rscript_out): os.makedirs(rscript_out) if script_id == 'both': with open(os.path.join(rscript_out,'get_iucn_status_data_and_species_list.r'),'w') as file: file.write(script_1_content) file.close() with open(os.path.join(rscript_out,'get_current_iucn_status_missing_species.r'),'w') as file: file.write(script_2_content) file.close() elif script_id == 'history': with open(os.path.join(rscript_out,'get_iucn_status_data_and_species_list.r'),'w') as file: file.write(script_1_content) file.close() elif script_id == 'current': with open(os.path.join(rscript_out,'get_current_iucn_status_missing_species.r'),'w') as file: file.write(script_2_content) file.close()