import os
import sys

import pandas as pd
import numpy as np
#import xarray as xr

sys.path.append(os.path.join(os.path.dirname(__file__), "../", "src"))

from cscm_calibrate.set_up_calibration_configs_and_run import define_scendata_for_scm, get_df_from_input_w_data_handler

#cscm_root = "../ciceroscm/"
cscm_root = "/div/no-backup-nac/users/masan/GRAFITE/ciceroscm/"
# cscm_root = "/div/no-backup/git-repos/ciceroscm/"
# Adding location of source code to system path
# os.path.dirname(__file__) gives the directory of
# current file. Put in updated path if running script from elsewhere
# os.path joins all the folders of a path together in a
# system independent way (i.e. will work equally well on Windows, linux etc)
sys.path.append(f"{cscm_root}src")
sys.path.append(f"{cscm_root}venv/lib/python3.11/site-packages/")
print(sys.path)

from ciceroscm.parallel.distributionrun import DistributionRun

special_scen_skip = ["1pctCO2-bgc", "1pctCO2-rad"]#, "scen7-LC", "scen7-HLC", "scen7-MLC", "scen7-LNC", "scen7-MC", "esm-scen7-L", "esm-scen7-HL", "esm-scen7-ML", "esm-scen7-LN", "esm-scen7-M"]
special_mapping = {"hist":"historical", "hist-cmip6": "historical-cmip6"}

outpath_main = "out_file_dump_nopattern_noefficacy"

def check_if_inspected(scenario_name):
    if not os.path.exists("scenarios_inspected.txt"):
        return False
    with open("scenarios_inspected.txt", "r") as f:
        lines = f.readlines()
    for line in lines:
        if line.startswith(scenario_name + ","):
            return True
    return False

def make_dataframe_of_zeros(varname, start_year, end_year):
    years = np.arange(start_year, end_year + 1)
    data = np.zeros(len(years))
    df = pd.DataFrame(data=data, index=years, columns=[varname])
    df.index.name="year"
    return df

# split each subframe by Scenario prefix groups
def _split_by_scenario_prefix(subdf):
    s = subdf['Scenario'].astype(str).str.lower().str.strip()
    mask_esm_all = s.str.startswith('esm-allGHG')
    mask_esm = s.str.startswith('esm-') & ~mask_esm_all
    mask_other = ~s.str.startswith('esm-')
    return subdf[mask_esm_all].copy(), subdf[mask_esm].copy(), subdf[mask_other].copy()


def load_and_process_protocol(excel_file_path):
    # Load the protocol CSV file into a DataFrame
    df = pd.read_excel(excel_file_path, sheet_name="scenario_info", 
                                skiprows=[1, 2], usecols=[1, 2, 5, 6])
    #print(df.head())
    #print(df.shape)
    # normalise Type values and split into two DataFrames
    _type_norm = df['Type'].astype(str).str.lower().str.replace(r'[\s\-_]+', '', regex=True)
    df_idealised = df[_type_norm.isin(['idealised', 'idealized'])].copy()
    df_non_idealised = df[_type_norm.isin(['nonidealised', 'nonidealized', 'nonideal'])].copy()
    #print(f"idealised: {df_idealised.shape}, non-idealised: {df_non_idealised.shape}")

    df_idealised_esm_all, df_idealised_esm_other, df_idealised_other = _split_by_scenario_prefix(df_idealised)
    df_non_idealised_esm_all, df_non_idealised_esm_other, df_non_idealised_other = _split_by_scenario_prefix(df_non_idealised)

    #print("idealised      :", df_idealised.shape, "-> esm_all:", df_idealised_esm_all.shape, "esm_other:", df_idealised_esm_other.shape, "other:", df_idealised_other.shape)
    #print("non-idealised  :", df_non_idealised.shape, "-> esm_all:", df_non_idealised_esm_all.shape, "esm_other:", df_non_idealised_esm_other.shape, "other:", df_non_idealised_other.shape)
    return {
        'idealised': {
            'esm_all': df_idealised_esm_all,
            'esm_other': df_idealised_esm_other,
            'other': df_idealised_other,
        },
        'non_idealised': {
            'esm_all': df_non_idealised_esm_all,
            'esm_other': df_non_idealised_esm_other,
            'other': df_non_idealised_other,
        }
    }

def read_output_variables_from_protocol(excel_file_path):
    df = pd.read_excel(excel_file_path, sheet_name="variable_definitions", usecols=[2,6], header=0)
    #print(df.head())
    #print(df[(df["Report when"] == "Always")])
    variables_all = df[(df["Report when"] == "Always")].iloc[:,0].copy().tolist() + ["Atmospheric Concentrations|CO2"]
    variables_non_idealised  = variables_all + df[(df["Report when"] == "Non-idealised")].iloc[:,0].copy().tolist()
    variables_non_idealised_emi = variables_non_idealised + df[(df["Report when"] == "Non-idealised and if emissions-driven")].iloc[:,0].copy().tolist()
    variables_non_idealised_conc = variables_non_idealised + ["Emissions|CO2"]
    return variables_all, variables_non_idealised, variables_non_idealised_emi, variables_non_idealised_conc

ystart = 1750
yendmax= 2500
emistart = 1850
#input_dir = "/home/masan/temp/rcmip_inputs_cscm/"
input_dir = "/div/no-backup-nac/users/masan/GRAFITE/temp_indata/"
gases_ep = "gases_vupdate_2024_WMO_added_new.txt"
gases_df = get_df_from_input_w_data_handler(
    os.path.join(input_dir, gases_ep), 
    input_dir, 
    gases_ep, 
    case_type="gaspam"
    )
fallback_emissions = get_df_from_input_w_data_handler(
    None, 
    input_dir, 
    f"ssp245_em_{gases_ep}", 
    nyend=yendmax, nystart=ystart, case_type="emis"
    )
fallback_concentrations = get_df_from_input_w_data_handler(
    None, 
    input_dir, 
    f"historical_conc_{gases_ep}", 
    nyend=yendmax, nystart=ystart, case_type="conc"
    )
em_piControl = get_df_from_input_w_data_handler(
    None, 
    input_dir, 
    f"esm-piControl_em_{gases_ep}", 
    nyend=yendmax, nystart=ystart, case_type="emis"
    )
conc_piControl = get_df_from_input_w_data_handler(
    None, 
    input_dir, 
    f"piControl_conc_{gases_ep}", 
    nyend=yendmax, nystart=ystart, case_type="conc"
    )
lucalbedo_piControl = os.path.join(input_dir,"LUCalbedo_RCMIP_constant_zero_RCMIP3.txt") 

# Not system independent, can only be run on amoc or qbo
# If running on different system, this must be changed
# to where you have ssp input files

#distrorun = DistributionRun(None, json_file_name="/div/no-backup/users/masan/SCM_stuff/subset_cscm_configfile_for_py.json")
#distrorun = DistributionRun(None, json_file_name="/div/no-backup/users/masan/SCM_stuff/subset_cscm_configfile_for_py.json")
#distrorun = DistributionRun(None, json_file_name="/div/no-backup/git-repos/ciceroscm/scripts/calib_output_2.json")
#distrorun = DistributionRun(None, json_file_name="draw_samples_500.json")
#distrorun = DistributionRun(None, json_file_name="/div/no-backup-nac/users/masan/GRAFITE/cscm-calibrate/src/cscm_calibrate/data/draw_samples_500.json")
#distrorun = DistributionRun(None, json_file_name="/div/no-backup-nac/users/masan/GRAFITE/cscm-calibrate/src/cscm_calibrate/data/draw_samples_500_w_ecs.json")
#distrorun = DistributionRun(None, json_file_name="/div/no-backup-nac/users/masan/GRAFITE/cscm-calibrate/output/draw_samples_500.json")
#json_file = "../../flat10_runs_repo/draw_samples_just2.json"
#json_file= "/div/no-backup-nac/users/masan/GRAFITE/cscm-calibrate/output/draw_samples_500.json"
json_file = "/div/no-backup-nac/users/masan/GRAFITE/cscm-calibrate/draw_samples_archive/draw_samples_no_efficacy_no_pattern_wide_lambda_400.json"
#json_file = "offending_member.json"
distrorun = DistributionRun(None, json_file_name=json_file)
#distrorun = DistributionRun(None, json_file_name="/div/no-backup/users/masan/SCM_stuff/subset_cscm_configfile_for_py_small.json")

def make_scenariodata_argdict(row, run_type, scen_name, scen_name_strip, yend):
    print(run_type)
    if scen_name.startswith("esm-allGHG"):
        print("Hello")
        emistart = 1850
    else:
        emistart = yend
    arg_dict = {
        "gaspam" : gases_df,
        "nyend": yend,
        "nystart": ystart,
        "emstart": emistart,
    }

    # Deal with natural forcings:
    if row["Type"] == "idealised" or scen_name.endswith("piControl"):
        arg_dict["sunvolc"] = 0
        arg_dict["rf_luc_file"] = lucalbedo_piControl
        arg_dict["df_nat_ch4"] = make_dataframe_of_zeros("CH4", ystart, yend+1)
        arg_dict["df_nat_n2o"] = make_dataframe_of_zeros("N2O", ystart, yend+1)
    else:
        arg_dict["sunvolc"] = 1
        if os.path.exists(os.path.join(input_dir, f"solar_RCMIP_{scen_name_strip}_RCMIP3.txt")):
            arg_dict["rf_solar_file"] = f"solar_RCMIP_{scen_name_strip}_RCMIP3.txt"
        elif os.path.exists(os.path.join(input_dir, f"solar_RCMIP_{scen_name_strip.split('-')[0]}_RCMIP3.txt")):
            arg_dict["rf_solar_file"] = f"solar_RCMIP_{scen_name_strip.split('-')[0]}_RCMIP3.txt"
        elif scen_name.startswith("methanemip") and os.path.exists(os.path.join(input_dir, f"solar_RCMIP_ssp245_RCMIP3.txt")):
            arg_dict["rf_solar_file"] = f"solar_RCMIP_ssp245_RCMIP3.txt"
        else:
            # TODO check if this is appropriate in all cases
            arg_dict["rf_solar_file"] = "solar_RCMIP_historical_RCMIP3.txt"
        if os.path.exists(os.path.join(input_dir, f"VOLC_RCMIP_{scen_name_strip}_RCMIP3.txt")):
            arg_dict["rf_volc_file"] = f"VOLC_RCMIP_{scen_name_strip}_RCMIP3.txt"
        elif os.path.exists(os.path.join(input_dir, f"VOLC_RCMIP_{scen_name_strip.split('-')[0]}_RCMIP3.txt")):
            arg_dict["rf_volc_file"] = f"VOLC_RCMIP_{scen_name_strip.split('-')[0]}_RCMIP3.txt"
        elif scen_name.startswith("methanemip") and os.path.exists(os.path.join(input_dir, f"VOLC_RCMIP_ssp245_RCMIP3.txt")):
            arg_dict["rf_volc_file"] = f"VOLC_RCMIP_ssp245_RCMIP3.txt"
        else:
            # TODO check if this is appropriate in all cases
            arg_dict["rf_volc_file"] = "VOLC_RCMIP_historical_RCMIP3.txt"
        if os.path.exists(os.path.join(input_dir, f"LUCalbedo_RCMIP_{scen_name_strip}_RCMIP3.txt")):
            arg_dict["rf_luc_file"] = f"LUCalbedo_RCMIP_{scen_name_strip}_RCMIP3.txt"
        elif os.path.exists(os.path.join(input_dir, f"LUCalbedo_RCMIP_{scen_name_strip.split('-')[0]}_RCMIP3.txt")):
            arg_dict["rf_luc_file"] = f"LUCalbedo_RCMIP_{scen_name_strip.split('-')[0]}_RCMIP3.txt"
        elif scen_name.startswith("methanemip") and os.path.exists(os.path.join(input_dir, f"LUCalbedo_RCMIP_ssp245_RCMIP3.txt")):
            arg_dict["rf_luc_file"] = f"LUCalbedo_RCMIP_ssp245_RCMIP3.txt"
        elif scen_name == "esm-allGHG-scen7-H-CH4L_rcmip_draw_samples_500.csv":
            arg_dict["rf_luc_file"] = "LUCalbedo_RCMIP_scen7-H_RCMIP3.txt"
        elif scen_name == "esm-allGHG-scen7-L-CH4H_rcmip_draw_samples_500.csv":
            arg_dict["rf_luc_file"] = "LUCalbedo_RCMIP_scen7-L_RCMIP3.txt"
        else:
            # TODO check if this is appropriate in all cases
            arg_dict["rf_luc_file"] = "LUCalbedo_RCMIP_historical_RCMIP3.txt"
    
    # Pick correct emissions file:
    if os.path.exists(os.path.join(input_dir, f"{scen_name}_em_{gases_ep}")):
        arg_dict["df_emis"] = f"{scen_name}_em_{gases_ep}"
    elif os.path.exists(os.path.join(input_dir, f"{scen_name_strip}_em_{gases_ep}")):
        arg_dict["df_emis"] = f"{scen_name_strip}_em_{gases_ep}"
    elif scen_name.startswith("esm-allGHG") and os.path.exists(os.path.join(input_dir, f"esm-{scen_name_strip}_em_{gases_ep}")):
        arg_dict["df_emis"] = f"esm-{scen_name_strip}_em_{gases_ep}"
    elif scen_name_strip in special_mapping and os.path.exists(os.path.join(input_dir, f"{special_mapping[scen_name_strip]}_em_{gases_ep}")):
        print("Went and got correct emissions")
        arg_dict["df_emis"] = f"{special_mapping[scen_name_strip]}_em_{gases_ep}"
    elif not scen_name.startswith("esm-") and row["Type"] == "idealised":
        arg_dict["df_emis"] = em_piControl
    elif os.path.exists(os.path.join(input_dir, f"esm-{scen_name_strip}_em_{gases_ep}")):
        print("Picked emissions file based on esm- prefix")
        arg_dict["df_emis"] = f"esm-{scen_name_strip}_em_{gases_ep}"
    elif scen_name != scen_name_strip:
        print(f"Emissions file missing for scenario {scen_name}")
        sys.exit(4)
    else:
        # TODO check if this is ok...
        print("Went and got fallback emissions")
        arg_dict["df_emis"] = fallback_emissions
    #print(arg_dict["df_emis"])

    # Pick correct concentrations file:
    if os.path.exists(os.path.join(input_dir, f"{scen_name}_conc_{gases_ep}")):
        arg_dict["df_conc"] = f"{scen_name}_conc_{gases_ep}"
    elif os.path.exists(os.path.join(input_dir, f"{scen_name_strip}_conc_{gases_ep}")):
        arg_dict["df_conc"] = f"{scen_name_strip}_conc_{gases_ep}"
    elif scen_name_strip in special_mapping and os.path.exists(os.path.join(input_dir, f"{special_mapping[scen_name_strip]}_conc_{gases_ep}")):
        print("Went and got correct concentrations")
        arg_dict["df_conc"] = f"{special_mapping[scen_name_strip]}_conc_{gases_ep}"
    elif scen_name.startswith("esm-") and row["Type"] == "idealised":
        arg_dict["df_conc"] = conc_piControl
    elif scen_name.startswith("methanemip") and os.path.exists(os.path.join(input_dir, f"ssp245_conc_{gases_ep}")):
        arg_dict["df_conc"] = f"ssp245_conc_{gases_ep}"
    elif not scen_name.startswith("esm-allGHG"):
        print(f"Concentration file missing for scenario {scen_name}")
        sys.exit(4)
    # Need to set this to correct version
    else:
        arg_dict["df_conc"] = fallback_concentrations
    return arg_dict


def take_scenario_row_define_scendata_and_run(row, run_type, variables=None, dont_run=False):
    print(row)
    yend = row["Duration of scenario"] + ystart - 1
    scen_name = row["Scenario"]
    scen_name_strip = scen_name.split("esm-")[-1].split("allGHG-")[-1]
    if scen_name_strip.startswith("scen7") and scen_name_strip.endswith("C"):
        scen_name_strip = scen_name_strip[:-1]
    arg_dict = make_scenariodata_argdict(row, run_type, scen_name, scen_name_strip, yend)
    # print(arg_dict.keys())
    # print(arg_dict["rf_luc_file"])
    #sys.exit(4)
    # TODO: Deal with natural emissions of ch4 and n2
    # Run conc, esm or esm-allghg
    #print(arg_dict)
    scendata = define_scendata_for_scm(input_dir,**arg_dict)
    scendata[0]["scenname"] = scen_name

    if not scen_name.startswith("esm") and not scen_name.startswith("methanemip"):
        scendata[0]["conc_run"] = True
    if dont_run:
        with open("scenarios_inspected.txt", "a") as f:
            f.write(f"{scen_name}, {yend}, {emistart}, {ystart}, {scendata[0]['conc_run']}, {len(scendata)}\n")
        if scen_name == "esm-hist":
            print(row)
            print(scen_name)
            print(scen_name_strip)
            print(arg_dict)
            sys.exit(4)
        return pd.DataFrame()
    
    # if row["Type"] == "idealised" or "piControl" in scen_name:
    #     return pd.DataFrame()


    #print(variables)
    # print(scen_name)
    # #print(row["Type"])
    # print(scendata[0]["emstart"])
    # print(scendata)
    #sys.exit(4)
    #try:
    results = distrorun.run_over_distribution(
        scendata, output_vars=variables, max_workers=20
    )
    # except Exception as e:
    #     print(f"Error running scenario {scen_name}: {e}")
    #     del scendata
    #     return pd.DataFrame()
    # Figure out what variables to output
    # Run over with distrorun, save outputs and move on

    # Special hook for branch runs etc...

    #
    del scendata
    return results

dont_run = False

def dump_results_to_netcdf(results, scenario_name):
    outpath = f"{outpath_main}/{scenario_name}_rcmip_{json_file.split('/')[-1].split('.')[0]}.csv"
    if os.path.exists(outpath):
        print(f"Output file {outpath} already exists, skipping dump")
        return
    results.to_csv(outpath)

if __name__ == "__main__":
    #input_dir = "/div/no-backup-nac/users/masan/GRAFITE/temp_indata"
    #protocol_file = os.path.join("..", "..","rcmip-phase-3/RCMIP3_input_datafiles/", "rcmip_phase3_protocol_v1.1.0.xlsx")
    protocol_file = os.path.join(input_dir, "rcmip_phase3_protocol_v1.1.6.xlsx")
    variables_all, variables_non_idealised, variables_non_idealised_emi, variables_non_idealised_conc =  read_output_variables_from_protocol(protocol_file)
    print(variables_all)
    print(variables_non_idealised_emi)
    experiment_out = {
        "idealised":{
            "esm_all": variables_all,
            "esm_other": variables_all,
            "other": variables_all + ["Emissions|CO2"],
        },
        "non_idealised":{
            "esm_all": variables_non_idealised_emi,
            "esm_other": variables_non_idealised,
            "other": variables_non_idealised_conc,
        }
    }
    skip = []
    skip = ["esm-1pct-brch-1000PgC", "esm-1pct-brch-2000PgC", "esm-1pct-brch-750PgC"]
    split_experiment_dfs = load_and_process_protocol(protocol_file)
    print(split_experiment_dfs)
    #sys.exit(4)
    for key1 in split_experiment_dfs:
        for key2 in split_experiment_dfs[key1]:
            print(f"{key1} - {key2} : {split_experiment_dfs[key1][key2].shape}")
            print("---------------------------------------------------------")
            for index,row in split_experiment_dfs[key1][key2].iterrows():
                print(row["Scenario"])
                outpath = f"{outpath_main}/{row['Scenario']}_rcmip_{json_file.split('/')[-1].split('.')[0]}.csv"
                outpath_processed = f"{outpath_main}/{row['Scenario']}_rcmip_ciceroscm_20260401.csv"
                if os.path.exists(outpath) or os.path.exists(outpath_processed):
                    print(f"Output file {outpath} already exists, skipping run")
                    continue
                print(outpath)
                #sys.exit(4)
                # if key1 == "idealised":
                #     print(f"Skipping scenario {row['Scenario']} as it's idealised")
                #     continue
                if row["Scenario"] in skip:
                    print(f"Skipping scenario {row['Scenario']} as it's in the skip list")
                    continue
                print(row["Scenario"])
                print(special_scen_skip)
                if row["Scenario"] in special_scen_skip:
                    print(f"Skipping special scenario {row['Scenario']}")
                    continue
                if dont_run:
                    if check_if_inspected(row["Scenario"]):
                        print(f"Skipping already inspected scenario {row['Scenario']}")
                        continue
                if row["Duration of scenario"] == "Variable":
                    continue
                print(row["Scenario"])
                results = take_scenario_row_define_scendata_and_run(row, key2, variables=experiment_out[key1][key2], dont_run=dont_run)
                #print(results.head())
                #sys.exit(4)
                if not results.empty:
                    results.to_csv(f"{outpath_main}/{row['Scenario']}_rcmip_{json_file.split('/')[-1].split('.')[0]}.csv")
                del results
        # #sys.exit(4)
    print("Done all scenarios")
    sys.exit(4)