#!/usr/bin/env python3

import pandas as pd
import requests
from urllib.parse import quote_plus
import os


def read_csv(file_path):
    """Load a CSV file into a pandas DataFrame.

    Args:
        file_path: Path (or any other source accepted by ``pandas.read_csv``)
            of the CSV to load.

    Returns:
        The DataFrame parsed by ``pandas.read_csv`` with default options.
    """
    frame = pd.read_csv(file_path)
    return frame


def get_unique_values(df, column_name):
    """Return the distinct values found in one column of a DataFrame.

    Args:
        df: DataFrame to read from.
        column_name: Label of the column whose distinct values are wanted.

    Returns:
        The result of ``Series.unique()`` for that column: each distinct
        value once, in order of first appearance.
    """
    column = df[column_name]
    distinct = column.unique()
    return distinct


def fetch_api_data(unique_values, timeout=30):
    """Fetch data from the WoRMS API for a list of unique scientific names.

    One GET request is sent per name to the ``AphiaRecordsByMatchNames``
    endpoint with ``marine_only=true``. Every record returned for a name is
    tagged with the name that was queried under the ``PI_entered_name`` key.

    A name never aborts the run: any problem is recorded as a row holding
    only ``PI_entered_name`` and ``error_message`` so that every input name
    is represented in the output.

    Args:
        unique_values: Iterable of names to look up. Entries that are not
            non-blank strings (for example NaN from empty CSV cells) are
            recorded as errors without contacting the API.
        timeout: Seconds to wait for each HTTP request before giving up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A list of dicts: the API records (plus ``PI_entered_name``) for names
        that matched, and error rows for names that did not.
    """
    base_url = 'https://www.marinespecies.org/rest/AphiaRecordsByMatchNames?scientificnames%5B%5D='
    api_results = []

    for each in unique_values:
        # Blank cells arrive as NaN/None; they cannot be URL-encoded and there
        # is nothing to look up, so record them without calling the API.
        if not isinstance(each, str) or not each.strip():
            print(f"Skipping lookup for missing or non-text name: {each!r}")
            api_results.append({'PI_entered_name': each, 'error_message': 'Missing or non-text name'})
            continue

        try:
            url_sp = quote_plus(each)
            url = f'{base_url}{url_sp}&marine_only=true'
            print("Next api query to run:")
            print(url)

            response = requests.get(url, timeout=timeout)
            response.raise_for_status()  # Raise an error for bad responses

            # An empty body (presumably HTTP 204 when nothing matches - verify
            # against the WoRMS docs) cannot be decoded as JSON, so treat it
            # as "no records" instead of letting json() raise.
            records = []
            if response.content:
                # The payload is a list of record lists; flatten it.
                for group in response.json():
                    records.extend(group)

            if not records:
                # Keep the name in the output rather than dropping it silently.
                print(f"No match returned for '{each}'.")
                api_results.append({'PI_entered_name': each, 'error_message': 'No match found'})
                continue

            for record in records:
                record['PI_entered_name'] = each  # Add the scientific name to the result
                api_results.append(record)

        except requests.exceptions.HTTPError as e:
            print(f"HTTP error: {e}")
            api_results.append({'PI_entered_name': each, 'error_message': 'HTTP error'})
        except Exception as e:
            # Deliberately broad: one bad name (timeout, connection failure,
            # malformed payload) must not stop the remaining lookups.
            print(f"Error fetching data: {e}")
            api_results.append({'PI_entered_name': each, 'error_message': 'Error fetching data'})

    return api_results


def save_results_to_csv(results, output_file):
    """Write the collected result rows to a CSV file.

    Args:
        results: Row data accepted by the ``pandas.DataFrame`` constructor
            (here, a list of dicts).
        output_file: Destination passed straight to ``DataFrame.to_csv``.
    """
    # index=False keeps the DataFrame's row numbers out of the file.
    pd.DataFrame(results).to_csv(output_file, index=False)


def merge_dataframes(original_df, api_results, column_name):
    """Merge the original DataFrame with API results based on PI_entered_name.

    Only a fixed set of key columns from the API results is attached. The
    merge is a left join, so every row of ``original_df`` is kept; if
    ``api_results`` holds several rows for the same ``PI_entered_name``, the
    matching original rows are repeated once per result row.

    Args:
        original_df: DataFrame holding the user's original data.
        api_results: List of dicts as produced by the API lookup step. Rows
            may be missing any of the key columns (for example error rows
            that carry only ``PI_entered_name`` and ``error_message``).
        column_name: Column of ``original_df`` that contains the names that
            were looked up.

    Returns:
        A new DataFrame: ``original_df`` plus the key result columns.
    """
    # Specify columns to keep after merging
    merge_columns = [
        'PI_entered_name', 'AphiaID', 'scientificname',
        'status', 'rank', 'valid_name', 'lsid', 'match_type'
    ]

    # reindex() rather than [] selection: when every lookup failed (or the
    # result list is empty) the expected columns do not exist, and plain
    # selection would raise KeyError. reindex() creates them filled with NaN.
    results_df = pd.DataFrame(api_results).reindex(columns=merge_columns)

    # Perform the merge
    merged_df = original_df.merge(results_df,
                                  left_on=column_name,
                                  right_on='PI_entered_name',
                                  how='left')
    return merged_df


def main():
    """Main function to orchestrate the reading, fetching, and saving processes.

    Interactive workflow:
      1. Ask for the input CSV, an output base name, and the column that
         holds the scientific names (re-prompting until the column exists).
      2. Look up every unique name through the API and save the raw results
         next to the input file.
      3. Optionally merge the key result columns back onto the original rows
         and save that as a second CSV in the same folder.
    """
    # Step 1: Get user input for the original file name and output file name
    original_file = input("Enter the original CSV file name (with extension). The file path is relative from where this program is run: ")
    output_file_name = input("Enter the new file name to save (without an extension)(all files will be created with suffix '_deduplicated_worms_taxa_results': ")

    # Step 2: Read the input CSV
    df = read_csv(original_file)

    # Step 3: Loop to ensure a valid column name is provided
    while True:
        column_name = input("Enter the name of the column that contains the scientific names: ")
        if column_name in df.columns:
            break  # Exit the loop if the column name is valid
        print(f"Column '{column_name}' not found. Please try again. (make sure capitalization pattern is accurate.)")

    # Step 4: Get unique values from the specified column
    unique_values = get_unique_values(df, column_name)
    print(f"Unique values in column '{column_name}':\n{unique_values}")

    # Step 5: Fetch data from the API
    api_results = fetch_api_data(unique_values)

    # Step 6: Define the output file path (all outputs go beside the input file)
    output_dir = os.path.dirname(original_file)
    output_file = os.path.join(output_dir, f'{output_file_name}_deduplicated_worms_taxa_results.csv')

    # Step 7: Save the results to a new CSV
    save_results_to_csv(api_results, output_file)
    print(output_file + " was saved in the same folder as the input file (relative path = " + output_dir + ")")

    # Step 8: Ask the user if they want to merge results onto the original DataFrame
    merge_choice = input("Do you want to merge key results with the original data columns? (yes/no): ").strip().lower()

    if merge_choice in ('yes', 'y'):
        merged_output_name = input("What do you want to call the merged output file (without extension)? ")
        merged_output_file = os.path.join(output_dir, f'{merged_output_name}_merged_worms_bcodmo.csv')

        merged_df = merge_dataframes(df, api_results, column_name)
        merged_df.to_csv(merged_output_file, index=False)  # Save the merged DataFrame
        # Tell the user where the file went; previously it was written silently.
        print('Merged results saved to:', merged_output_file)
    else:
        print("ok, bye")

# Run the interactive workflow only when this file is executed as a script,
# so importing the module does not trigger any prompts.
if __name__ == "__main__":
    main()