import csv
import matplotlib.pyplot as plt

from datetime import datetime
from datetime import timedelta

# Estimated size of the tank fleet before February 2022 (value used by the
# original script; the printed message attributes it to "sources").
INITIAL_TANKS = 17500

# Accepted range for the monthly repair rate and the fallback value used when
# the user's input is outside that range or is not an integer.
MIN_REPAIR_RATE = 8
MAX_REPAIR_RATE = 23
DEFAULT_REPAIR_RATE = 15

# Zero-based CSV column indexes: the date is the first column and the
# cumulative tank losses are the fifth column.
DATE_COLUMN = 0
TANK_COLUMN = 4

# The prediction stops once fewer than this many tanks remain.
DEPLETION_THRESHOLD = 15

# Hard cap (10 years) on the prediction horizon so the loop always terminates,
# even when the average daily loss is zero or is outweighed by repairs.
MAX_PREDICTION_DAYS = 365 * 10


def parse_repair_rate(text, default=DEFAULT_REPAIR_RATE):
    """Return the monthly repair rate typed by the user.

    Falls back to *default* when *text* is not an integer or lies outside
    the inclusive range MIN_REPAIR_RATE..MAX_REPAIR_RATE.
    """
    try:
        rate = int(text)
    except (TypeError, ValueError):
        return default
    if MIN_REPAIR_RATE <= rate <= MAX_REPAIR_RATE:
        return rate
    return default


def last_day_of_month(date):
    """Return the number of the last day (28-31) of the month of *date*."""
    # Day 28 exists in every month; adding 4 days always lands on day 1-4 of
    # the following month, and that day number tells how long the month was.
    spill_over = (date.replace(day=28) + timedelta(days=4)).day
    return 32 - spill_over


def repaired_on(date, monthly_repair_rate):
    """Return how many repaired tanks re-enter the stock on *date*.

    The monthly rate is split into two deliveries: the rounded-down half on
    the 15th and the remainder on the last day of the month, so the two
    deliveries always add up to the full monthly rate (e.g. 7 + 8 for 15).
    """
    first_half = monthly_repair_rate // 2
    if date.day == 15:
        return first_half
    if date.day == last_day_of_month(date):
        return monthly_repair_rate - first_half
    return 0


def build_tank_rows(records):
    """Build the per-day table from (date string, cumulative losses) pairs.

    Each returned row is a list with these positions:
      [0] date as a datetime object
      [1] date as the original 'YYYY-MM-DD' string
      [2] cumulative tank losses reported for that date
      [3] tank losses on that day (difference to the previous row; 0 for the
          first row)
      [4] total tank losses of that row's month within the dataset
      [5] number of dataset rows in that month (may be fewer than the
          calendar month has days)
    Rows are sorted from the oldest to the most recent date.
    """
    rows = []
    for date_str, cumulative in records:
        date = datetime.strptime(date_str, '%Y-%m-%d')
        rows.append([date, date_str, int(cumulative), 0, 0, 0])
    rows.sort(key=lambda row: row[0])

    # Daily loss is the increase of the cumulative counter since the previous row.
    for previous, current in zip(rows, rows[1:]):
        current[3] = current[2] - previous[2]

    # Walk over runs of rows sharing the same (year, month) and stamp every
    # row of the run with the month's total loss and its number of rows.
    start = 0
    while start < len(rows):
        month_key = (rows[start][0].year, rows[start][0].month)
        end = start
        while end < len(rows) and (rows[end][0].year, rows[end][0].month) == month_key:
            end += 1
        month_rows = rows[start:end]
        monthly_loss = sum(row[3] for row in month_rows)
        for row in month_rows:
            row[4] = monthly_loss
            row[5] = len(month_rows)
        start = end

    return rows


def load_tank_rows(csv_file_path):
    """Read the dataset CSV at *csv_file_path* and return its per-day table.

    The first line is treated as a header and skipped; blank lines are
    ignored. See build_tank_rows() for the layout of the returned rows.
    """
    with open(csv_file_path, newline='', encoding='utf-8') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)  # Skip the header row (tolerates an empty file)
        records = [(row[DATE_COLUMN], row[TANK_COLUMN]) for row in csvreader if row]
    return build_tank_rows(records)


def average_daily_loss(rows):
    """Return the whole-number average of tanks lost per dataset day.

    Floor division is used on purpose so the prediction works with whole
    tanks; returns 0 for an empty table.
    """
    if not rows:
        return 0
    return sum(row[3] for row in rows) // len(rows)


def remaining_history(rows, initial_tanks, monthly_repair_rate):
    """Return [date, remaining tanks] pairs for every row of the dataset.

    Starting from *initial_tanks*, each day's loss is subtracted and the
    repaired tanks due on that date (see repaired_on()) are added back.
    """
    remaining = []
    tanks = initial_tanks
    for row in rows:
        tanks += repaired_on(row[0], monthly_repair_rate) - row[3]
        remaining.append([row[0], tanks])
    return remaining


def predict_depletion(last_date, tanks, daily_loss, monthly_repair_rate,
                      threshold=DEPLETION_THRESHOLD, max_days=MAX_PREDICTION_DAYS):
    """Extrapolate the remaining tank count day by day after *last_date*.

    Every predicted day loses *daily_loss* tanks and gains the repairs due on
    that date. The extrapolation stops as soon as fewer than *threshold*
    tanks remain or after *max_days* days, whichever comes first.

    Returns a list of [date, remaining tanks] pairs, one per predicted day.
    """
    prediction = []
    current_date = last_date
    while tanks >= threshold and len(prediction) < max_days:
        current_date += timedelta(days=1)
        tanks += repaired_on(current_date, monthly_repair_rate) - daily_loss
        prediction.append([current_date, tanks])
    return prediction


def plot_remaining(history, prediction):
    """Plot the historical series as a solid line and the prediction dotted."""
    plt.figure(figsize=(10, 6))

    # Solid line for the dates covered by the dataset
    plt.plot([entry[0] for entry in history], [entry[1] for entry in history],
             label='Remaining Tanks (Historical data)', color='tab:red', linestyle='-', marker='o')

    # Dotted line for the extrapolated dates
    plt.plot([entry[0] for entry in prediction], [entry[1] for entry in prediction],
             label='Remaining Tanks (Prediction)', color='tab:grey', linestyle=':', marker='+')

    # Add stock depleted line
    plt.axhline(y=0, color='red', linestyle='--', label="Tank Stock Depleted")

    # Add labels, title, and legend
    plt.xlabel('Date')
    plt.ylabel('Remaining Tanks')
    plt.title('Tank Losses Over Time')
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.tight_layout()
    plt.legend()
    plt.show()


def main():
    """Ask for the dataset and repair rate, print the summary and show the plot."""
    print("--------------------------------------")
    print("Enter the file path of the Kaggle dataset CSV file.")
    print("Download the dataset at https://www.kaggle.com/datasets/piterfm/2022-ukraine-russian-war")
    csv_file_path = input("Write the filepath to CSV file: ")
    print(f"Analyzing CSV file: {csv_file_path}")
    print(" ")

    print(f"Sources determine number of tanks before FEB 2022: {INITIAL_TANKS}")

    print("Set tanks monthly repair rate (integer between 8 and 23). Sources determine the repair rate somewhere from 8 to 23 repaired tanks per month")
    tanks_monthly_repair_rate = parse_repair_rate(input("Monthly repair rate: "))

    datetankdata = load_tank_rows(csv_file_path)
    if not datetankdata:
        raise SystemExit("The CSV file contains no data rows.")

    average_daily_rate_of_lost_tanks = average_daily_loss(datetankdata)
    history = remaining_history(datetankdata, INITIAL_TANKS, tanks_monthly_repair_rate)
    numberof_tanks = history[-1][1]

    print(f"Daily rate of tanks lost: {average_daily_rate_of_lost_tanks}")
    print(f"Remaining tanks at the end of known historical dataset timeframe: {numberof_tanks}")

    prediction = predict_depletion(
        datetankdata[-1][0],
        numberof_tanks,
        average_daily_rate_of_lost_tanks,
        tanks_monthly_repair_rate,
    )

    # If the horizon cap ended the loop, the stock was not actually depleted.
    final_count = prediction[-1][1] if prediction else numberof_tanks
    if final_count >= DEPLETION_THRESHOLD:
        print(f"Prediction stopped after {MAX_PREDICTION_DAYS} days without depleting the tank stock.")
    print(f"Number of predicted days before tank stock depleted: {len(prediction)}")

    print("--------------------------------------")

    plot_remaining(history, prediction)


if __name__ == "__main__":
    main()