import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn.model_selection import train_test_split
import warnings

warnings.filterwarnings("ignore")  # silence np.polyfit's RankWarning on ill-conditioned fits


def MSE(a, b):
    """Mean squared error between two arrays."""
    return ((a - b) ** 2).mean()


def data_simulation(sample_size, scale, period, variance):
    """Simulate noisy observations of y = x * cos(x / period) on [-scale, scale]."""
    x = np.random.uniform(-scale, scale, sample_size)
    x.sort()
    noise = np.random.normal(0, variance, sample_size)
    y = x * np.cos(x / period) + noise
    return x, y


def scatter_plot(x_train, x_test, y_train, y_test):
    """Scatter the training and test points on a shared figure."""
    plt.figure(figsize=(15, 5))
    plt.plot(x_train, y_train, '.', color='black', markersize=3, label='train')
    plt.plot(x_test, y_test, '.', color='red', markersize=8, label='test')
    plt.xlabel('X')
    plt.ylabel('Y')
    leg = plt.legend(loc='lower center', fontsize='large')
    leg.get_frame().set_alpha(0)
    return plt


def plot_polynomial_curves(x_train, x_test, y_train, y_test, degree, scale):
    """Fit one polynomial per candidate degree, print train/test MSE, and draw each fitted curve."""
    loss_train_stack, loss_test_stack = [], []
    color = cm.rainbow(np.linspace(0, 1, len(degree)))
    plt = scatter_plot(x_train, x_test, y_train, y_test)
    for k, c in zip(range(len(degree)), color):
        coef = np.polyfit(x_train, y_train, degree[k])
        y_hat_train = np.polyval(coef, x_train)
        y_hat_test = np.polyval(coef, x_test)
        loss_train_stack.append(MSE(y_hat_train, y_train))
        loss_test_stack.append(MSE(y_hat_test, y_test))
        print('Polynomial degree:', degree[k],
              '| MSE train:', np.round(loss_train_stack[-1], 4),
              '| MSE test:', np.round(loss_test_stack[-1], 4))
        x_draw = np.linspace(-scale, scale, num=200)
        y_draw = np.polyval(coef, x_draw)
        plt.plot(x_draw, y_draw, color=c, label=degree[k])
    plt.ylim(min(min(y_train), min(y_test)), max(max(y_train), max(y_test)))
    # plt.plot(x_train, y_hat_train, color=c, label=degree[k])
    leg = plt.gca().legend(loc='center left', bbox_to_anchor=(1, .65),
                           title="Polynomial degree of\nthe fitted curve\n")
    leg.get_frame().set_alpha(0)


def plot_optimal_curve(optimal_train, optimal_test, H_train, H_test, optimal_degree):
    """Compare the cubic fit against the optimal-capacity fit as the sample size grows."""
    cmap = plt.get_cmap("tab10")  # because I prefer this color map
    H = np.concatenate((optimal_train, optimal_test, H_train[:, 2], H_test[:, 2]), axis=0)
    mini, maxi = min(H), max(H)
    linewidth = 2
    fig, ax1 = plt.subplots(figsize=(15, 5))
    ax2 = ax1.twinx()
    # Home-made trick for a great legend: dummy series that only supply the black style keys
    ax1.plot([-20] * len(optimal_train), label='Test', color='k', linewidth=1)
    ax1.plot([-20] * len(optimal_train), label='Training', linestyle='dashed', color='k', linewidth=1)
    ax1.plot([-20] * len(optimal_train), label=' ', linestyle='dashed', color='white')
    # Since we are interested in the cubic curve (column 2 is degree 3 when degree = [1, 2, 3, ...])
    ax1.plot(H_train[:, 2], color=cmap(1), linestyle='dashed', linewidth=linewidth)
    ax1.plot(H_test[:, 2], color=cmap(1), label='Cubic', linewidth=linewidth)
    # Since we are 'also' interested in the optimal curve
    ax1.plot(optimal_train, color=cmap(0), linestyle='dashed', linewidth=linewidth)
    ax1.plot(optimal_test, color=cmap(0), label='Optimal Capacity', linewidth=linewidth)
    # Optimal degree with respect to the sample size
    ax2.plot(optimal_degree, color=cmap(2), label='Optimal degree', linewidth=linewidth)
    plt.xticks([0, 1, 2, 3, 4, 5])
    ax1.set_xlabel(r'Sample size: $\log_{10}(n) - 1$')
    ax1.set_ylabel('MSE')
    ax1.set_ylim(mini - 50, maxi + 100)
    ax2.set_ylabel('Degree of the polynomial', fontsize=12, color='green')
    leg1 = ax1.legend(loc='center left', bbox_to_anchor=(1.1, .8))  # legend location is somehow important to me
    leg1.get_frame().set_alpha(0)  # legend without frame > legend with frame imo
    leg2 = ax2.legend(loc='center left', bbox_to_anchor=(1.1, .8))
    leg2.get_frame().set_alpha(0)


def train_poly_and_see(sample_size, scale, period, variance, degree):
    """For each sample size, fit every candidate degree, record the train/test MSE grids,
    and keep the degree that minimizes the test MSE."""
    H_train = np.zeros((len(sample_size), len(degree)))
    H_test = np.zeros((len(sample_size), len(degree)))
    optimal_train, optimal_test, optimal_degree = [], [], []
    for i, n in enumerate(sample_size):
        x_train, y_train = data_simulation(n, scale, period, variance)
        x_test, y_test = data_simulation(1000, scale, period, variance)
        for j, k in enumerate(degree):
            coef = np.polyfit(x_train, y_train, k)
            y_hat_train = np.polyval(coef, x_train)
            y_hat_test = np.polyval(coef, x_test)
            H_train[i, j] = MSE(y_train, y_hat_train)
            H_test[i, j] = MSE(y_test, y_hat_test)
        best = np.argmin(H_test[i, :])
        optimal_degree.append(degree[best])
        optimal_train.append(H_train[i, best])
        optimal_test.append(H_test[i, best])
    return H_train, H_test, optimal_train, optimal_test, optimal_degree
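

# --- Usage sketch (added for illustration; not part of the original source) ---
# A minimal, hypothetical driver showing how the helpers above fit together.
# Every concrete value below (seed, scale, period, variance, the degree grid,
# and the sample sizes) is an assumption; the sample sizes are chosen so the
# log10(n) - 1 x-axis in plot_optimal_curve lines up with the ticks 0..5.
if __name__ == "__main__":
    np.random.seed(0)                    # assumed seed, for reproducibility only
    scale, period, variance = 10, 2, 1   # hypothetical simulation settings
    degrees = list(range(1, 10))         # candidate capacities; index 2 is the cubic

    # One simulated train/test pair, then one fitted curve per candidate degree.
    x_train, y_train = data_simulation(100, scale, period, variance)
    x_test, y_test = data_simulation(1000, scale, period, variance)
    plot_polynomial_curves(x_train, x_test, y_train, y_test, degrees, scale)

    # Sweep n = 10^1 .. 10^6 so that log10(n) - 1 runs over 0..5.
    sample_sizes = [10 ** p for p in range(1, 7)]
    H_train, H_test, opt_train, opt_test, opt_deg = train_poly_and_see(
        sample_sizes, scale, period, variance, degrees)
    plot_optimal_curve(opt_train, opt_test, H_train, H_test, opt_deg)
    plt.show()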