'''
Several no-fuss methods for creating plots
'''
from typing import Dict, Optional, Callable, Tuple, Union, List
from numpy import exp
import numpy
from numpy.core.fromnumeric import repeat, shape
import pandas
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as graph_objects

# Build the module-wide default theme. The settings below are registered as
# "custom_template" and layered on top of plotly's "plotly_white" template
template =  graph_objects.layout.Template()
template.layout = graph_objects.Layout(
                                    # centre the plot title horizontally
                                    title_x=0.5,
                                    # figure margins (left, right, bottom, top) and default height
                                    margin=dict(l=2, r=2, b=2, t=30),
                                    height=400,
                                    # Interaction: hover reports the closest point
                                    hovermode="closest",
                                    # axes: draw the x and y axis lines with a width of 2
                                    xaxis_showline=True,
                                    xaxis_linewidth=2,
                                    yaxis_showline=True,
                                    yaxis_linewidth=2,
                                    # Pick a slightly different P.O.V from default for 3D scenes;
                                    # this avoids the extremities of the y and x axes
                                    # being cropped off
                                    scene_camera=dict(eye=dict(x=1.5, y=1.5, z=0.1))
                                    )

# Default styling per trace type:
# slightly transparent markers for 2D and 3D scatter plots
template.data.scatter = [graph_objects.Scatter(marker=dict(opacity=0.8))]
template.data.scatter3d = [graph_objects.Scatter3d(marker=dict(opacity=0.8))]
# surfaces keep plotly's own defaults
template.data.surface = [graph_objects.Surface()]
# histogram bars get an outline of width 1
template.data.histogram = [graph_objects.Histogram(marker=dict(line=dict(width=1)))]
# box plots only draw outlier points and are not notched
template.data.box = [graph_objects.Box(boxpoints='outliers', notched=False)]


# Register the template, then make it the default on top of "plotly_white".
# The registration must come first because the default refers to it by name
pio.templates["custom_template"] = template
pio.templates.default = "plotly_white+custom_template"

# Trendline colours, used in order for the trendlines drawn by scatter_2D.
# Take note that the text for this course often refers to colours explicitly,
# such as "looking at the red line". Changing the variable below may result
# in this text being inconsistent
colours_trendline = px.colors.qualitative.Set1  

def _to_human_readable(text:str):
    '''
    Converts a label into a human readable form
    '''
    return text.replace("_", " ")


def _prepare_labels(df:pandas.DataFrame, labels:List[Optional[str]], replace_nones:bool=True):
    '''
    Ensures labels are human readable.
    Automatically picks data if labels not provided explicitly
    '''

    human_readable = {}

    if isinstance(replace_nones, bool):
        replace_nones = [replace_nones] * len(labels)

    for i in range(len(labels)):
        lab = labels[i]
        if replace_nones[i] and (lab is None):
            lab = df.columns[i]
            labels[i] = lab

        # make human-readable
        if lab is not None:
            human_readable[lab] = _to_human_readable(lab)

    return labels, human_readable


def box_and_whisker(df:pandas.DataFrame,
                label_x:Optional[str]=None,
                label_y:Optional[str]=None,
                label_x2:Optional[str]=None,
                title=None,
                show:bool=False):
    '''
    Builds a box and whisker plot, optionally displays it, and returns the figure.

    Note that if calling this from jupyter notebooks and not capturing the output
    it will appear on screen as though `.show()` has been called

    df: The data
    label_x: The column to group by on the x axis. No grouping if None
    label_y: The column to plot on the y axis. Defaults to df.columns[1]
    label_x2: If provided, splits boxplots into 2+ per x value, each with its own colour
    title: Plot title
    show:   appears on screen. NB that this is not needed if this is called from a
            notebook and the output is not captured

    '''

    # Only the y axis falls back to a dataframe column; x and colour stay optional
    requested = [label_x, label_y, label_x2]
    fill_missing = [False, True, False]
    columns, readable_names = _prepare_labels(df, requested, replace_nones=fill_missing)
    column_x = columns[0]
    column_y = columns[1]

    plot_options = dict(x=column_x,
                        y=column_y,
                        color=label_x2,
                        labels=readable_names,
                        title=title)
    fig = px.box(df, **plot_options)

    # Display only on request; the figure is returned either way
    if show:
        fig.show()

    return fig


def histogram(df:pandas.DataFrame,
                label_x:Optional[str]=None,
                label_y:Optional[str]=None,
                label_colour:Optional[str]=None,
                nbins:Optional[int]=None,
                title=None,
                include_boxplot=False,
                histfunc:Optional[str]=None,
                show:bool=False):
    '''
    Creates a histogram and optionally shows it. Returns the figure for that histogram.

    Note that if calling this from jupyter notebooks and not capturing the output
    it will appear on screen as though `.show()` has been called

    df: The data
    label_x: What to bin by. Defaults to df.columns[0]
    label_y: If provided, these values are aggregated per bin to form the y axis.
             If None, the y axis is decided by plotly (a count of label_x unless histfunc says otherwise)
    label_colour: If provided, creates a stacked histogram, splitting each bar by this column
    nbins: the number of bins to show. None for automatic
    title: Plot title
    include_boxplot: If True, a marginal box plot is added to the figure
    histfunc: How to calculate y. See plotly for options
    show:   appears on screen. NB that this is not needed if this is called from a
            notebook and the output is not captured

    '''

    # Automatically pick the x column if not specified; y and colour stay optional
    selected_columns, axis_labels = _prepare_labels(df, [label_x, label_y, label_colour], replace_nones=[True, False, False])

    fig = px.histogram(df,
                        x=selected_columns[0],
                        y=selected_columns[1],
                        nbins=nbins,
                        color=label_colour,
                        labels=axis_labels,
                        title=title,
                        marginal="box" if include_boxplot else None,
                        histfunc=histfunc
                        )

    # Set the boxplot notches to False to deal with a plotting bug.
    # Select by trace type rather than by position: when the data is split by
    # colour there is one marginal box per colour, not just a single box at index 1
    if include_boxplot:
        fig.update_traces(notched=False, selector=dict(type="box"))

    # Show the plot, if requested
    if show:
        fig.show()

    # return the figure
    return fig


def multiple_histogram(df:pandas.DataFrame,
                label_x:str,
                label_group:str,
                label_y:Optional[str]=None,
                histfunc:str='count',
                nbins:Optional[int]=None,
                title=None,
                show:bool=False):
    '''
    Creates one histogram per group on shared axes and optionally shows the result.
    Returns the figure holding those histograms.

    Note that if calling this from jupyter notebooks and not capturing the output
    it will appear on screen as though `.show()` has been called

    df: The data
    label_x: What to bin by. Defaults to df.columns[0] if None is passed
    label_group: The column whose distinct values define the groups. One histogram
                 trace is added per value, in sorted order
    label_y: If provided, these values are aggregated with histfunc to form the y axis.
             Defaults to count of label_x
    histfunc: How to calculate y, such as 'count', 'sum' or 'avg'. Must not be 'count'
              when label_y is provided
    nbins: the number of bins to show. None for automatic
    title: Plot title
    show:   appears on screen. NB that this is not needed if this is called from a
            notebook and the output is not captured

    '''

    assert (histfunc != 'count') or (label_y is None), "Set histfunc to a value such as sum or avg if using label_y"

    # Automatically pick the x column if not specified
    selected_columns, axis_labels = _prepare_labels(df,  [label_x, label_y, label_group], replace_nones=[True, False, False])
    column_x = selected_columns[0]
    column_y = selected_columns[1]

    # Look titles up through the resolved columns so a defaulted label_x works too
    if column_y is None:
        y_title = histfunc
    else:
        y_title = histfunc + " of " + axis_labels[column_y]

    fig = graph_objects.Figure(layout=dict(
                                    title=title,
                                    xaxis_title_text=axis_labels[column_x],
                                    yaxis_title_text=y_title)
                                )

    group_values = sorted(set(df[label_group]))

    for group_value in group_values:
        dat = df[df[label_group] == group_value]

        fig.add_trace(graph_objects.Histogram(
            x=dat[column_x],
            y=None if column_y is None else dat[column_y],
            histfunc=histfunc,
            # name used in legend and hover labels; always handed over as text
            name=str(group_value),
            nbinsx=nbins))

    #Place legend title
    fig.update_layout(legend_title_text=label_group)

    # Show the plot, if requested
    if show:
        fig.show()

    # return the figure
    return fig


def line_2D(
                trendline:Union[Tuple[str,Callable],List[Tuple[str,Callable]], Dict[str,List[float]]],
                x_range:List[float]=[0,1],
                label_x:str='x',
                label_y:str='y',
                legend_title:str="Line",
                title=None,
                show:bool=False):
    '''
    Creates a 2D line plot *using functions* and optionally shows it. Returns the figure for that plot.
    If you simply want a line plot using data, call scatter_2D then write fig.update_traces(mode='lines')

    Note that if calling this from jupyter notebooks and not capturing the output
    it will appear on screen as though `.show()` has been called

    trendline:  A (name, function) tuple, a list of such tuples, or a dict mapping each name
                to pre-calculated y values. Functions are called as function(x=x_values) with
                a numpy array and must return one y value per x value. Pre-calculated values
                must likewise hold one y value per x value
    x_range:    Exactly two values are treated as the [min, max] of the x-axis, and 200 evenly
                spaced x values are generated between them. Any other number of values is
                interpreted as the exact x-values to be graphed (they are sorted first)
    label_x:    The title for the x-axis
    label_y:    The title for the y-axis
    legend_title: The title for the legend
    title:      The plot title. If None and a single line is provided, the title is set to the name of that line. Use "" to avoid
    show:   appears on screen. NB that this is not needed if this is called from a
            notebook and the output is not captured

    Raises ValueError if a line does not provide exactly one y value per x value
    '''

    if isinstance(trendline, tuple):
        trendline = [trendline]

    # NB: the default x_range list is only ever rebound below, never modified in place
    if len(x_range) == 2:
        x_vals = numpy.linspace(x_range[0], x_range[1], num=200)
    else:
        # X-range is interpreted as x_vals
        x_vals = numpy.sort(numpy.array(x_range))

        # Rewrite x_range to actually be an x-axis range
        x_range = [x_vals[0], x_vals[-1]]

    # Normalise both input forms into (name, y values) pairs
    if isinstance(trendline, dict):
        lines = list(trendline.items())
    else:
        lines = [(cur[0], cur[1](x=x_vals)) for cur in trendline]

    # Start from empty float arrays so the combined columns are numeric even with no lines
    x_parts = [numpy.array([])]
    y_parts = [numpy.array([])]
    names = []

    for name, y_vals in lines:
        y_vals = numpy.asarray(y_vals)

        # Without this check, lines of different lengths could be silently
        # attributed to the wrong name, or fail later with an obscure error
        if y_vals.shape != x_vals.shape:
            raise ValueError("Line '{}' provides y values of shape {} but the x values have shape {}".format(
                                name, y_vals.shape, x_vals.shape))

        x_parts.append(x_vals)
        y_parts.append(y_vals)
        names.extend([name] * len(x_vals))

    df = pandas.DataFrame({
        label_x: numpy.concatenate(x_parts),
        label_y: numpy.concatenate(y_parts),
        legend_title: names,
    })

    # Pick a title if none provided and we only have one line.
    # The normalised pairs are used so this works for dict input as well
    if (title is None) and (len(lines) == 1):
        title = lines[0][0]

    # Create as a 2d scatter but with lines
    fig = scatter_2D(df, label_colour=legend_title, title=title, show=False, x_range=x_range)
    fig.update_traces(mode='lines')

    # Don't show a legend if we only have one line plotted
    if len(lines) == 1:
        fig.update_layout(showlegend=False)

    if show:
        fig.show()

    return fig


def scatter_2D(df:pandas.DataFrame,
                label_x:Optional[str]=None,
                label_y:Optional[str]=None,
                label_colour:Optional[str]=None,
                label_size:Optional[str]=None,
                size_multiplier:float=1,
                title=None,
                show:bool=False,
                x_range:Optional[List[float]]=None,
                trendline:Union[Callable,List[Callable],None]=None):
    '''
    Creates a 2D scatter plot and optionally shows it. Returns the figure for that scatter.

    Note that if calling this from jupyter notebooks and not capturing the output
    it will appear on screen as though `.show()` has been called

    df: The data
    label_x: The label to extract from df to plot on the x axis. Defaults to df.columns[0]
    label_y: The label to extract from df to plot on the y axis. Defaults to df.columns[1]
    label_colour: The label to extract from df to colour points by
    label_size: The label to extract from df to size points by. If None, a single marker
                size is chosen that shrinks as the number of rows grows
    size_multiplier: Scales the marker size, whichever way it was chosen
    title: Plot title
    show:   appears on screen. NB that this is not needed if this is called from a
            notebook and the output is not captured
    x_range:    Overrides the x-axis range. Also sets the range trendlines are drawn over
    trendline:  A function that accepts X (a numpy array) and returns Y (an iterable),
                or a list of such functions. Colours are taken from colours_trendline
                in order and start again from the first once they run out

    '''

    # Automatically pick columns if not specified
    selected_columns, axis_labels = _prepare_labels(df, [label_x, label_y, label_colour], [True, True, False])

    # Create the figure and plot
    fig = px.scatter(df,
                x=selected_columns[0],
                y=selected_columns[1],
                color=selected_columns[2],
                labels=axis_labels,
                # only ask for extra hover data when a size column was actually given
                hover_data=None if label_size is None else [label_size],
                title=title
                )

    if label_size is None:
        # Use a marker size inversely proportional to the number of points
        size = int(round(22.0 - 19/(1+exp(-(df.shape[0]/100-2)))) * size_multiplier)
    else:
        # Set the size based on a label
        # NOTE(review): the same full-length size column is handed to every trace;
        # confirm sizes still line up when label_colour splits the points into several traces
        size = df[label_size]*size_multiplier

    fig.update_traces(marker={'size': size})

    if x_range is not None:
        fig.update_xaxes(range=[x_range[0], x_range[1]])

    # Create trendlines
    if trendline is not None:
        if callable(trendline):
            trendline = [trendline]

        # Evaluate over the requested x range, or over the extent of the data
        if x_range is None:
            x_min = df[selected_columns[0]].min()
            x_max = df[selected_columns[0]].max()
        else:
            x_min = x_range[0]
            x_max = x_range[1]
        evaluate_for = numpy.linspace(x_min, x_max, num=200)

        shapes = []
        for index, t in enumerate(trendline):
            # Wrap around the palette so that no trendline is silently dropped
            colour = colours_trendline[index % len(colours_trendline)]
            y_vals = t(evaluate_for)

            # Describe the line as a path: move to the first point, then line to each next one
            path = "M" + " L ".join([str(c[0]) + " " + str(c[1]) for c in zip(evaluate_for,y_vals)])
            shapes.append(dict(
                                type="path",
                                path=path,
                                line_color=colour,
                            )
                        )

        fig.update_layout(shapes=shapes)

    # Show the plot, if requested
    if show:
        fig.show()

    # return the figure
    return fig


def scatter_3D(df:pandas.DataFrame,
                label_x:Optional[str]=None,
                label_y:Optional[str]=None,
                label_z:Optional[str]=None,
                label_colour:Optional[str]=None,
                title=None,
                show:bool=False):
    '''
    Builds a 3D scatter plot, optionally displays it, and returns the figure.

    Note that if calling this from jupyter notebooks and not capturing the output
    it will appear on screen as though `.show()` has been called

    df: The data
    label_x: The column to plot on the x axis. Defaults to df.columns[0]
    label_y: The column to plot on the y axis. Defaults to df.columns[1]
    label_z: The column to plot on the z axis. Defaults to df.columns[2]
    label_colour: The column to colour points by. Defaults to the column on the z axis
    title: Plot title
    show:   appears on screen. NB that this is not needed if this is called from a
            notebook and the output is not captured
    '''

    # Every axis falls back to the dataframe column at the same position
    columns, readable_names = _prepare_labels(df, [label_x, label_y, label_z])
    column_x, column_y, column_z = columns

    # An explicit colour column needs its own readable name; otherwise colour by Z
    if label_colour is not None:
        readable_names[label_colour] = _to_human_readable(label_colour)
    colour_column = column_z if label_colour is None else label_colour

    plot_options = dict(x=column_x,
                        y=column_y,
                        z=column_z,
                        color=colour_column,
                        labels=readable_names,
                        title=title)
    fig = px.scatter_3d(df, **plot_options)

    # Display only on request; the figure is returned either way
    if show:
        fig.show()

    return fig


def surface(x_values,
            y_values,
            calc_z:Callable,
            title=None,
            axis_title_x:Optional[str]=None,
            axis_title_y:Optional[str]=None,
            axis_title_z:Optional[str]=None,
            show:bool=False):
    '''
    Creates a surface plot using a function. Returns the figure for that plot.

    Note that if calling this from jupyter notebooks and not capturing the output
    it will appear on screen as though `.show()` has been called

    x_values: A 1D numpy array of x values
    y_values: A 1D numpy array of y values
    calc_z: A function to calculate z, given an x and a y value. It is called once per
            (x, y) pair and must return a single number, or a numpy array / pandas
            Series holding exactly one number
    title: Plot title
    axis_title_x: Title for the x axis
    axis_title_y: Title for the y axis
    axis_title_z: Title for the z axis. Also used as the title of the colour bar
    show:   appears on screen. NB that this is not needed if this is called from a
            notebook and the output is not captured
    '''

    # Check arguments
    assert len(x_values.shape) == 1, "Provide x_values as 1D"
    assert len(y_values.shape) == 1, "Provide y_values as 1D"


    # Calculate z for a range of x and y inputs
    # Note that z seems to be expected to be indexed [y,x] not [x,y] though this appears to
    # be counter to the documentation. If z is indexed [x, y] the result is flipped.
    # Potentially there is a bug here somewhere causing this issue or in plotly itself
    z = numpy.zeros((y_values.shape[0], x_values.shape[0]))
    for i_x, x in enumerate(x_values):
        for i_y, y in enumerate(y_values):
            value = calc_z(x, y)

            # Models commonly return a one-element array. Pull the number out explicitly
            # rather than relying on numpy converting a size-1 array into a scalar
            # during assignment, which numpy has deprecated
            if isinstance(value, (numpy.ndarray, pandas.Series)):
                value = numpy.asarray(value).item()

            z[i_y, i_x] = value

    # Create the surface from the calculated grid
    fig = graph_objects.Figure(data=[graph_objects.Surface(x=x_values, y=y_values, z=z)])
    fig.update_layout(title=title,
                      scene_xaxis_title=axis_title_x,
                      scene_yaxis_title=axis_title_y,
                      scene_zaxis_title=axis_title_z)

    #Add z-axis as colourbar title
    fig.update_traces(colorbar_title_text= axis_title_z, selector=dict(type='surface'))

    # Show the plot, if requested
    if show:
        fig.show()

    # return the figure
    return fig


def model_to_surface_plot(model, plot_features:List[str], data:pandas.DataFrame):
    '''Plots two features of a model as a surface. Other values are set at their means

    model:          A model that accepts a dataframe for prediction
    plot_features:  Two features to plot
    data:           A dataframe the model was trained or tested on

    The model is asked for a prediction at each point of a 20 x 20 grid spanning the
    minimum to the maximum of the two plotted features. Returns the surface figure.
    '''

    # Give status as this can take several seconds to run
    print("Creating plot...")

    other_features = [f for f in data.columns if f not in plot_features]

    means = numpy.average(data[other_features], axis=0)
    mins = numpy.min(data[plot_features], axis=0)
    maxes = numpy.max(data[plot_features], axis=0)

    # Build a one-row frame with the columns in the same order as `data`, so that
    # models sensitive to column order see the layout of the original data.
    # The two plotted columns start as placeholders and are filled in per prediction
    fixed_values = dict(zip(other_features, means))
    df = pandas.DataFrame({
        column: [fixed_values.get(column, numpy.nan)]
        for column in data.columns
    })

    def predict(x, y):
        '''
        Makes a prediction using the model
        '''
        # Overwriting existing columns keeps their position in the frame
        df[plot_features[0]] = [x]
        df[plot_features[1]] = [y]

        return model.predict(df)

    # Create a 3d plot of predictions
    x_vals = numpy.linspace(mins[plot_features[0]], maxes[plot_features[0]], 20)
    y_vals = numpy.linspace(mins[plot_features[1]], maxes[plot_features[1]], 20)

    return surface(x_vals,
                    y_vals,
                    predict,
                    title="Model Prediction",
                    axis_title_x=plot_features[0],
                    axis_title_y=plot_features[1],
                    axis_title_z="Probability")


def save_plot_as_image(fig, file="./plot.jpg", width=None, height="400", scale=1, format="jpg"):
    """
    Convert a figure to a static image and write it to a file or writeable object.
    If "width" is not set, plotly will set the aspect ratio based on "height"

    Parameters

        fig - Figure object or dict representing a figure
        file (str or writeable) - A string representing a local file path or a writeable object (e.g. an open file descriptor)
        format (str or None) - The desired image format:

                'png'
                'jpg' or 'jpeg'
                'webp'
                'svg'
                'pdf'
                'eps' (Requires the poppler library to be installed and on the PATH)

        width (int or None) - The width of the exported image in layout pixels.
        height (int or None) - The height of the exported image in layout pixels.
        Whole numbers given as strings (such as the default "400") are converted to integers.

        scale (int or float or None) - The scale factor to use when exporting the figure.
        A scale factor larger than 1.0 will increase the image resolution with respect to the
        figure's layout pixel dimensions. Whereas a scale factor of less than 1.0 will decrease
        the image resolution.

    Raises ValueError if width or height is a string that is not a whole number
    """
    # The dimensions are documented as integers, yet the default height is a string.
    # Convert strings up front so plotly always receives numbers
    if isinstance(width, str):
        width = int(width)
    if isinstance(height, str):
        height = int(height)

    pio.write_image(fig,
                    file=file,
                    width=width,
                    height=height,
                    scale=scale,
                    format=format,
                    engine="kaleido",
                    )