import pandas as pd
from scipy.stats import chi2
import plotly
import plotly.graph_objs as go


def kruskal_wallis_test(df, column1_name, column2_name, alpha):

	k = df[column2_name].nunique()

	N = df[column2_name].count()

	df['rank'] = df[column1_name].rank(ascending=True)
	table1 = df.groupby(column2_name).sum()
	table2 = df.groupby(column2_name).count()

	sub_component = 0
	for i in range(0, k):
		sub_component = (table1.iloc[i, 1] ** 2 / table2.iloc[i, 1]) + sub_component

	test_statistic = ((12 / (N * (N + 1))) * sub_component) - 3 * (N + 1)

	degrees_of_freedom = k - 1
	a = alpha / 100
	chi_critical_value = round(chi2.isf(q=a, df=degrees_of_freedom), 2)
	p_value = chi2.sf(test_statistic, degrees_of_freedom)

	print('\n Rejection Criteria: Reject null hypothesis at', alpha, '% level of significance '
			'if Test Statistic is greater than or equal ',chi_critical_value, '.')

	table = {'Variable': column1_name + ' grouped by ' + column2_name, 'Test Statistic': round(test_statistic,4),
			   'Critical Value': chi_critical_value,'P value': p_value}

	trace = go.Table(
		header=dict(values=list(table.keys()),
					fill=dict(color='#C2D4FF'),
					align=['left'] * 5),
		cells=dict(values=list(table.values()),
				   fill=dict(color='#F5F8FF'),
				   align=['left'] * 5))

	data = [trace]

	plotly.offline.plot({'data': data}, filename='Table.html')

"""Applying Kruskal Wallis Test for Tensile Strength data set"""

data = pd.read_csv('Tensile Strength.csv')
kruskal_wallis_test(data, column1_name='Tensile Strength', column2_name='Mixing Technique', alpha=5)