import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score

# Read the data
data = pd.read_csv("loans.csv", sep=';')
columns = data.columns.values
X = data.iloc[:, 0:2]
y = data.iloc[:, 2]

# Split into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)
traindata = pd.DataFrame(data=np.c_[X_train, y_train], columns=columns)

# Plot the training data, one marker group per class
fig, ax = plt.subplots()
groups = traindata.groupby(columns[2])
for name, group in groups:
    ax.plot(group.iloc[:, 0], group.iloc[:, 1], marker='o', linestyle='', label=name)
ax.set_xlabel(columns[0])
ax.set_ylabel(columns[1])
ax.legend()

# Create a meshgrid covering the feature space for plotting decision boundaries
x0_min, x0_max = X.iloc[:, 0].min() - 1, X.iloc[:, 0].max() + 1
x1_min, x1_max = X.iloc[:, 1].min() - 1, X.iloc[:, 1].max() + 1
x0, x1 = np.meshgrid(np.arange(x0_min, x0_max, 0.02),
                     np.arange(x1_min, x1_max, 0.02))

# RBF kernel with gamma 1
model = SVC(kernel='rbf', gamma=1, C=1.0)
model.fit(X_train, y_train)
Z = model.predict(np.c_[x0.ravel(), x1.ravel()])
Z = Z.reshape(x0.shape)
CS = ax.contour(x0, x1, Z, colors=['blue'])
# A dict fmt maps each contour level to a fixed label; a plain string would be
# treated as a %-style format and fail because it has no conversion specifier.
labels = ax.clabel(CS, fmt={level: "gamma=1" for level in CS.levels})

# RBF kernel with gamma 0.01
model = SVC(kernel='rbf', gamma=0.01, C=1.0)
model.fit(X_train, y_train)
Z = model.predict(np.c_[x0.ravel(), x1.ravel()])
Z = Z.reshape(x0.shape)
CS = ax.contour(x0, x1, Z, colors=['red'])
labels = ax.clabel(CS, fmt={level: "gamma=0.01" for level in CS.levels})

# RBF kernel with gamma 100
model = SVC(kernel='rbf', gamma=100, C=1.0)
model.fit(X_train, y_train)
Z = model.predict(np.c_[x0.ravel(), x1.ravel()])
Z = Z.reshape(x0.shape)
CS = ax.contour(x0, x1, Z, colors=['gray'])
labels = ax.clabel(CS, fmt={level: "gamma=100" for level in CS.levels})

# Find training and testing error for a range of gamma values
gammavalues = [0.01, 0.1, 1, 10, 100, 1000, 10000, 100000, 1000000]
trainingerror, testingerror = [], []
for gamma in gammavalues:
    model = SVC(kernel='rbf', gamma=gamma, C=1.0)
    model.fit(X_train, y_train)
    trainingerror.append(1 - accuracy_score(y_train, model.predict(X_train)))
    testingerror.append(1 - accuracy_score(y_test, model.predict(X_test)))

# Plot training and testing error against gamma
fig, ax = plt.subplots()
ax.plot(trainingerror, label="Training Error")
ax.plot(testingerror, label="Testing Error")
ax.set_xticks(range(len(gammavalues)))
ax.set_xticklabels(gammavalues)
ax.set_xlabel("gamma")
ax.legend()

# Find the best gamma using 10-fold cross-validation on the training set
accuracies = []
for gamma in gammavalues:
    model = SVC(kernel='rbf', gamma=gamma, C=1.0)
    scores = cross_val_score(model, X_train, y_train, cv=10)
    accuracies.append(np.mean(scores))

# Plot cross-validation accuracy vs gamma
fig, ax = plt.subplots()
ax.plot(accuracies)
ax.set_xticks(range(len(gammavalues)))
ax.set_xticklabels(gammavalues)
ax.set_xlabel("gamma")
ax.set_ylabel("Accuracy")
plt.show()
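
# Optional follow-up, a minimal sketch not part of the original script: refit an
# SVC with the gamma that maximised the 10-fold CV accuracy above and report its
# held-out test accuracy. It uses only variables already defined earlier and runs
# after the plot windows from plt.show() are closed.
best_gamma = gammavalues[int(np.argmax(accuracies))]
best_model = SVC(kernel='rbf', gamma=best_gamma, C=1.0)
best_model.fit(X_train, y_train)
print("Best gamma by cross-validation:", best_gamma)
print("Test accuracy with best gamma:", accuracy_score(y_test, best_model.predict(X_test)))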