import numpy as np
import pandas as pd
from math import sqrt
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split  
from sklearn.metrics import mean_squared_error, r2_score

# Read the data
data = pd.read_csv("salary.csv", sep=';')
X = data.iloc[:, 0:-1]
y = data.iloc[:, -1]

# Plot the data
fig = plt.figure()
ax = Axes3D(fig)
ax.scatter(X.iloc[:, 0], X.iloc[:, 1], y)
ax.set_xlabel(data.columns.values[0])
ax.set_ylabel(data.columns.values[1])
ax.set_zlabel(data.columns.values[2])
plt.show()

# Split to training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Train the model
model = KNeighborsRegressor(n_neighbors=4)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)  
print(sqrt(mean_squared_error(y_test, y_pred)))
print(r2_score(y_test, y_pred))