import numpy as np import pandas as pd import matplotlib.pyplot as plt from sklearn.naive_bayes import GaussianNB from sklearn.model_selection import train_test_split from sklearn.metrics import classification_report, confusion_matrix, roc_curve, precision_recall_curve, auc # Read the data data = pd.read_csv("trafficfull.csv", sep=';') X = data.iloc[:, 0:-1] y = data.iloc[:, -1] # Plot the data fig, ax = plt.subplots() groups = data.groupby('HighTraffic') for name, group in groups: ax.plot(group.iloc[:, 0], group.iloc[:, 1], marker='o', linestyle='', label=name) ax.set_xlabel('Temperature') ax.set_ylabel('Wind') ax.legend() # Split the data X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42) # Train the model model = GaussianNB() model.fit(X_train, y_train) # Evaluate the model y_pred = model.predict(X_test) y_pred_prob = model.predict_proba(X_test) print(confusion_matrix(y_test, y_pred)) print(classification_report(y_test, y_pred)) # Plot the Precision Recall Curve #precision, recall, thresholds = precision_recall_curve(y_test, y_pred_prob[:, 1]) #precision = np.insert(precision, 0, 0) #recall = np.insert(recall, 0, 1) #plt.plot(precision, recall, 'b') #plt.ylabel('Precision') #plt.xlabel('Recall') #plt.show() # Plot the ROC curve fpr, tpr, threshold = roc_curve(y_test, y_pred_prob[:, 1]) plt.figure() plt.plot(fpr, tpr, 'b') plt.plot([0, 1], [0, 1],'r--') plt.ylabel('True Positive Rate') plt.xlabel('False Positive Rate') plt.show() print(auc(fpr, tpr)) # print the AUC