import pydotplus
import numpy as np
import pandas as pd
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split  
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.metrics import classification_report, confusion_matrix

# Read the data
iris = datasets.load_iris()
data = pd.DataFrame(data=np.c_[iris['data'], iris['target']], columns=iris['feature_names'] + ['target'])

# Plot the data
fig, ax = plt.subplots()
groups = data.groupby('target')
for name, group in groups:
    ax.plot(group.iloc[:, 0], group.iloc[:, 1], marker='o', linestyle='', label=name)
ax.set_xlabel('Sepal length')
ax.set_ylabel('Sepal width')
ax.legend(iris['target_names'])

# Split the data
X = data.iloc[:, 0:2]
y = data.iloc[:, 4]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Train the model
model = DecisionTreeClassifier(criterion="gini", min_samples_split=2, min_samples_leaf=1)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)  
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names = iris['target_names']))

# Plot the tree
dot_data = export_graphviz(model, out_file=None, feature_names=X_train.columns.values, \
                           proportion=True, rounded=True)
graph = pydotplus.graph_from_dot_data(dot_data)  

# Visualize using IPython
#from IPython.display import Image, display
#display(Image(graph.create_png()))
# Visualize using matplotlib
from io import BytesIO
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
plt.figure()
img = mpimg.imread(BytesIO(graph.create_png()))
imgplot = plt.imshow(img,  aspect='equal')
plt.axis('off')
plt.show()