"""Compare a decision tree on the Iris data using all vs. the k best features.

The script trains a ``DecisionTreeClassifier`` on a 70/30 train/test split of
the Iris data set, first with all four features and then with the 3, 2 and 1
features ranked highest by mutual information (``SelectKBest``). A
classification report is printed for each of the four runs.
"""
import numpy as np
import pandas as pd
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, mutual_info_classif

# Read the data: the four feature columns followed by a 'target' column.
iris = datasets.load_iris()
data = pd.DataFrame(
    data=np.c_[iris['data'], iris['target']],
    columns=iris['feature_names'] + ['target'],
)

# Split the data (fixed random_state so the split itself is reproducible).
trainingdata, testdata = train_test_split(data, test_size=0.30, random_state=2)
X_train, y_train = trainingdata.iloc[:, 0:4], trainingdata.iloc[:, 4]
X_test, y_test = testdata.iloc[:, 0:4], testdata.iloc[:, 4]

# Baseline: apply the classifier to all four features.
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

# Repeat the experiment keeping only the k best features, for k = 3, 2, 1.
# NOTE(review): neither the tree nor mutual_info_classif is given a
# random_state here, so the reports may differ slightly between runs.
for k in (3, 2, 1):
    # The selector is fitted on the training data only, then the same
    # column selection is applied to both the training and the test set.
    fselector = SelectKBest(mutual_info_classif, k=k)
    fselector.fit(X_train, y_train)

    # Train a fresh classifier on the reduced feature set and report.
    model = DecisionTreeClassifier()
    model.fit(fselector.transform(X_train), y_train)
    y_pred = model.predict(fselector.transform(X_test))
    print(classification_report(y_test, y_pred))