"""
===================================================================
Multi-output Decision Tree Regression
===================================================================

An example to illustrate multi-output regression with decision tree.

A :ref:`decision tree <tree>` is used to predict simultaneously the noisy
x and y observations of a circle given a single underlying feature. As a
result, it learns local linear regressions approximating the circle.

We can see that if the maximum depth of the tree (controlled by the
`max_depth` parameter) is set too high, the decision trees learn too fine
details of the training data and learn from the noise, i.e. they overfit.
"""

import matplotlib.pyplot as plt
import numpy as np

from sklearn.tree import DecisionTreeRegressor

print(__doc__)

# Create a random dataset.
# X: 100 sorted samples of a single feature drawn uniformly from [-100, 100).
# y: two outputs per sample, (pi * sin(X), pi * cos(X)), i.e. points lying
#    on a circle of radius pi.
rng = np.random.RandomState(1)
X = np.sort(200 * rng.rand(100, 1) - 100, axis=0)
y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T

# Perturb every 5th sample with uniform noise in (-0.5, 0.5] on both outputs.
# The noise shape is taken from the slice itself so that it stays correct if
# the number of samples or the stride is changed.
y[::5, :] += 0.5 - rng.rand(*y[::5, :].shape)

# Fit regression models of increasing depth; deeper trees follow the
# training data (including its noise) more closely.
clf_1 = DecisionTreeRegressor(max_depth=2)
clf_2 = DecisionTreeRegressor(max_depth=5)
clf_3 = DecisionTreeRegressor(max_depth=8)
clf_1.fit(X, y)
clf_2.fit(X, y)
clf_3.fit(X, y)

# Predict on a dense grid covering the same feature range as the training set.
X_test = np.arange(-100.0, 100.0, 0.01)[:, np.newaxis]
y_1 = clf_1.predict(X_test)
y_2 = clf_2.predict(X_test)
y_3 = clf_3.predict(X_test)

# Plot the results. Both axes show regression *targets* (first output against
# second output); the input feature X is not drawn.
plt.figure()
plt.scatter(y[:, 0], y[:, 1], c="k", label="data")
plt.scatter(y_1[:, 0], y_1[:, 1], c="g", label="max_depth=2")
plt.scatter(y_2[:, 0], y_2[:, 1], c="r", label="max_depth=5")
plt.scatter(y_3[:, 0], y_3[:, 1], c="b", label="max_depth=8")
plt.xlim([-6, 6])
plt.ylim([-6, 6])
plt.xlabel("target 1")
plt.ylabel("target 2")
plt.title("Multi-output Decision Tree Regression")
plt.legend()
plt.show()