{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n# Comparing random forests and the multi-output meta estimator\n\nAn example to compare multi-output regression with random forest and\nthe `multioutput.MultiOutputRegressor ` meta-estimator.\n\nThis example illustrates the use of the\n`multioutput.MultiOutputRegressor ` meta-estimator\nto perform multi-output regression. A random forest regressor is used,\nwhich supports multi-output regression natively, so the results can be\ncompared.\n\nThe random forest regressor will only ever predict values within the\nrange of observations or closer to zero for each of the targets. As a\nresult the predictions are biased towards the centre of the circle.\n\nUsing a single underlying feature the model learns both the\nx and y coordinate as output.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Authors: The scikit-learn developers\n# SPDX-License-Identifier: BSD-3-Clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.multioutput import MultiOutputRegressor\n\n# Create a random dataset\nrng = np.random.RandomState(1)\nX = np.sort(200 * rng.rand(600, 1) - 100, axis=0)\ny = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T\ny += 0.5 - rng.rand(*y.shape)\n\nX_train, X_test, y_train, y_test = train_test_split(\n X, y, train_size=400, test_size=200, random_state=4\n)\n\nmax_depth = 30\nregr_multirf = MultiOutputRegressor(\n RandomForestRegressor(n_estimators=100, max_depth=max_depth, random_state=0)\n)\nregr_multirf.fit(X_train, y_train)\n\nregr_rf = RandomForestRegressor(n_estimators=100, max_depth=max_depth, random_state=2)\nregr_rf.fit(X_train, y_train)\n\n# Predict on new data\ny_multirf = regr_multirf.predict(X_test)\ny_rf = regr_rf.predict(X_test)\n\n# Plot the results\nplt.figure()\ns = 50\na = 0.4\nplt.scatter(\n y_test[:, 0],\n y_test[:, 1],\n edgecolor=\"k\",\n c=\"navy\",\n s=s,\n marker=\"s\",\n alpha=a,\n label=\"Data\",\n)\nplt.scatter(\n y_multirf[:, 0],\n y_multirf[:, 1],\n edgecolor=\"k\",\n c=\"cornflowerblue\",\n s=s,\n alpha=a,\n label=\"Multi RF score=%.2f\" % regr_multirf.score(X_test, y_test),\n)\nplt.scatter(\n y_rf[:, 0],\n y_rf[:, 1],\n edgecolor=\"k\",\n c=\"c\",\n s=s,\n marker=\"^\",\n alpha=a,\n label=\"RF score=%.2f\" % regr_rf.score(X_test, y_test),\n)\nplt.xlim([-6, 6])\nplt.ylim([-6, 6])\nplt.xlabel(\"target 1\")\nplt.ylabel(\"target 2\")\nplt.title(\"Comparing random forests and the multi-output meta estimator\")\nplt.legend()\nplt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.21" } }, "nbformat": 4, "nbformat_minor": 0 }