{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Visualizing a Linear Regression Model in Python"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Visualize a Simple Linear Model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Goal: visualize the relationship between the topsoil lead concentration (`lead` column, on the y-axis) and the topsoil cadmium concentration (`cadmium` column, on the x-axis)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Previous steps necessary\n",
    "# import packages\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.linear_model import LinearRegression\n",
    "\n",
    "# import dataset\n",
    "data = pd.read_csv(\"meuse.csv\")\n",
    "\n",
    "# scikit-learn expects a 2-D feature matrix of shape (n_samples, n_features).\n",
    "# `Series.reshape` is deprecated in pandas (FutureWarning: it will raise in a\n",
    "# subsequent release), so reshape the underlying NumPy array instead and\n",
    "# reuse that single array for both fitting and plotting.\n",
    "cadmium_2d = data.cadmium.values.reshape((-1, 1))\n",
    "\n",
    "# build the model; `fit` returns the fitted estimator itself, so `lr` and\n",
    "# `regression_model` refer to the same fitted model\n",
    "regression_model = LinearRegression()\n",
    "lr = regression_model.fit(cadmium_2d, data.lead)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We use the matplotlib package to visualize the data and the fitted line:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# reference: https://becominghuman.ai/implementing-and-visualizing-linear-regression-in-python-with-scikit-learn-a073768dc688\n",
    "fig, ax = plt.subplots()\n",
    "# observed samples: cadmium (x) against lead (y)\n",
    "ax.scatter(cadmium_2d, data.lead, color=\"red\")\n",
    "# fitted regression line: the model's predictions at the observed cadmium values\n",
    "ax.plot(cadmium_2d, lr.predict(cadmium_2d), color=\"green\")\n",
    "ax.set(title=\"Lead vs Cadmium\", xlabel=\"Cadmium\", ylabel=\"Lead\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Please see the next post to learn how we analyze the model."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}