{
 "metadata": {
  "name": "",
  "signature": "sha256:8f2ab6fc66b30e50ac6d8c111699dab1e3580fdf87071476699891b0f2545d30"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "Linear Regression using Pandas"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Fit a straight line (`np.polyfit`, degree 1) to weight against length, first on the raw values and then on their natural logarithms, and plot both fits side by side."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Import what you need\n",
      "import numpy as np\n",
      "import pandas as pd\n",
      "import matplotlib.pyplot as plt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 84
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Load the data"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Paired observations: one length and one weight per row (25 rows)\n",
      "data = pd.DataFrame({\n",
      "    'length': [94,74,147,58,86,94,63,86,69,72,128,85,82,86,88,72,74,61,90,89,68,76,114,90,78],\n",
      "    'weight': [130,51,640,28,80,110,33,90,36,38,366,84,80,83,70,61,54,44,106,84,39,42,197,102,57]\n",
      "})\n",
      "\n",
      "# Element-wise natural log of both columns, kept as a separate frame\n",
      "data_log = np.log(data)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 85
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Build the models"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def fit_line(frame, x_col='length', y_col='weight'):\n",
      "    \"\"\"Fit y = slope * x + intercept by least squares.\n",
      "\n",
      "    Parameters\n",
      "    ----------\n",
      "    frame : pandas.DataFrame\n",
      "        Table holding the two columns to relate.\n",
      "    x_col, y_col : str\n",
      "        Names of the predictor and response columns.\n",
      "\n",
      "    Returns\n",
      "    -------\n",
      "    coeffs : numpy.ndarray\n",
      "        [slope, intercept], as returned by np.polyfit with degree 1.\n",
      "    fitted : pandas.DataFrame\n",
      "        The fitted line evaluated at every observed x, sorted by x and\n",
      "        stored under the same two column names so it can be drawn\n",
      "        directly over the data.\n",
      "    \"\"\"\n",
      "    coeffs = np.polyfit(frame[x_col], frame[y_col], 1)\n",
      "    # np.sort returns a copy, so the caller's frame is left untouched;\n",
      "    # sorting lets the line be drawn in a single left-to-right pass.\n",
      "    xs = np.sort(frame[x_col].values)\n",
      "    fitted = pd.DataFrame({x_col: xs, y_col: np.polyval(coeffs, xs)})\n",
      "    return coeffs, fitted\n",
      "\n",
      "\n",
      "# Model for the original data\n",
      "lm_original, lm_original_plot = fit_line(data)\n",
      "\n",
      "# Model for the log-transformed data\n",
      "lm_log, lm_log_plot = fit_line(data_log)\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 86
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Plot the models"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# One panel per dataset: observations as points, fitted line on top\n",
      "fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))\n",
      "\n",
      "panels = [\n",
      "    (axes[0], data, lm_original_plot, 'Original Values', 'length', 'weight'),\n",
      "    (axes[1], data_log, lm_log_plot, 'Log Values', 'log(length)', 'log(weight)'),\n",
      "]\n",
      "\n",
      "for ax, points, fit, title, x_label, y_label in panels:\n",
      "    points.plot(kind='scatter', color='blue', x='length', y='weight', ax=ax, title=title)\n",
      "    ax.plot(fit['length'], fit['weight'], color='red', label='Linear fit')\n",
      "    # pandas labels the axes with the column names, which is misleading\n",
      "    # for the log panel, so set both labels explicitly.\n",
      "    ax.set_xlabel(x_label)\n",
      "    ax.set_ylabel(y_label)\n",
      "    ax.legend(loc='best')\n",
      "\n",
      "fig.tight_layout()\n",
      "plt.show()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 87
    }
   ],
   "metadata": {}
  }
 ]
}