{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "import models\n", "import orca\n", "import pandas as pd\n", "pd.options.mode.chained_assignment = None" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Hedonic Estimation" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running model 'rsh_estimate'\n", "Filling column non_residential_rent with value 0 (142400 values)\n", "Filling column residential_units with value 0 (0 values)\n", "Filling column year_built with value 1927.0 (3116 values)\n", "Filling column residential_sales_price with value 0 (14196 values)\n", "Filling column non_residential_sqft with value 0 (1341 values)\n", "Filling column building_type_id with value 2.0 (0 values)\n", " OLS Regression Results \n", "=============================================================================================\n", "Dep. Variable: np.log1p(residential_sales_price) R-squared: 0.399\n", "Model: OLS Adj. R-squared: 0.399\n", "Method: Least Squares F-statistic: 1.240e+04\n", "Date: Tue, 28 Apr 2015 Prob (F-statistic): 0.00\n", "Time: 11:32:43 Log-Likelihood: -2.5241e+05\n", "No. Observations: 149409 AIC: 5.048e+05\n", "Df Residuals: 149400 BIC: 5.049e+05\n", "Df Model: 8 \n", "Covariance Type: nonrobust \n", "================================================================================================\n", " coef std err t P>|t| [95.0% Conf. Int.]\n", "------------------------------------------------------------------------------------------------\n", "Intercept -3.8638 0.170 -22.751 0.000 -4.197 -3.531\n", "I(year_built < 1940)[T.True] 0.0012 0.007 0.160 0.873 -0.013 0.016\n", "I(year_built > 2005)[T.True] -0.0633 0.049 -1.300 0.194 -0.159 0.032\n", "np.log1p(unit_sqft) -1.4830 0.007 -210.948 0.000 -1.497 -1.469\n", "np.log1p(unit_lot_size) -0.1476 0.006 -23.224 0.000 -0.160 -0.135\n", "sum_residential_units 0.0987 0.008 11.987 0.000 0.083 0.115\n", "ave_lot_sqft -0.2177 0.010 -21.273 0.000 -0.238 -0.198\n", "ave_unit_sqft 0.9371 0.020 45.808 0.000 0.897 0.977\n", "ave_income 1.4191 0.015 92.819 0.000 1.389 1.449\n", "==============================================================================\n", "Omnibus: 89684.695 Durbin-Watson: 1.798\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 820175.061\n", "Skew: -2.842 Prob(JB): 0.00\n", "Kurtosis: 12.972 Cond. No. 1.01e+03\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "[2] The condition number is large, 1.01e+03. This might indicate that there are\n", "strong multicollinearity or other numerical problems.\n", "Time to execute model 'rsh_estimate': 1.19s\n", "Total time to execute: 1.19s\n" ] } ], "source": [ "orca.run([\"rsh_estimate\"])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running model 'nrh_estimate'\n", "Filling column job_category with value service (331 values)\n", "REGRESSION RESULTS FOR SEGMENT Retail\n", "\n", " OLS Regression Results \n", "==========================================================================================\n", "Dep. Variable: np.log1p(non_residential_rent) R-squared: 0.046\n", "Model: OLS Adj. R-squared: 0.045\n", "Method: Least Squares F-statistic: 43.81\n", "Date: Tue, 28 Apr 2015 Prob (F-statistic): 2.83e-44\n", "Time: 11:32:43 Log-Likelihood: -5334.2\n", "No. Observations: 4592 AIC: 1.068e+04\n", "Df Residuals: 4586 BIC: 1.072e+04\n", "Df Model: 5 \n", "Covariance Type: nonrobust \n", "================================================================================================\n", " coef std err t P>|t| [95.0% Conf. Int.]\n", "------------------------------------------------------------------------------------------------\n", "Intercept 3.9704 0.331 11.986 0.000 3.321 4.620\n", "I(year_built < 1940)[T.True] -0.3253 0.025 -12.869 0.000 -0.375 -0.276\n", "I(year_built > 2005)[T.True] -0.1107 0.104 -1.067 0.286 -0.314 0.093\n", "np.log1p(stories) 0.2327 0.037 6.311 0.000 0.160 0.305\n", "ave_income -0.0483 0.026 -1.874 0.061 -0.099 0.002\n", "jobs -0.0367 0.014 -2.688 0.007 -0.063 -0.010\n", "==============================================================================\n", "Omnibus: 3246.917 Durbin-Watson: 1.731\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 32590.849\n", "Skew: -3.490 Prob(JB): 0.00\n", "Kurtosis: 14.028 Cond. No. 374.\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "REGRESSION RESULTS FOR SEGMENT Office\n", "\n", " OLS Regression Results \n", "==========================================================================================\n", "Dep. Variable: np.log1p(non_residential_rent) R-squared: 0.066\n", "Model: OLS Adj. R-squared: 0.064\n", "Method: Least Squares F-statistic: 51.14\n", "Date: Tue, 28 Apr 2015 Prob (F-statistic): 2.16e-51\n", "Time: 11:32:43 Log-Likelihood: -4021.6\n", "No. Observations: 3653 AIC: 8055.\n", "Df Residuals: 3647 BIC: 8092.\n", "Df Model: 5 \n", "Covariance Type: nonrobust \n", "================================================================================================\n", " coef std err t P>|t| [95.0% Conf. Int.]\n", "------------------------------------------------------------------------------------------------\n", "Intercept 4.1593 0.321 12.975 0.000 3.531 4.788\n", "I(year_built < 1940)[T.True] -0.3156 0.027 -11.908 0.000 -0.368 -0.264\n", "I(year_built > 2005)[T.True] -0.0238 0.119 -0.201 0.841 -0.256 0.209\n", "np.log1p(stories) 0.1750 0.024 7.183 0.000 0.127 0.223\n", "ave_income -0.0719 0.025 -2.853 0.004 -0.121 -0.022\n", "jobs -0.0429 0.013 -3.201 0.001 -0.069 -0.017\n", "==============================================================================\n", "Omnibus: 2697.682 Durbin-Watson: 1.827\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 29985.208\n", "Skew: -3.650 Prob(JB): 0.00\n", "Kurtosis: 14.988 Cond. No. 348.\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "REGRESSION RESULTS FOR SEGMENT Industrial\n", "\n", " OLS Regression Results \n", "==========================================================================================\n", "Dep. Variable: np.log1p(non_residential_rent) R-squared: 0.105\n", "Model: OLS Adj. R-squared: 0.103\n", "Method: Least Squares F-statistic: 59.66\n", "Date: Tue, 28 Apr 2015 Prob (F-statistic): 6.48e-59\n", "Time: 11:32:43 Log-Likelihood: -2035.1\n", "No. Observations: 2558 AIC: 4082.\n", "Df Residuals: 2552 BIC: 4117.\n", "Df Model: 5 \n", "Covariance Type: nonrobust \n", "================================================================================================\n", " coef std err t P>|t| [95.0% Conf. Int.]\n", "------------------------------------------------------------------------------------------------\n", "Intercept 3.7205 0.309 12.025 0.000 3.114 4.327\n", "I(year_built < 1940)[T.True] -0.3186 0.023 -13.995 0.000 -0.363 -0.274\n", "I(year_built > 2005)[T.True] -0.0483 0.101 -0.479 0.632 -0.246 0.150\n", "np.log1p(stories) 0.3851 0.042 9.187 0.000 0.303 0.467\n", "ave_income -0.1826 0.027 -6.852 0.000 -0.235 -0.130\n", "jobs 0.0605 0.012 4.981 0.000 0.037 0.084\n", "==============================================================================\n", "Omnibus: 1781.535 Durbin-Watson: 1.817\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 17978.361\n", "Skew: -3.372 Prob(JB): 0.00\n", "Kurtosis: 14.099 Cond. No. 385.\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "Time to execute model 'nrh_estimate': 0.80s\n", "Total time to execute: 0.80s\n" ] } ], "source": [ "orca.run([\"nrh_estimate\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Hedonic Simulation" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running model 'rsh_simulate'\n", "count 140780.000000\n", "mean 1156.971200\n", "std 41503.246343\n", "min 0.002067\n", "25% 252.727481\n", "50% 379.993499\n", "75% 554.244653\n", "max 12507033.315516\n", "dtype: float64\n", "Time to execute model 'rsh_simulate': 0.92s\n", "Running model 'nrh_simulate'\n", "count 10803.000000\n", "mean 20.729909\n", "std 6.833292\n", "min 6.258496\n", "25% 17.431477\n", "50% 20.908725\n", "75% 25.077906\n", "max 47.266957\n", "dtype: float64\n", "Time to execute model 'nrh_simulate': 0.54s\n", "Total time to execute: 1.46s\n" ] } ], "source": [ "orca.run([\"rsh_simulate\", \"nrh_simulate\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## LCM Estimation" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running model 'hlcm_estimate'\n", "LCM RESULTS FOR SEGMENT 0\n", "\n", "Null Log-liklihood: -13815.511\n", "Log-liklihood at convergence: -11262.625\n", "Log-liklihood Ratio: 0.185\n", "\n", "+-----------------------------------+-------------+------------+---------+\n", "| Component | Coefficient | Std. Error | T-Score |\n", "+-----------------------------------+-------------+------------+---------+\n", "| np.log1p(residential_sales_price) | -0.000 | 2.513 | -0.000 |\n", "| np.log1p(unit_sqft) | -0.352 | 0.023 | -15.020 |\n", "| sum_residential_units | 0.402 | 0.108 | 3.723 |\n", "| ave_unit_sqft | 0.301 | 0.108 | 2.775 |\n", "| ave_lot_sqft | -0.111 | 0.038 | -2.949 |\n", "| ave_income | -0.460 | 0.140 | -3.285 |\n", "| hhsize | -0.644 | 0.145 | -4.455 |\n", "| jobs | 0.032 | 0.032 | 1.012 |\n", "| sfdu | -0.045 | 0.019 | -2.341 |\n", "| renters | -0.550 | 0.039 | -14.172 |\n", "| poor | 0.806 | 0.152 | 5.292 |\n", "| population | -0.319 | 0.177 | -1.803 |\n", "+-----------------------------------+-------------+------------+---------+\n", "LCM RESULTS FOR SEGMENT 1\n", "\n", "Null Log-liklihood: -13815.511\n", "Log-liklihood at convergence: -12669.732\n", "Log-liklihood Ratio: 0.083\n", "\n", "+-----------------------------------+-------------+------------+---------+\n", "| Component | Coefficient | Std. Error | T-Score |\n", "+-----------------------------------+-------------+------------+---------+\n", "| np.log1p(residential_sales_price) | -0.000 | 3.059 | -0.000 |\n", "| np.log1p(unit_sqft) | -0.470 | 0.024 | -19.443 |\n", "| sum_residential_units | 0.046 | 0.112 | 0.414 |\n", "| ave_unit_sqft | 0.204 | 0.109 | 1.867 |\n", "| ave_lot_sqft | -0.233 | 0.041 | -5.716 |\n", "| ave_income | 0.420 | 0.190 | 2.213 |\n", "| hhsize | -0.331 | 0.140 | -2.362 |\n", "| jobs | 0.078 | 0.033 | 2.407 |\n", "| sfdu | -0.055 | 0.019 | -2.911 |\n", "| renters | -0.684 | 0.045 | -15.063 |\n", "| poor | 0.490 | 0.162 | 3.029 |\n", "| population | 0.356 | 0.188 | 1.892 |\n", "+-----------------------------------+-------------+------------+---------+\n", "LCM RESULTS FOR SEGMENT 2\n", "\n", "Null Log-liklihood: -13815.511\n", "Log-liklihood at convergence: -13219.673\n", "Log-liklihood Ratio: 0.043\n", "\n", "+-----------------------------------+-------------+------------+---------+\n", "| Component | Coefficient | Std. Error | T-Score |\n", "+-----------------------------------+-------------+------------+---------+\n", "| np.log1p(residential_sales_price) | -0.000 | 3.430 | -0.000 |\n", "| np.log1p(unit_sqft) | -0.538 | 0.026 | -20.924 |\n", "| sum_residential_units | 0.202 | 0.121 | 1.668 |\n", "| ave_unit_sqft | 0.206 | 0.127 | 1.631 |\n", "| ave_lot_sqft | -0.213 | 0.047 | -4.487 |\n", "| ave_income | 0.827 | 0.217 | 3.814 |\n", "| hhsize | -0.416 | 0.148 | -2.813 |\n", "| jobs | 0.020 | 0.033 | 0.620 |\n", "| sfdu | -0.000 | 0.020 | -0.012 |\n", "| renters | -0.574 | 0.055 | -10.534 |\n", "| poor | 0.165 | 0.167 | 0.988 |\n", "| population | 0.544 | 0.206 | 2.639 |\n", "+-----------------------------------+-------------+------------+---------+\n", "LCM RESULTS FOR SEGMENT 3\n", "\n", "Null Log-liklihood: -13815.511\n", "Log-liklihood at convergence: -13239.529\n", "Log-liklihood Ratio: 0.042\n", "\n", "+-----------------------------------+-------------+------------+---------+\n", "| Component | Coefficient | Std. Error | T-Score |\n", "+-----------------------------------+-------------+------------+---------+\n", "| np.log1p(residential_sales_price) | -0.000 | 3.888 | -0.000 |\n", "| np.log1p(unit_sqft) | -0.587 | 0.024 | -24.442 |\n", "| sum_residential_units | -0.076 | 0.123 | -0.621 |\n", "| ave_unit_sqft | 0.407 | 0.115 | 3.535 |\n", "| ave_lot_sqft | -0.198 | 0.048 | -4.124 |\n", "| ave_income | 1.822 | 0.254 | 7.176 |\n", "| hhsize | -0.470 | 0.150 | -3.139 |\n", "| jobs | 0.115 | 0.032 | 3.607 |\n", "| sfdu | -0.026 | 0.020 | -1.309 |\n", "| renters | -0.646 | 0.056 | -11.445 |\n", "| poor | 0.210 | 0.184 | 1.141 |\n", "| population | 0.723 | 0.220 | 3.282 |\n", "+-----------------------------------+-------------+------------+---------+\n", "Time to execute model 'hlcm_estimate': 12.44s\n", "Total time to execute: 12.44s\n" ] } ], "source": [ "orca.run([\"hlcm_estimate\"])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running model 'elcm_estimate'\n", "LCM RESULTS FOR SEGMENT industrial\n", "\n", "Null Log-liklihood: -17292.414\n", "Log-liklihood at convergence: -14880.463\n", "Log-liklihood Ratio: 0.139\n", "\n", "+--------------------------------+-------------+------------+---------+\n", "| Component | Coefficient | Std. Error | T-Score |\n", "+--------------------------------+-------------+------------+---------+\n", "| np.log1p(non_residential_rent) | -0.000 | 0.901 | -0.000 |\n", "| sum_job_spaces | -0.316 | 0.042 | -7.488 |\n", "| sum_residential_units | 0.141 | 0.053 | 2.659 |\n", "| ave_unit_sqft | 0.095 | 0.023 | 4.065 |\n", "| ave_lot_sqft | 0.473 | 0.030 | 15.634 |\n", "| ave_income | -0.221 | 0.058 | -3.799 |\n", "| hhsize | -0.009 | 0.118 | -0.075 |\n", "| jobs | 0.723 | 0.046 | 15.702 |\n", "| poor | -0.393 | 0.033 | -11.862 |\n", "+--------------------------------+-------------+------------+---------+\n", "LCM RESULTS FOR SEGMENT agriculture\n", "\n", "Null Log-liklihood: -990.112\n", "Log-liklihood at convergence: -683.138\n", "Log-liklihood Ratio: 0.310\n", "\n", "+--------------------------------+-------------+------------+---------+\n", "| Component | Coefficient | Std. Error | T-Score |\n", "+--------------------------------+-------------+------------+---------+\n", "| np.log1p(non_residential_rent) | -0.000 | 3.779 | -0.000 |\n", "| sum_job_spaces | -0.932 | 0.294 | -3.172 |\n", "| sum_residential_units | 0.426 | 0.297 | 1.434 |\n", "| ave_unit_sqft | -0.654 | 0.089 | -7.367 |\n", "| ave_lot_sqft | 0.351 | 0.190 | 1.843 |\n", "| ave_income | -0.100 | 0.287 | -0.347 |\n", "| hhsize | -0.810 | 0.721 | -1.123 |\n", "| jobs | 1.676 | 0.315 | 5.312 |\n", "| poor | -0.454 | 0.165 | -2.758 |\n", "+--------------------------------+-------------+------------+---------+\n", "LCM RESULTS FOR SEGMENT service\n", "\n", "Null Log-liklihood: -15883.232\n", "Log-liklihood at convergence: -13151.724\n", "Log-liklihood Ratio: 0.172\n", "\n", "+--------------------------------+-------------+------------+---------+\n", "| Component | Coefficient | Std. Error | T-Score |\n", "+--------------------------------+-------------+------------+---------+\n", "| np.log1p(non_residential_rent) | -0.000 | 0.898 | -0.000 |\n", "| sum_job_spaces | -0.692 | 0.046 | -15.135 |\n", "| sum_residential_units | -0.008 | 0.052 | -0.146 |\n", "| ave_unit_sqft | -0.055 | 0.019 | -2.921 |\n", "| ave_lot_sqft | -0.006 | 0.031 | -0.206 |\n", "| ave_income | -0.450 | 0.051 | -8.845 |\n", "| hhsize | -1.366 | 0.145 | -9.402 |\n", "| jobs | 1.362 | 0.048 | 28.130 |\n", "| poor | -0.288 | 0.033 | -8.660 |\n", "+--------------------------------+-------------+------------+---------+\n", "LCM RESULTS FOR SEGMENT retail\n", "\n", "Null Log-liklihood: -14750.360\n", "Log-liklihood at convergence: -13907.690\n", "Log-liklihood Ratio: 0.057\n", "\n", "+--------------------------------+-------------+------------+---------+\n", "| Component | Coefficient | Std. Error | T-Score |\n", "+--------------------------------+-------------+------------+---------+\n", "| np.log1p(non_residential_rent) | -0.000 | 0.915 | -0.000 |\n", "| sum_job_spaces | -0.100 | 0.046 | -2.163 |\n", "| sum_residential_units | -0.414 | 0.061 | -6.774 |\n", "| ave_unit_sqft | -0.128 | 0.031 | -4.136 |\n", "| ave_lot_sqft | 0.062 | 0.036 | 1.705 |\n", "| ave_income | 0.636 | 0.056 | 11.454 |\n", "| hhsize | -1.451 | 0.131 | -11.047 |\n", "| jobs | 0.795 | 0.056 | 14.179 |\n", "| poor | 0.283 | 0.041 | 6.844 |\n", "+--------------------------------+-------------+------------+---------+\n", "Time to execute model 'elcm_estimate': 6.37s\n", "Total time to execute: 6.38s\n" ] } ], "source": [ "orca.run([\"elcm_estimate\"])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }