\n",
" \n",
" name | \n",
" index | \n",
" value | \n",
" stderr | \n",
"
\n",
" \n",
" (intercept) | \n",
" None | \n",
" -47114.0206702 | \n",
" 4923.34437753 | \n",
"
\n",
" \n",
" sqft_living | \n",
" None | \n",
" 281.957850166 | \n",
" 2.16405465323 | \n",
"
\n",
"
\n",
"[2 rows x 4 columns]
\n",
"
"
],
"text/plain": [
"Columns:\n",
"\tname\tstr\n",
"\tindex\tstr\n",
"\tvalue\tfloat\n",
"\tstderr\tfloat\n",
"\n",
"Rows: 2\n",
"\n",
"Data:\n",
"+-------------+-------+----------------+---------------+\n",
"| name | index | value | stderr |\n",
"+-------------+-------+----------------+---------------+\n",
"| (intercept) | None | -47114.0206702 | 4923.34437753 |\n",
"| sqft_living | None | 281.957850166 | 2.16405465323 |\n",
"+-------------+-------+----------------+---------------+\n",
"[2 rows x 4 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sqft_model.get(\"coefficients\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Build a more elaborate model: more features"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'zipcode', 'condition', 'grade', 'waterfront', 'view', 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'lat', 'long', 'sqft_living15', 'sqft_lot15']\n",
"PROGRESS: Linear regression:\n",
"PROGRESS: --------------------------------------------------------\n",
"PROGRESS: Number of examples : 17384\n",
"PROGRESS: Number of features : 18\n",
"PROGRESS: Number of unpacked features : 18\n",
"PROGRESS: Number of coefficients : 127\n",
"PROGRESS: Starting Newton Method\n",
"PROGRESS: --------------------------------------------------------\n",
"PROGRESS: +-----------+----------+--------------+--------------------+---------------+\n",
"PROGRESS: | Iteration | Passes | Elapsed Time | Training-max_error | Training-rmse |\n",
"PROGRESS: +-----------+----------+--------------+--------------------+---------------+\n",
"PROGRESS: | 1 | 2 | 0.042353 | 3469012.450686 | 154580.940736 |\n",
"PROGRESS: +-----------+----------+--------------+--------------------+---------------+\n",
"PROGRESS: SUCCESS: Optimal solution found.\n",
"PROGRESS:\n",
"{'max_error': 3556849.413858208, 'rmse': 156831.1168021901}\n"
]
}
],
"source": [
"# Baseline feature list shared with the simpler multi-feature model.\n",
"my_features = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'zipcode']\n",
"#sales[my_features].show()\n",
"\n",
"# The advanced list is the baseline features followed by the extra columns below.\n",
"advanced_features = my_features + [\n",
"    'condition',      # condition of house\n",
"    'grade',          # measure of quality of construction\n",
"    'waterfront',     # waterfront property\n",
"    'view',           # type of view\n",
"    'sqft_above',     # square feet above ground\n",
"    'sqft_basement',  # square feet in basement\n",
"    'yr_built',       # the year built\n",
"    'yr_renovated',   # the year renovated\n",
"    'lat', 'long',    # the lat-long of the parcel\n",
"    'sqft_living15',  # average sq.ft. of 15 nearest neighbors\n",
"    'sqft_lot15',     # average lot size of 15 nearest neighbors\n",
"]\n",
"\n",
"print(advanced_features)\n",
"\n",
"# Fit a linear regression on the training split using every advanced feature,\n",
"# then report its error metrics on the held-out test split.\n",
"advanced_features_model = graphlab.linear_regression.create(\n",
"    training_data,\n",
"    target='price',\n",
"    features=advanced_features,\n",
"    validation_set=None\n",
")\n",
"print(advanced_features_model.evaluate(testing_data))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Box-and-whisker view of price grouped by zipcode.\n",
"sales.show(x='zipcode', y='price', view='BoxWhisker Plot')\n",
"\n",
"# Keep only the rows whose zipcode equals '98039'.\n",
"houses = sales[sales['zipcode'] == '98039']"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PROGRESS: Linear regression:\n",
"PROGRESS: --------------------------------------------------------\n",
"PROGRESS: Number of examples : 17384\n",
"PROGRESS: Number of features : 6\n",
"PROGRESS: Number of unpacked features : 6\n",
"PROGRESS: Number of coefficients : 115\n",
"PROGRESS: Starting Newton Method\n",
"PROGRESS: --------------------------------------------------------\n",
"PROGRESS: +-----------+----------+--------------+--------------------+---------------+\n",
"PROGRESS: | Iteration | Passes | Elapsed Time | Training-max_error | Training-rmse |\n",
"PROGRESS: +-----------+----------+--------------+--------------------+---------------+\n",
"PROGRESS: | 1 | 2 | 0.030431 | 3763208.270523 | 181908.848367 |\n",
"PROGRESS: +-----------+----------+--------------+--------------------+---------------+\n",
"PROGRESS: SUCCESS: Optimal solution found.\n",
"PROGRESS:\n"
]
}
],
"source": [
"# Train a linear regression on the training split using the my_features columns.\n",
"my_features_model = graphlab.linear_regression.create(\n",
"    training_data,\n",
"    target='price',\n",
"    features=my_features,\n",
"    validation_set=None\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'max_error': 4143550.8825285938, 'rmse': 255191.02870527358}\n",
"{'max_error': 3486584.509381705, 'rmse': 179542.4333126903}\n"
]
}
],
"source": [
"# Test-set error metrics: the sqft-only model first, then the my_features model.\n",
"print(sqft_model.evaluate(testing_data))\n",
"print(my_features_model.evaluate(testing_data))"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[620000, ... ]\n",
"[629584.8197281545]\n",
"[721918.9333272863]\n"
]
}
],
"source": [
"# Select a single listing by id, then show its recorded price next to\n",
"# the prediction from each of the two models.\n",
"house1 = sales[sales['id'] == '5309101200']\n",
"print(house1['price'])\n",
"print(sqft_model.predict(house1))\n",
"print(my_features_model.predict(house1))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}