{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Sklearn-pandas\n", "- a bridge between sklearn machine learning methods and pandas style data frames\n", "- Specifically, it provides:\n", " - a way to map DataFrame columns to transformations, which are later recombined into features\n", " - a way to cross-validate a pipeline that takes a pandas data frame as input.\n", "- credit : https://github.com/paulgb/sklearn-pandas" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import sklearn\n", "from sklearn import preprocessing, decomposition, linear_model, pipeline, metrics\n", "import sklearn_pandas\n", "import pandas as pd\n", "import numpy as np" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "## load some data \n", "data = pd.DataFrame({'pet': ['cat', 'dog', 'dog', 'fish', 'cat', 'dog', 'cat', 'fish'],\n", " 'children': [4., 6, 3, 3, 2, 3, 5, 4],\n", " 'salary': [90, 24, 44, 27, 32, 59, 36, 27]})\n", "data" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated.\n", "\n", " warnings.warn(d.msg, DeprecationWarning)\n", "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated.\n", "\n", " warnings.warn(d.msg, DeprecationWarning)\n" ] }, { "html": [ "
\n", " | children | \n", "pet | \n", "salary | \n", "
---|---|---|---|
0 | \n", "4 | \n", "cat | \n", "90 | \n", "
1 | \n", "6 | \n", "dog | \n", "24 | \n", "
2 | \n", "3 | \n", "dog | \n", "44 | \n", "
3 | \n", "3 | \n", "fish | \n", "27 | \n", "
4 | \n", "2 | \n", "cat | \n", "32 | \n", "
5 | \n", "3 | \n", "dog | \n", "59 | \n", "
6 | \n", "5 | \n", "cat | \n", "36 | \n", "
7 | \n", "4 | \n", "fish | \n", "27 | \n", "