{ "metadata": { "name": "", "signature": "sha256:9f28c7d139b6281fbd1a20d493f37366d5649955e2199f2d3817d1ea9302e2ff" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Expand Cells Containing Lists Into Their Own Variables In Pandas\n", "\n", "- **Author:** [Chris Albon](http://www.chrisalbon.com/), [@ChrisAlbon](https://twitter.com/chrisalbon)\n", "- **Date:** -\n", "- **Repo:** [Python 3 code snippets for data science](https://github.com/chrisalbon/code_py)\n", "- **Note:**" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# import pandas\n", "import pandas as pd" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 20 }, { "cell_type": "code", "collapsed": false, "input": [ "# build a small example frame: every cell of 'tags' holds a list\n", "raw_data = {\n", "    'score': [1, 2, 3],\n", "    'tags': [\n", "        ['apple', 'pear', 'guava'],\n", "        ['truck', 'car', 'plane'],\n", "        ['cat', 'dog', 'mouse'],\n", "    ],\n", "}\n", "df = pd.DataFrame(raw_data, columns=['score', 'tags'])\n", "\n", "# display the frame\n", "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
scoretags
0 1 [apple, pear, guava]
1 2 [truck, car, plane]
2 3 [cat, dog, mouse]
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 24, "text": [ " score tags\n", "0 1 [apple, pear, guava]\n", "1 2 [truck, car, plane]\n", "2 3 [cat, dog, mouse]" ] } ], "prompt_number": 24 }, { "cell_type": "code", "collapsed": false, "input": [ "# expand df.tags into its own dataframe, one column per list element\n", "# (built once from a plain list of lists instead of a per-row .apply(pd.Series))\n", "tags = pd.DataFrame(df['tags'].tolist(), index=df.index)\n", "\n", "# rename each variable in tags: 0, 1, 2 -> tag_0, tag_1, tag_2\n", "tags = tags.add_prefix('tag_')\n", "\n", "# view the tags dataframe\n", "tags" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tag_0tag_1tag_2
0 apple pear guava
1 truck car plane
2 cat dog mouse
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 25, "text": [ " tag_0 tag_1 tag_2\n", "0 apple pear guava\n", "1 truck car plane\n", "2 cat dog mouse" ] } ], "prompt_number": 25 }, { "cell_type": "code", "collapsed": false, "input": [ "# join the tags dataframe back to the original dataframe, side by side on the shared row index\n", "# (pd.concat returns a new frame, so no defensive [:] copies of the inputs are needed)\n", "pd.concat([df, tags], axis=1)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
scoretagstag_0tag_1tag_2
0 1 [apple, pear, guava] apple pear guava
1 2 [truck, car, plane] truck car plane
2 3 [cat, dog, mouse] cat dog mouse
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 26, "text": [ " score tags tag_0 tag_1 tag_2\n", "0 1 [apple, pear, guava] apple pear guava\n", "1 2 [truck, car, plane] truck car plane\n", "2 3 [cat, dog, mouse] cat dog mouse" ] } ], "prompt_number": 26 } ], "metadata": {} } ] }