{
"metadata": {
"name": "",
"signature": "sha256:9f28c7d139b6281fbd1a20d493f37366d5649955e2199f2d3817d1ea9302e2ff"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Expand Cells Containing Lists Into Their Own Variables In Pandas\n",
"\n",
"- **Author:** [Chris Albon](http://www.chrisalbon.com/), [@ChrisAlbon](https://twitter.com/chrisalbon)\n",
"- **Date:** -\n",
"- **Repo:** [Python 3 code snippets for data science](https://github.com/chrisalbon/code_py)\n",
"- **Note:**"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# import pandas\n",
"import pandas as pd"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# build the example data: a numeric column plus a column whose cells are lists\n",
"raw_data = {\n",
"    'score': [1, 2, 3],\n",
"    'tags': [\n",
"        ['apple', 'pear', 'guava'],\n",
"        ['truck', 'car', 'plane'],\n",
"        ['cat', 'dog', 'mouse'],\n",
"    ],\n",
"}\n",
"df = pd.DataFrame(raw_data, columns=['score', 'tags'])\n",
"\n",
"# display the resulting frame\n",
"df"
],
"language": "python",
"metadata": {},
"outputs": [
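{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>score</th>\n",
"      <th>tags</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>1</td>\n",
"      <td>[apple, pear, guava]</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>2</td>\n",
"      <td>[truck, car, plane]</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>3</td>\n",
"      <td>[cat, dog, mouse]</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],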
"metadata": {},
"output_type": "pyout",
"prompt_number": 24,
"text": [
" score tags\n",
"0 1 [apple, pear, guava]\n",
"1 2 [truck, car, plane]\n",
"2 3 [cat, dog, mouse]"
]
}
],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# split each list in df['tags'] across columns (one column per list position),\n",
"# then prefix every new column label with 'tag_'\n",
"tags = df['tags'].apply(pd.Series).add_prefix('tag_')\n",
"\n",
"# show the expanded frame\n",
"tags"
],
"language": "python",
"metadata": {},
"outputs": [
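{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>tag_0</th>\n",
"      <th>tag_1</th>\n",
"      <th>tag_2</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>apple</td>\n",
"      <td>pear</td>\n",
"      <td>guava</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>truck</td>\n",
"      <td>car</td>\n",
"      <td>plane</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>cat</td>\n",
"      <td>dog</td>\n",
"      <td>mouse</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],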
"metadata": {},
"output_type": "pyout",
"prompt_number": 25,
"text": [
" tag_0 tag_1 tag_2\n",
"0 apple pear guava\n",
"1 truck car plane\n",
"2 cat dog mouse"
]
}
],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# attach the expanded tag columns to the original frame, side by side;\n",
"# axis=1 concatenates columns, with rows aligned on the shared index\n",
"pd.concat([df, tags], axis=1)"
],
"language": "python",
"metadata": {},
"outputs": [
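{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>score</th>\n",
"      <th>tags</th>\n",
"      <th>tag_0</th>\n",
"      <th>tag_1</th>\n",
"      <th>tag_2</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>1</td>\n",
"      <td>[apple, pear, guava]</td>\n",
"      <td>apple</td>\n",
"      <td>pear</td>\n",
"      <td>guava</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>2</td>\n",
"      <td>[truck, car, plane]</td>\n",
"      <td>truck</td>\n",
"      <td>car</td>\n",
"      <td>plane</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>3</td>\n",
"      <td>[cat, dog, mouse]</td>\n",
"      <td>cat</td>\n",
"      <td>dog</td>\n",
"      <td>mouse</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],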
"metadata": {},
"output_type": "pyout",
"prompt_number": 26,
"text": [
" score tags tag_0 tag_1 tag_2\n",
"0 1 [apple, pear, guava] apple pear guava\n",
"1 2 [truck, car, plane] truck car plane\n",
"2 3 [cat, dog, mouse] cat dog mouse"
]
}
],
"prompt_number": 26
}
],
"metadata": {}
}
]
}