{ "metadata": { "name": "", "signature": "sha256:6b92a05dbdc4b24d75fe864816c23bf417ce330ce844deedef072c01574b59bd" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Find Unique Values In Pandas Dataframes\n", "\n", "- **Author:** [Chris Albon](http://www.chrisalbon.com/), [@ChrisAlbon](https://twitter.com/chrisalbon)\n", "- **Date:** -\n", "- **Repo:** [Python 3 code snippets for data science](https://github.com/chrisalbon/code_py)\n", "- **Note:**" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "import numpy as np" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 32 }, { "cell_type": "code", "collapsed": false, "input": [ "raw_data = {'regiment': ['51st', '29th', '2nd', '19th', '12th', '101st', '90th', '30th', '193th', '1st', '94th', '91th'], \n", " 'trucks': ['MAZ-7310', np.nan, 'MAZ-7310', 'MAZ-7310', 'Tatra 810', 'Tatra 810', 'Tatra 810', 'Tatra 810', 'ZIS-150', 'Tatra 810', 'ZIS-150', 'ZIS-150'],\n", " 'tanks': ['Merkava Mark 4', 'Merkava Mark 4', 'Merkava Mark 4', 'Leopard 2A6M', 'Leopard 2A6M', 'Leopard 2A6M', 'Arjun MBT', 'Leopard 2A6M', 'Arjun MBT', 'Arjun MBT', 'Arjun MBT', 'Arjun MBT'],\n", " 'aircraft': ['none', 'none', 'none', 'Harbin Z-9', 'Harbin Z-9', 'none', 'Harbin Z-9', 'SH-60B Seahawk', 'SH-60B Seahawk', 'SH-60B Seahawk', 'SH-60B Seahawk', 'SH-60B Seahawk']}\n", "\n", "df = pd.DataFrame(raw_data, columns = ['regiment', 'trucks', 'tanks', 'aircraft'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 33 }, { "cell_type": "code", "collapsed": false, "input": [ "# View the top few rows\n", "df.head()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", "<table border=\"1\" class=\"dataframe\">\n", "  <thead>\n", "    <tr style=\"text-align: right;\">\n", "      <th></th>\n", "      <th>regiment</th>\n", "      <th>trucks</th>\n", "      <th>tanks</th>\n", "      <th>aircraft</th>\n", "    </tr>\n", "  </thead>\n", "  <tbody>\n", "    <tr>\n", "      <th>0</th>\n", "      <td>51st</td>\n", "      <td>MAZ-7310</td>\n", "      <td>Merkava Mark 4</td>\n", "      <td>none</td>\n", "    </tr>\n", "    <tr>\n", "      <th>1</th>\n", "      <td>29th</td>\n", "      <td>NaN</td>\n", "      <td>Merkava Mark 4</td>\n", "      <td>none</td>\n", "    </tr>\n", "    <tr>\n", "      <th>2</th>\n", "      <td>2nd</td>\n", "      <td>MAZ-7310</td>\n", "      <td>Merkava Mark 4</td>\n", "      <td>none</td>\n", "    </tr>\n", "    <tr>\n", "      <th>3</th>\n", "      <td>19th</td>\n", "      <td>MAZ-7310</td>\n", "      <td>Leopard 2A6M</td>\n", "      <td>Harbin Z-9</td>\n", "    </tr>\n", "    <tr>\n", "      <th>4</th>\n", "      <td>12th</td>\n", "      <td>Tatra 810</td>\n", "      <td>Leopard 2A6M</td>\n", "      <td>Harbin Z-9</td>\n", "    </tr>\n", "  </tbody>\n", "</table>\n", "</div>" ], "metadata": {}, "output_type": "pyout", "prompt_number": 71, "text": [ " regiment trucks tanks aircraft\n", "0 51st MAZ-7310 Merkava Mark 4 none\n", "1 29th NaN Merkava Mark 4 none\n", "2 2nd MAZ-7310 Merkava Mark 4 none\n", "3 19th MAZ-7310 Leopard 2A6M Harbin Z-9\n", "4 12th Tatra 810 Leopard 2A6M Harbin Z-9" ] } ], "prompt_number": 71 }, { "cell_type": "code", "collapsed": false, "input": [ "# Create a list of unique values by turning the\n", "# pandas column into a set\n", "list(set(df.trucks))" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 57, "text": [ "[nan, 'MAZ-7310', 'ZIS-150', 'Tatra 810']" ] } ], "prompt_number": 57 }, { "cell_type": "code", "collapsed": false, "input": [ "# Create a list of unique values in df.trucks\n", "list(df['trucks'].unique())" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 73, "text": [ "['MAZ-7310', nan, 'Tatra 810', 'ZIS-150']" ] } ], "prompt_number": 73 } ], "metadata": {} } ] }