{ "metadata": { "name": "", "signature": "sha256:73eb77cd34995593495a287468f1aa0e702a7da42250f6b0344998059b8a0401" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Dealing With Outliers In Pandas\n", "\n", "- **Author:** [Chris Albon](http://www.chrisalbon.com/), [@ChrisAlbon](https://twitter.com/chrisalbon)\n", "- **Date:** -\n", "- **Repo:** [Python 3 code snippets for data science](https://github.com/chrisalbon/code_py)\n", "- **Note:**" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Import modules" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "import numpy as np" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create dataframe" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df = pd.DataFrame(np.random.randn(20, 5))\n", "df.columns = ['score_1', 'score_2', 'score_3', 'score_4', 'score_5']\n", "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", " | score_1 | \n", "score_2 | \n", "score_3 | \n", "score_4 | \n", "score_5 | \n", "
---|---|---|---|---|---|
0 | \n", "0.809013 | \n", "-0.043927 | \n", "0.928440 | \n", "-1.110457 | \n", "1.623501 | \n", "
1 | \n", "1.626228 | \n", "-0.304158 | \n", "1.249759 | \n", "-1.851839 | \n", "1.081616 | \n", "
2 | \n", "1.894077 | \n", "1.068677 | \n", "-0.111965 | \n", "-1.064986 | \n", "0.035659 | \n", "
3 | \n", "-0.386469 | \n", "-1.188364 | \n", "0.831405 | \n", "-0.014511 | \n", "-0.260195 | \n", "
4 | \n", "-1.242064 | \n", "0.294049 | \n", "0.714657 | \n", "-0.396795 | \n", "-1.219513 | \n", "
5 | \n", "0.852891 | \n", "0.355283 | \n", "0.839451 | \n", "0.746963 | \n", "-0.715827 | \n", "
6 | \n", "0.191754 | \n", "-0.244156 | \n", "-0.238739 | \n", "0.797857 | \n", "-1.355429 | \n", "
7 | \n", "0.175954 | \n", "-0.465887 | \n", "1.882503 | \n", "-0.174788 | \n", "0.646117 | \n", "
8 | \n", "-0.404646 | \n", "-0.755381 | \n", "0.419163 | \n", "0.918593 | \n", "0.923306 | \n", "
9 | \n", "-0.108578 | \n", "0.166226 | \n", "0.890846 | \n", "-0.016745 | \n", "-1.375534 | \n", "
10 | \n", "0.101022 | \n", "-0.132286 | \n", "0.274950 | \n", "-0.678942 | \n", "0.053938 | \n", "
11 | \n", "1.673355 | \n", "-0.164933 | \n", "1.086568 | \n", "-1.621484 | \n", "-0.135308 | \n", "
12 | \n", "1.128543 | \n", "0.355407 | \n", "-1.380984 | \n", "0.604208 | \n", "-1.095205 | \n", "
13 | \n", "-1.602945 | \n", "0.614549 | \n", "-0.089838 | \n", "0.652979 | \n", "1.721376 | \n", "
14 | \n", "-1.272730 | \n", "-0.916772 | \n", "-0.594153 | \n", "0.123623 | \n", "-0.655120 | \n", "
15 | \n", "0.140682 | \n", "-0.364991 | \n", "-0.522412 | \n", "0.863911 | \n", "1.106638 | \n", "
16 | \n", "-0.265389 | \n", "-0.293563 | \n", "1.066478 | \n", "-0.485762 | \n", "-2.222239 | \n", "
17 | \n", "2.465491 | \n", "-0.437448 | \n", "-1.577115 | \n", "0.243174 | \n", "-0.186260 | \n", "
18 | \n", "0.927383 | \n", "-0.615659 | \n", "-0.075537 | \n", "0.939576 | \n", "-0.662184 | \n", "
19 | \n", "-0.426472 | \n", "0.990325 | \n", "0.314062 | \n", "-0.678511 | \n", "0.570545 | \n", "
20 rows \u00d7 5 columns
\n", "\n", " | score_1 | \n", "score_2 | \n", "score_3 | \n", "score_4 | \n", "score_5 | \n", "
---|---|---|---|---|---|
count | \n", "20.000000 | \n", "20.000000 | \n", "20.000000 | \n", "20.000000 | \n", "20.000000 | \n", "
mean | \n", "0.313855 | \n", "-0.104151 | \n", "0.295377 | \n", "-0.110197 | \n", "-0.106006 | \n", "
std | \n", "1.101752 | \n", "0.592899 | \n", "0.879432 | \n", "0.860297 | \n", "1.081506 | \n", "
min | \n", "-1.602945 | \n", "-1.188364 | \n", "-1.577115 | \n", "-1.851839 | \n", "-2.222239 | \n", "
25% | \n", "-0.391013 | \n", "-0.444558 | \n", "-0.143658 | \n", "-0.678619 | \n", "-0.810672 | \n", "
50% | \n", "0.158318 | \n", "-0.204545 | \n", "0.366613 | \n", "-0.015628 | \n", "-0.160784 | \n", "
75% | \n", "0.977673 | \n", "0.309358 | \n", "0.900244 | \n", "0.676475 | \n", "0.715414 | \n", "
max | \n", "2.465491 | \n", "1.068677 | \n", "1.882503 | \n", "0.939576 | \n", "1.721376 | \n", "
8 rows \u00d7 5 columns
\n", "