{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Unsupervised Methods ##" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from __future__ import division\n", "import pandas as pd\n", "import numpy as np\n", "import scipy as sp\n", "import scipy.sparse as ss\n", "import matplotlib.pyplot as plt\n", "import pylab as pl\n", "from sklearn.decomposition import PCA\n", "from sklearn.cluster import KMeans\n", "from sklearn.cluster import Ward\n", "from sklearn.preprocessing import Binarizer\n", "%matplotlib inline" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "# Dataset exported from R using write.csv(USArrests, \"/tmp/USArrests.csv\", row.names=FALSE)\n", "# and read here from ../data/USArrests.csv.\n", "# NOTE: the State column shown below is not produced by that command as written -- USArrests keeps the\n", "# state names as R row names, which row.names=FALSE omits -- so it was presumably added separately (TODO: confirm).\n", "# Each row in this dataset corresponds to one of the 50 US states.\n", "usa_df = pd.read_csv(\"../data/USArrests.csv\")\n", "usa_df.head()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", " | State | \n", "Murder | \n", "Assault | \n", "UrbanPop | \n", "Rape | \n", "
---|---|---|---|---|---|
0 | \n", "Alabama | \n", "13.2 | \n", "236 | \n", "58 | \n", "21.2 | \n", "
1 | \n", "Alaska | \n", "10.0 | \n", "263 | \n", "48 | \n", "44.5 | \n", "
2 | \n", "Arizona | \n", "8.1 | \n", "294 | \n", "80 | \n", "31.0 | \n", "
3 | \n", "Arkansas | \n", "8.8 | \n", "190 | \n", "50 | \n", "19.5 | \n", "
4 | \n", "California | \n", "9.0 | \n", "276 | \n", "91 | \n", "40.6 | \n", "
5 rows \u00d7 5 columns
\n", "\n", " | Murder | \n", "Assault | \n", "UrbanPop | \n", "Rape | \n", "
---|---|---|---|---|
count | \n", "50.00000 | \n", "50.000000 | \n", "50.000000 | \n", "50.000000 | \n", "
mean | \n", "7.78800 | \n", "170.760000 | \n", "65.540000 | \n", "21.232000 | \n", "
std | \n", "4.35551 | \n", "83.337661 | \n", "14.474763 | \n", "9.366385 | \n", "
min | \n", "0.80000 | \n", "45.000000 | \n", "32.000000 | \n", "7.300000 | \n", "
25% | \n", "4.07500 | \n", "109.000000 | \n", "54.500000 | \n", "15.075000 | \n", "
50% | \n", "7.25000 | \n", "159.000000 | \n", "66.000000 | \n", "20.100000 | \n", "
75% | \n", "11.25000 | \n", "249.000000 | \n", "77.750000 | \n", "26.175000 | \n", "
max | \n", "17.40000 | \n", "337.000000 | \n", "91.000000 | \n", "46.000000 | \n", "
8 rows \u00d7 4 columns
\n", "\n", " | State | \n", "Murder | \n", "Assault | \n", "UrbanPop | \n", "Rape | \n", "
---|---|---|---|---|---|
0 | \n", "Alabama | \n", "0.054031 | \n", "0.966016 | \n", "0.237411 | \n", "0.086778 | \n", "
1 | \n", "Alaska | \n", "0.036872 | \n", "0.969739 | \n", "0.176987 | \n", "0.164081 | \n", "
2 | \n", "Arizona | \n", "0.026439 | \n", "0.959624 | \n", "0.261122 | \n", "0.101185 | \n", "
3 | \n", "Arkansas | \n", "0.044528 | \n", "0.961392 | \n", "0.252998 | \n", "0.098669 | \n", "
4 | \n", "California | \n", "0.030657 | \n", "0.940134 | \n", "0.309972 | \n", "0.138295 | \n", "
5 rows \u00d7 5 columns
\n", "\n", " | Murder | \n", "Assault | \n", "UrbanPop | \n", "Rape | \n", "
---|---|---|---|---|
count | \n", "50.000000 | \n", "50.000000 | \n", "50.000000 | \n", "50.000000 | \n", "
mean | \n", "0.040677 | \n", "0.881704 | \n", "0.409630 | \n", "0.119341 | \n", "
std | \n", "0.015018 | \n", "0.101645 | \n", "0.166944 | \n", "0.035642 | \n", "
min | \n", "0.012626 | \n", "0.473419 | \n", "0.132112 | \n", "0.042620 | \n", "
25% | \n", "0.027485 | \n", "0.850453 | \n", "0.279234 | \n", "0.092741 | \n", "
50% | \n", "0.039406 | \n", "0.907323 | \n", "0.395010 | \n", "0.117441 | \n", "
75% | \n", "0.049811 | \n", "0.952031 | \n", "0.508929 | \n", "0.139552 | \n", "
max | \n", "0.079346 | \n", "0.989371 | \n", "0.854213 | \n", "0.207893 | \n", "
8 rows \u00d7 4 columns
\n", "