{ "metadata": { "name": "", "signature": "sha256:27a83edb7926554471d87d020c9e2ca09c17cd35c0eabd1d18813bf1ef3a3c3f" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Project\n", "CSE 802 - Pattern Recognition and Analysis Instructor: Dr. Arun Ross \n", "Due Date: May 2, 5:00pm \n", "\n", "\n", "Sebastian Raschka \n", "Last updated: 04/24/2014\n", "
1.8569 -3.4702 1 \n",
" -0.2096 -2.8342 1 \n",
" -1.0265 2.1614 1 \n",
" [...] \n",
" 9.3851 4.0336 2 \n",
" 10.1375 1.1495 2 \n",
" 11.7569 0.8005 2 \n",
" [...] \n",
" 3.9854 5.1360 3 \n",
" 2.7592 5.9536 3 \n",
" 4.1379 4.3258 3 \n",
" [...]
"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"### Reading in and analyzing the dataset"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"\n",
"# Pin NumPy's global random state so that a fresh run reproduces the same draws.\n",
"np.random.seed(seed=1234568)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Load the space-delimited project data; columns are x_1, x_2 and the class label.\n",
"all_projdata = np.genfromtxt('./data/project_data.txt', delimiter=' ')\n",
"\n",
"# Check that the data was read in with the expected dimensions:\n",
"# 1500 rows x 3 columns in total, and 500 rows for each class label.\n",
"assert all_projdata.shape == (1500, 3), all_projdata.shape\n",
"for c_label in range(1, 4):  # class labels 1-3 are in column 3\n",
"    class_rows = all_projdata[all_projdata[:, 2] == c_label]\n",
"    assert class_rows.shape == (500, 3), (c_label, class_rows.shape)\n",
"\n",
"# Print basic statistics for each of the two feature columns:\n",
"for column in range(2):\n",
"    feature = all_projdata[:, column]\n",
"    print(50 * '-')\n",
"    # Both bounds must come from the current column; the upper bound was\n",
"    # previously always taken from column 0, misreporting the range of x_2.\n",
"    print(\"range of x_{}: ({}, {})\".format(\n",
"        column + 1, feature.min(), feature.max()))\n",
"    print(\"mean of x_{}: {}\".format(column + 1, np.mean(feature)))\n",
"    print(\"standard_deviation of x_{}: {}\".format(\n",
"        column + 1, np.std(feature)))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"--------------------------------------------------\n",
"range of x_1: (-6.8114, 17.4559)\n",
"mean of x_1: 5.027464333333326\n",
"standard_deviation of x_1: 4.574803380930654\n",
"--------------------------------------------------\n",
"range of x_2: (-7.9943, 17.4559)\n",
"mean of x_2: 1.6648250000000002\n",
"standard_deviation of x_2: 3.156841470283494\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"