{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.0\n",
      "1.14.2\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import csv\n",
    "\n",
    "print csv.__version__\n",
    "print np.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# open file\n",
    "# set delimiter as ';'\n",
    "# typecast to list and store to variable \"wine\"\n",
    "\n",
    "DATA_DIR = '../data'\n",
    "\n",
    "with open(os.path.abspath(os.path.join(DATA_DIR, 'day7/winequality-white.csv')), 'r') as datafile:\n",
    "    reader = csv.reader(datafile, delimiter=\";\")\n",
    "    wines = list(reader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['fixed acidity',\n",
       " 'volatile acidity',\n",
       " 'citric acid',\n",
       " 'residual sugar',\n",
       " 'chlorides',\n",
       " 'free sulfur dioxide',\n",
       " 'total sulfur dioxide',\n",
       " 'density',\n",
       " 'pH',\n",
       " 'sulphates',\n",
       " 'alcohol',\n",
       " 'quality']"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wines[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total records: 4898\n"
     ]
    }
   ],
   "source": [
    "# -1 to avoid header from counting into records\n",
    "print 'Total records: {}'.format(len(wines)-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5.87790935076\n"
     ]
    }
   ],
   "source": [
    "# average of quality variable\n",
    "qualities = [float(item[-1]) for item in wines[1:]]\n",
    "print sum(qualities)/len(qualities)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4898, 12)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# converting python array to numpy array and typecasting the cell values to float\n",
    "wines = wines[1:]\n",
    "wines_np = np.array(wines, dtype='float')\n",
    "wines_np.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 7.  ,  0.27,  0.36, ...,  0.45,  8.8 ,  6.  ],\n",
       "       [ 6.3 ,  0.3 ,  0.34, ...,  0.49,  9.5 ,  6.  ],\n",
       "       [ 8.1 ,  0.28,  0.4 , ...,  0.44, 10.1 ,  6.  ],\n",
       "       ...,\n",
       "       [ 6.5 ,  0.24,  0.19, ...,  0.46,  9.4 ,  6.  ],\n",
       "       [ 5.5 ,  0.29,  0.3 , ...,  0.38, 12.8 ,  7.  ],\n",
       "       [ 6.  ,  0.21,  0.38, ...,  0.32, 11.8 ,  6.  ]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# printing out wines 2-D array\n",
    "wines_np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0. 0.]\n",
      " [0. 0.]]\n"
     ]
    }
   ],
   "source": [
    "# creating numpy array with all zero elements\n",
    "print np.zeros((2,2), dtype='float')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0.79301112 0.95735914]\n",
      " [0.29987175 0.96456405]]\n"
     ]
    }
   ],
   "source": [
    "# creating numpy array with all random numbers\n",
    "print np.random.rand(2,2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# using numpy to read dataset\n",
    "wines = np.genfromtxt(\n",
    "                  os.path.abspath(os.path.join(DATA_DIR, 'day7/winequality-white.csv')), \n",
    "                  delimiter=\";\", \n",
    "                  skip_header=1\n",
    "                    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ True,  True,  True, ...,  True,  True,  True],\n",
       "       [ True,  True,  True, ...,  True,  True,  True],\n",
       "       [ True,  True,  True, ...,  True,  True,  True],\n",
       "       ...,\n",
       "       [ True,  True,  True, ...,  True,  True,  True],\n",
       "       [ True,  True,  True, ...,  True,  True,  True],\n",
       "       [ True,  True,  True, ...,  True,  True,  True]])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wines == wines_np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.28\n",
      "True\n"
     ]
    }
   ],
   "source": [
    "# accessing the value for 3rd row 2nd column of wines\n",
    "print wines[2, 1]\n",
    "print wines[2, 1] == wines_np[2, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.36, 0.34, 0.4 , 0.32])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# we would like to access 4rows from top of 3rd column\n",
    "# wines[start:end, column_index]\n",
    "# since the index start from zero; so slicing excludes 4 and finds out result from 0, 1, 2, 3\n",
    "wines[:4,2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([10., 10., 10., 10.])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# we will override the existing value of 2nd column to 10.0 for all the rows\n",
    "wines[:, 2] = 10.0\n",
    "wines[:4, 2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.28855438, 0.95129591, 0.80747318, 0.89765623, 0.98632739])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# creating 1-D array in numpy\n",
    "random_1d = np.random.rand(5)\n",
    "random_1d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# creating 3-D numpy array\n",
    "random_3d = np.random.rand(2,4,3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2, 4, 3)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# take this shape as any thing for 2 years across 4 quarters per month in that quarter\n",
    "#2x4x3 = 24 months\n",
    "random_3d.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 7,  0, 10, ...,  0,  8,  6],\n",
       "       [ 6,  0, 10, ...,  0,  9,  6],\n",
       "       [ 8,  0, 10, ...,  0, 10,  6],\n",
       "       ...,\n",
       "       [ 6,  0, 10, ...,  0,  9,  6],\n",
       "       [ 5,  0, 10, ...,  0, 12,  7],\n",
       "       [ 6,  0, 10, ...,  0, 11,  6]])"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Data types in numpy\n",
    "# converting wines to type=int\n",
    "wines.astype('int')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[6. 6. 6. ... 6. 7. 6.]\n",
      "[7. 7. 7. ... 7. 8. 7.]\n"
     ]
    }
   ],
   "source": [
    "# addition to any column across all rows\n",
    "# as shows below all the remaining mathematical operations can be done\n",
    "print wines[:, 11]\n",
    "print wines[:, 11] + 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([36., 36., 36., ..., 36., 49., 36.])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# multiplying 2 columns\n",
    "# examples show the square of 12th column\n",
    "wines[:, 11] * wines[: , 11]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "28790.0"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# sum any column across all rows\n",
    "wines[:, 11].sum(axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5.87790935075541"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wines[:, 11].mean() #std, min, max are many other methods for fast stats computation"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}