{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Handling Comma Separated Value Files\n",
    "\n",
    "This notebook showcases methods to extract data from CSVs:\n",
    "+ csv containing delimiter separated values\n",
    "+ csv containing tabular data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# import required libraries\n",
    "import csv\n",
    "import pandas as pd\n",
    "from pprint import pprint"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Utilities"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def print_basic_csv(file_name, delimiter=','):\n",
    "    \"\"\"This function extracts and prints csv content from given filename\n",
    "       Details: https://docs.python.org/2/library/csv.html\n",
    "    Args:\n",
    "        file_name (str): file path to be read\n",
    "        delimiter (str): delimiter used in csv. Default is comma (',')\n",
    "\n",
    "    Returns:\n",
    "        None\n",
    "\n",
    "    \"\"\"\n",
    "    csv_rows = list()\n",
    "    csv_attr_dict = dict()\n",
    "    csv_reader = None\n",
    "\n",
    "    # read csv\n",
    "    csv_reader = csv.reader(open(file_name, 'r'), delimiter=delimiter)\n",
    "        \n",
    "    # iterate and extract data    \n",
    "    for row in csv_reader:\n",
    "        print(row)\n",
    "        csv_rows.append(row)\n",
    "    \n",
    "    # prepare attribute lists\n",
    "    for col in csv_rows[0]:\n",
    "        csv_attr_dict[col]=list()\n",
    "    \n",
    "    # iterate and add data to attribute lists\n",
    "    for row in csv_rows[1:]:\n",
    "        csv_attr_dict['sno'].append(row[0])\n",
    "        csv_attr_dict['fruit'].append(row[1])\n",
    "        csv_attr_dict['color'].append(row[2])\n",
    "        csv_attr_dict['price'].append(row[3])\n",
    "    \n",
    "    # print the result\n",
    "    print(\"\\n\\n\")\n",
    "    print(\"CSV Attributes::\")\n",
    "    pprint(csv_attr_dict)\n",
    "            \n",
    "\n",
    "\n",
    "def print_tabular_data(file_name,delimiter=\",\"):\n",
    "    \"\"\"This function extracts and prints tabular csv content from given filename\n",
    "       Details: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html\n",
    "    Args:\n",
    "        file_name (str): file path to be read\n",
    "        delimiter (str): delimiter used in csv. Default is comma ('\\t')\n",
    "\n",
    "    Returns:\n",
    "        None\n",
    "\n",
    "    \"\"\"\n",
    "    df = pd.read_csv(file_name,sep=delimiter)\n",
    "    print(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Parse using CSV module\n",
    "\n",
    "The print_basic_csv() function takes the input file name along with delimiter as input parameters."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['sno', 'fruit', 'color', 'price']\n",
      "['1', 'apple', 'red', '110.85']\n",
      "['2', 'banana', 'yellow', '50.12']\n",
      "['3', 'mango', 'yellow', '70.29']\n",
      "['4', 'orange', 'orange', '80.00']\n",
      "['5', 'kiwi', 'green', '150.00']\n",
      "['6', 'pineapple', 'yellow', '90.00']\n",
      "['7', 'guava', 'green', '20.00']\n",
      "\n",
      "\n",
      "\n",
      "CSV Attributes::\n",
      "{'color': ['red', 'yellow', 'yellow', 'orange', 'green', 'yellow', 'green'],\n",
      " 'fruit': ['apple', 'banana', 'mango', 'orange', 'kiwi', 'pineapple', 'guava'],\n",
      " 'price': ['110.85', '50.12', '70.29', '80.00', '150.00', '90.00', '20.00'],\n",
      " 'sno': ['1', '2', '3', '4', '5', '6', '7']}\n"
     ]
    }
   ],
   "source": [
    "print_basic_csv(r'tabular_csv.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The first output in the above cell shows the data in the csv as-is.\n",
    "The second one is the parsed output showcasing the contents of the csv as key-value pairs\n",
    "\n",
    "\n",
    "--------"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Parse using pandas\n",
    "\n",
    "The print_tabular_data() function takes the input file name along with delimiter as input parameters. It uses pandas to do the heavy lifting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   sno      fruit   color   price\n",
      "0    1      apple     red  110.85\n",
      "1    2     banana  yellow   50.12\n",
      "2    3      mango  yellow   70.29\n",
      "3    4     orange  orange   80.00\n",
      "4    5       kiwi   green  150.00\n",
      "5    6  pineapple  yellow   90.00\n",
      "6    7      guava   green   20.00\n"
     ]
    }
   ],
   "source": [
    "print_tabular_data(r'tabular_csv.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The output in the above cell shows how pandas reads a csv and prepares a tabular dataframe"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}