{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Handling JSON Files\n",
    "\n",
    "This notebook showcases methods to read JSON type data:\n",
    "+ using python's inbuilt utilities\n",
    "+ using pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# import required libraries\n",
    "import json\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Utilities"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def print_nested_dicts(nested_dict,indent_level=0):\n",
    "    \"\"\"This function prints a nested dict object\n",
    "    Args:\n",
    "        nested_dict (dict): the dictionary to be printed\n",
    "        indent_level (int): the indentation level for nesting\n",
    "    Returns:\n",
    "        None\n",
    "\n",
    "    \"\"\"\n",
    "    \n",
    "    for key, val in nested_dict.items():\n",
    "        if isinstance(val, dict):\n",
    "          print(\"{0} : \".format(key))\n",
    "          print_nested_dicts(val,indent_level=indent_level+1)\n",
    "        elif isinstance(val,list):\n",
    "            print(\"{0} : \".format(key))\n",
    "            for rec in val:\n",
    "                print_nested_dicts(rec,indent_level=indent_level+1)\n",
    "        else:\n",
    "          print(\"{0}{1} : {2}\".format(\"\\t\"*indent_level,key, val))\n",
    "\n",
    "def extract_json(file_name,do_print=True):\n",
    "    \"\"\"This function extracts and prints json content from a given file\n",
    "    Args:\n",
    "        file_name (str): file path to be read\n",
    "        do_print (bool): boolean flag to print file contents or not\n",
    "\n",
    "    Returns:\n",
    "        None\n",
    "\n",
    "    \"\"\"\n",
    "    try:\n",
    "        json_filedata = open(file_name).read() \n",
    "        json_data = json.loads(json_filedata)\n",
    "        \n",
    "        if do_print:\n",
    "            print_nested_dicts(json_data)\n",
    "    except IOError:\n",
    "        raise IOError(\"File path incorrect/ File not found\")\n",
    "    except ValueError:\n",
    "        ValueError(\"JSON file has errors\")\n",
    "    except Exception:\n",
    "        raise\n",
    "\n",
    "def extract_pandas_json(file_name,orientation=\"records\",do_print=True):\n",
    "    \"\"\"This function extracts and prints json content from a file using pandas\n",
    "       This is useful when json data represents tabular, series information\n",
    "    Args:\n",
    "        file_name (str): file path to be read\n",
    "        orientation (str): orientation of json file. Defaults to records\n",
    "        do_print (bool): boolean flag to print file contents or not\n",
    "\n",
    "    Returns:\n",
    "        None\n",
    "\n",
    "    \"\"\"\n",
    "    try:\n",
    "        df = pd.read_json(file_name,orient=orientation)\n",
    "        \n",
    "        if do_print:\n",
    "            print(df)\n",
    "    except IOError:\n",
    "        raise IOError(\"File path incorrect/ File not found\")\n",
    "    except ValueError:\n",
    "        ValueError(\"JSON file has errors\")\n",
    "    except Exception:\n",
    "        raise"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Parse using json module\n",
    "\n",
    "The extract_json() function takes the input file name as input parameter."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "outer_col_2 : \n",
      "\tinner_col_1 : 3\n",
      "outer_col_1 : \n",
      "\tnested_inner_col_1 : val_1\n",
      "\tnested_inner_col_2 : 2\n",
      "\tnested_inner_col_1 : val_2\n",
      "\tnested_inner_col_2 : 2\n",
      "outer_col_3 : 4\n"
     ]
    }
   ],
   "source": [
    "extract_json(r'sample_json.json')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The function generates a nested output resembling the structure of the JSON itself where outer_col_1's value is a nested object in itself\n",
    "\n",
    "\n",
    "--------"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Parse using pandas\n",
    "\n",
    "The extract_pandas_json() function takes the input file name  as input parameter. It uses pandas to do the heavy lifting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  col_1 col_2\n",
      "0     a     b\n",
      "1     c     d\n",
      "2     e     f\n",
      "3     g     h\n",
      "4     i     j\n",
      "5     k     l\n"
     ]
    }
   ],
   "source": [
    "extract_pandas_json(r'pandas_json.json')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The output in the above cell shows how pandas reads a JSON and prepares a tabular dataframe"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}