{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "From | to | Use \n", ":--- | :--- | :---\n", "JSON | DataFrame | pandas.read_json()\n", "DataFrame | JSON | df.to_json() or\n", "| | df.to_json(orient='records')\n", "JSON | Python object | json.loads()\n", "Python object | JSON | json.dumps()\n", "\n", "Ref:-\n", "* Python for Data Analysis book by Wes McKinney, Second Edition, Section 6.1 Reading and Writing Data in Text Format -> JSON Data\n", "* https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_json.html" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "obj = \"\"\"\n", "{\"name\": \"Wes\",\n", " \"places_lived\": [\"United States\", \"Spain\", \"Germany\"],\n", " \"pet\": null,\n", " \"siblings\": [{\"name\": \"Scott\", \"age\": 30, \"pets\": [\"Zeus\", \"Zuko\"]},\n", " {\"name\": \"Katie\", \"age\": 38,\n", " \"pets\": [\"Sixes\", \"Stache\", \"Cisco\"]}]\n", "}\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "text/plain": [ "{'name': 'Wes',\n", " 'pet': None,\n", " 'places_lived': ['United States', 'Spain', 'Germany'],\n", " 'siblings': [{'age': 30, 'name': 'Scott', 'pets': ['Zeus', 'Zuko']},\n", " {'age': 38, 'name': 'Katie', 'pets': ['Sixes', 'Stache', 'Cisco']}]}" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Use json.loads to read JSON data\n", "\n", "import json\n", "result = json.loads(obj)\n", "print(type(result))\n", "result" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'{\"name\": \"Wes\", \"places_lived\": [\"United States\", \"Spain\", \"Germany\"], \"pet\": null, \"siblings\": [{\"name\": \"Scott\", \"age\": 30, \"pets\": [\"Zeus\", \"Zuko\"]}, {\"name\": \"Katie\", \"age\": 38, \"pets\": [\"Sixes\", \"Stache\", \"Cisco\"]}]}'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Use json.dumps to convert a Python object back to JSON\n", "\n", "asjson = json.dumps(result)\n", "asjson" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameage
0Scott30
1Katie38
\n", "
" ], "text/plain": [ " name age\n", "0 Scott 30\n", "1 Katie 38" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# You can pass a list of dicts (which were previously JSON objects)\n", "# to the DataFrame constructor and select a subset of the data fields\n", "import pandas as pd\n", "siblings = pd.DataFrame(result['siblings'], columns=['name', 'age'])\n", "siblings" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{\"a\": 1, \"b\": 2, \"c\": 3},\n", " {\"a\": 4, \"b\": 5, \"c\": 6},\n", " {\"a\": 7, \"b\": 8, \"c\": 9}]\n" ] } ], "source": [ "# Use pandas.read_json to automatically convert JSON datasets into a Series or DataFrame\n", "!cat examples/example.json" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
0123
1456
2789
\n", "
" ], "text/plain": [ " a b c\n", "0 1 2 3\n", "1 4 5 6\n", "2 7 8 9" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The default options for pandas.read_json assume that each object in the JSON array\n", "# is a row in the table:\n", "data = pd.read_json('examples/example.json')\n", "data" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"a\":{\"0\":1,\"1\":4,\"2\":7},\"b\":{\"0\":2,\"1\":5,\"2\":8},\"c\":{\"0\":3,\"1\":6,\"2\":9}}\n" ] } ], "source": [ "# To export the data from pandas to JSON, use the to_json method\n", "print(data.to_json())" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{\"a\":1,\"b\":2,\"c\":3},{\"a\":4,\"b\":5,\"c\":6},{\"a\":7,\"b\":8,\"c\":9}]\n" ] } ], "source": [ "print(data.to_json(orient='records'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Features of the input\n", "* some cells of the dataframe are arrays\n", "* some elements in those arrays are None" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "df: \n", " day month\n", "0 sun [jan]\n", "1 mon [feb, mar]\n", "2 tue [apr, None] \n", "\n", "json: \n", " [{\"day\":\"sun\",\"month\":[\"jan\"]},{\"day\":\"mon\",\"month\":[\"feb\",\"mar\"]},{\"day\":\"tue\",\"month\":[\"apr\",null]}] \n", "\n", "df2: \n", " day month\n", "0 sun [jan]\n", "1 mon [feb, mar]\n", "2 tue [apr, None] \n", "\n" ] } ], "source": [ "import pandas as pd\n", "df = pd.DataFrame([{'day': 'sun', 'month': ['jan']},\n", " {'day': 'mon', 'month': ['feb', 'mar']},\n", " {'day': 'tue', 'month': ['apr', None]}])\n", "print('df: \\n', df, '\\n')\n", "\n", "json = df.to_json(orient='records')\n", "print('json: \\n', json, '\\n')\n", "# print(type(json))\n", "\n", "# Todo:- Is there a need to use orient='records' when calling read_json? Find out later.\n", "# df2 = pd.read_json(json, orient='records')\n", "df2 = pd.read_json(json)\n", "print('df2: \\n', df2, '\\n')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }