{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import json\n", "from collections import OrderedDict\n", "import pprint" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "s = r'{\"C\": \"\\u3042\", \"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}]}'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"C\": \"\\u3042\", \"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}]}\n" ] } ], "source": [ "print(s)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "d = json.loads(s)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'A': {'i': 1, 'j': 2},\n", " 'B': [{'X': 1, 'Y': 10},\n", " {'X': 2, 'Y': 20}],\n", " 'C': 'あ'}\n" ] } ], "source": [ "pprint.pprint(d, width=40)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'dict'>\n" ] } ], "source": [ "print(type(d))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "od = json.loads(s, object_pairs_hook=OrderedDict)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "OrderedDict([('C', 'あ'),\n", " ('A', OrderedDict([('i', 1), ('j', 2)])),\n", " ('B',\n", " [OrderedDict([('X', 1), ('Y', 10)]),\n", " OrderedDict([('X', 2), ('Y', 20)])])])\n" ] } ], "source": [ "pprint.pprint(od)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "b = s.encode()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {},
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "b'{\"C\": \"\\\\u3042\", \"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}]}'\n" ] } ], "source": [ "print(b)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'bytes'>\n" ] } ], "source": [ "print(type(b))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "db = json.loads(b)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'A': {'i': 1, 'j': 2},\n", " 'B': [{'X': 1, 'Y': 10},\n", " {'X': 2, 'Y': 20}],\n", " 'C': 'あ'}\n" ] } ], "source": [ "pprint.pprint(db, width=40)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'dict'>\n" ] } ], "source": [ "print(type(db))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": true }, "outputs": [], "source": [ "sb = b.decode()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"C\": \"\\u3042\", \"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}]}\n" ] } ], "source": [ "print(sb)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'str'>\n" ] } ], "source": [ "print(type(sb))" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": true }, "outputs": [], "source": [ "dsb = json.loads(sb)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'A': {'i': 1, 'j': 2},\n", " 'B': [{'X': 1, 'Y': 10},\n", " {'X': 2, 'Y': 20}],\n", " 'C': 'あ'}\n" ] } ], "source": [ "pprint.pprint(dsb, width=40)" ] }, {
"cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'dict'>\n" ] } ], "source": [ "print(type(dsb))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": true }, "outputs": [], "source": [ "sb_u = b.decode('unicode-escape')" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"C\": \"あ\", \"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}]}\n" ] } ], "source": [ "print(sb_u)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'str'>\n" ] } ], "source": [ "print(type(sb_u))" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": true }, "outputs": [], "source": [ "dsb_u = json.loads(sb_u)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'A': {'i': 1, 'j': 2},\n", " 'B': [{'X': 1, 'Y': 10},\n", " {'X': 2, 'Y': 20}],\n", " 'C': 'あ'}\n" ] } ], "source": [ "pprint.pprint(dsb_u, width=40)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'dict'>\n" ] } ], "source": [ "print(type(dsb_u))" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"C\": \"\\u3042\", \"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}]}\n" ] } ], "source": [ "with open('data/src/test.json') as f:\n", " print(f.read())" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": true }, "outputs": [], "source": [ "with open('data/src/test.json') as f:\n", " df = json.load(f)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type":
"stream", "text": [ "{'A': {'i': 1, 'j': 2},\n", " 'B': [{'X': 1, 'Y': 10},\n", " {'X': 2, 'Y': 20}],\n", " 'C': 'あ'}\n" ] } ], "source": [ "pprint.pprint(df, width=40)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'dict'>\n" ] } ], "source": [ "print(type(df))" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'A': {'i': 1, 'j': 2},\n", " 'B': [{'X': 1, 'Y': 10},\n", " {'X': 2, 'Y': 20}],\n", " 'C': 'あ'}\n" ] } ], "source": [ "pprint.pprint(d, width=40)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'i': 1, 'j': 2}\n" ] } ], "source": [ "print(d['A'])" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n" ] } ], "source": [ "print(d['A']['i'])" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'X': 1, 'Y': 10}, {'X': 2, 'Y': 20}]\n" ] } ], "source": [ "print(d['B'])" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'X': 1, 'Y': 10}\n" ] } ], "source": [ "print(d['B'][0])" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n" ] } ], "source": [ "print(d['B'][0]['X'])" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "20\n" ] } ], "source": [ "value = d['B'][1]['Y']\n", "print(value)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# print(d['D'])\n", "# KeyError: 'D'" ] }, { "cell_type": "code",
"execution_count": 39, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "None\n" ] } ], "source": [ "print(d.get('D'))" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'A': {'i': 1, 'j': 2},\n", " 'B': [{'X': 1, 'Y': 10},\n", " {'X': 2, 'Y': 20}],\n", " 'C': 'ん'}\n" ] } ], "source": [ "d['C'] = 'ん'\n", "pprint.pprint(d, width=40)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'A': {'i': 1, 'j': 2},\n", " 'B': [{'X': 1, 'Y': 10},\n", " {'X': 2, 'Y': 20}]}\n" ] } ], "source": [ "d.pop('C')\n", "pprint.pprint(d, width=40)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'A': {'i': 1, 'j': 2},\n", " 'B': [{'X': 1, 'Y': 10},\n", " {'X': 2, 'Y': 20}],\n", " 'C': 'あ'}\n" ] } ], "source": [ "d['C'] = 'あ'\n", "pprint.pprint(d, width=40)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": true }, "outputs": [], "source": [ "sd = json.dumps(d)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}], \"C\": \"\\u3042\"}\n" ] } ], "source": [ "print(sd)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'str'>\n" ] } ], "source": [ "print(type(sd))" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "OrderedDict([('C', 'あ'),\n", " ('A', OrderedDict([('i', 1), ('j', 2)])),\n", " ('B',\n", " [OrderedDict([('X', 1), ('Y', 10)]),\n", " OrderedDict([('X', 2), ('Y', 20)])])])\n" ] } ], "source": [ "pprint.pprint(od)" ] }, { "cell_type":
"code", "execution_count": 47, "metadata": { "collapsed": true }, "outputs": [], "source": [ "sod = json.dumps(od)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"C\": \"\\u3042\", \"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}]}\n" ] } ], "source": [ "print(sod)" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'str'>\n" ] } ], "source": [ "print(type(sod))" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"A\":{\"i\":1,\"j\":2},\"B\":[{\"X\":1,\"Y\":10},{\"X\":2,\"Y\":20}],\"C\":\"\\u3042\"}\n" ] } ], "source": [ "print(json.dumps(d, separators=(',', ':')))" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"A\"-> {\"i\"-> 1 / \"j\"-> 2} / \"B\"-> [{\"X\"-> 1 / \"Y\"-> 10} / {\"X\"-> 2 / \"Y\"-> 20}] / \"C\"-> \"\\u3042\"}\n" ] } ], "source": [ "print(json.dumps(d, separators=(' / ', '-> ')))" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"A\": {\n", " \"i\": 1,\n", " \"j\": 2\n", " },\n", " \"B\": [\n", " {\n", " \"X\": 1,\n", " \"Y\": 10\n", " },\n", " {\n", " \"X\": 2,\n", " \"Y\": 20\n", " }\n", " ],\n", " \"C\": \"\\u3042\"\n", "}\n" ] } ], "source": [ "print(json.dumps(d, indent=4))" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"C\": \"\\u3042\", \"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}]}\n" ] } ], "source": [ "print(json.dumps(od))" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stdout", "output_type":
"stream", "text": [ "{\"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}], \"C\": \"\\u3042\"}\n" ] } ], "source": [ "print(json.dumps(od, sort_keys=True))" ] },
{ "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"C\": \"あ\", \"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}]}\n" ] } ], "source": [ "print(json.dumps(od, ensure_ascii=False))" ] },
{ "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": true }, "outputs": [], "source": [ "with open('data/dst/test2.json', 'w') as f:\n", "    json.dump(d, f, indent=4)" ] },
{ "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"A\": {\n", " \"i\": 1,\n", " \"j\": 2\n", " },\n", " \"B\": [\n", " {\n", " \"X\": 1,\n", " \"Y\": 10\n", " },\n", " {\n", " \"X\": 2,\n", " \"Y\": 20\n", " }\n", " ],\n", " \"C\": \"\\u3042\"\n", "}\n" ] } ], "source": [ "with open('data/dst/test2.json') as f:\n", "    print(f.read())" ] },
{ "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": true }, "outputs": [], "source": [ "d_new = {'A': 100, 'B': 'abc', 'C': 'あいうえお'}" ] },
{ "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# ensure_ascii=False writes the raw non-ASCII characters, so pin the file\n", "# encoding to UTF-8 instead of relying on the platform default.\n", "with open('data/dst/test_new.json', 'w', encoding='utf-8') as f:\n", "    json.dump(d_new, f, indent=2, ensure_ascii=False)" ] },
{ "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"A\": 100,\n", " \"B\": \"abc\",\n", " \"C\": \"あいうえお\"\n", "}\n" ] } ], "source": [ "# Read the file back with the same encoding it was written in.\n", "with open('data/dst/test_new.json', encoding='utf-8') as f:\n", "    print(f.read())" ] },
{ "cell_type": "code", "execution_count": 61, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# object_pairs_hook=OrderedDict keeps the key order found in the file.\n", "with open('data/dst/test_new.json', encoding='utf-8') as f:\n", "    d_update = json.load(f, object_pairs_hook=OrderedDict)" ] },
{ "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "OrderedDict([('A', 100), ('B', 'abc'), ('C', 'あいうえお')])\n" ] } ], "source": [ "print(d_update)" ] },
{ "cell_type": "code", "execution_count": 63, "metadata": { "collapsed": true }, "outputs": [], "source": [ "d_update['A'] = 200\n", "d_update.pop('B')\n", "d_update['D'] = 'new value'" ] },
{ "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "OrderedDict([('A', 200), ('C', 'あいうえお'), ('D', 'new value')])\n" ] } ], "source": [ "print(d_update)" ] },
{ "cell_type": "code", "execution_count": 65, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# The updated dict still holds non-ASCII text, so write it as UTF-8 too.\n", "with open('data/dst/test_new_update.json', 'w', encoding='utf-8') as f:\n", "    json.dump(d_update, f, indent=2, ensure_ascii=False)" ] },
{ "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"A\": 200,\n", " \"C\": \"あいうえお\",\n", " \"D\": \"new value\"\n", "}\n" ] } ], "source": [ "with open('data/dst/test_new_update.json', encoding='utf-8') as f:\n", "    print(f.read())" ] },
{ "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"C\": \"\\u3042\", \"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}]}\n" ] } ], "source": [ "with open('data/src/test.json') as f:\n", "    print(f.read())" ] },
{ "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"C\": \"あ\", \"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}]}\n" ] } ], "source": [ "# The 'unicode-escape' codec decodes the \\uXXXX sequences in the file into characters.\n", "with open('data/src/test.json', encoding='unicode-escape') as f:\n", "    print(f.read())" ] },
{ "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "name":
"stdout", "output_type": "stream", "text": [ "b'{\"C\": \"\\\\u3042\", \"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}]}'\n" ] } ], "source": [ "print(b)" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"C\": \"\\u3042\", \"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}]}\n" ] } ], "source": [ "print(b.decode())" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"C\": \"あ\", \"A\": {\"i\": 1, \"j\": 2}, \"B\": [{\"X\": 1, \"Y\": 10}, {\"X\": 2, \"Y\": 20}]}\n" ] } ], "source": [ "print(b.decode(encoding='unicode-escape'))" ] }, { "cell_type": "code", "execution_count": 72, "metadata": { "collapsed": true }, "outputs": [], "source": [ "d = {\"A\": 100, \"B\": 'abc', \"C\": 'あいうえお'}" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'A': 100, 'B': 'abc', 'C': 'あいうえお'}\n" ] } ], "source": [ "print(str(d))" ] }, { "cell_type": "code", "execution_count": 74, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# print(json.loads(str(d)))\n", "# JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"A\": 100, \"B\": \"abc\", \"C\": \"\\u3042\\u3044\\u3046\\u3048\\u304a\"}\n" ] } ], "source": [ "print(json.dumps(d))" ] }, { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'A': 100, 'B': 'abc', 'C': 'あいうえお'}\n" ] } ], "source": [ "print(json.loads(json.dumps(d)))" ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "<class 'dict'>\n" ] } ], "source": [ "print(type(json.loads(json.dumps(d))))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }