{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ONS geography codes and MP name reconciliation\n",
    "\n",
    "Code cells that begin with a `# GET /path` comment each build one response: they parse the JSON string `REQUEST` and `print` the response body. This looks like the Jupyter Kernel Gateway notebook-http convention (confirm against the deployment), so those comments must stay on the first line of their cells."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import re\n",
    "import sqlite3\n",
    "import sys\n",
    "import urllib.parse  # imported explicitly: a bare `import urllib` does not load the `parse` submodule\n",
    "from os.path import expanduser\n",
    "\n",
    "from fuzzywuzzy import fuzz\n",
    "from pandas import read_csv, read_sql_query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# SQLite database holding the `codelist` and `metadata` tables queried below.\n",
    "ONS_DB_PATH = os.path.join(expanduser(\"~\"), \"notebooks\", \"onsgeocodes.sqlite\")\n",
    "con = sqlite3.connect(ONS_DB_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Empty placeholder request so the handler cells can be run interactively.\n",
    "# NOTE(review): presumably the serving layer replaces REQUEST on every call - confirm.\n",
    "REQUEST = json.dumps({\n",
    "    'path': {},\n",
    "    'args': {}\n",
    "})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ONS code lookups"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# GET /ons/:code\n",
    "request = json.loads(REQUEST)\n",
    "code = request['path'].get('code')\n",
    "\n",
    "# The code is bound as a parameter instead of being formatted into the SQL\n",
    "# text, so a crafted path segment cannot change the statement.\n",
    "q = 'SELECT * FROM codelist WHERE \"GEOGCD\" = ?'\n",
    "codes = read_sql_query(q, con, params=(code,))\n",
    "print('{\"codes\":%s}' % codes.to_json(orient='records'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# GET /ons/current/:name\n",
    "request = json.loads(REQUEST)\n",
    "name = request['path'].get('name')\n",
    "\n",
    "# The name is bound as a parameter; 'live' uses single quotes so SQLite always\n",
    "# reads it as a string literal rather than trying it as an identifier first.\n",
    "q = '''\n",
    "SELECT * FROM codelist JOIN metadata\n",
    "WHERE \"GEOGNM\" = ? AND codeAbbrv = sheet AND codelist.STATUS = 'live'\n",
    "'''\n",
    "codes = read_sql_query(q, con, params=(name,))\n",
    "print('{\"codes\":%s}' % codes.to_json(orient='records'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reconciliation demo"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "REQUEST = json.dumps({\n",
    "'path' : {'query':'Diana Abbot'},\n",
    "'args' : {}\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# CSV of names to reconcile against. The path is fixed; an environment\n",
    "# override (os.getenv('RECONFILE', 'mpnames.csv')) was previously disabled here.\n",
    "RECONFILE = os.path.join(expanduser(\"~\"), \"notebooks\", \"mpnames.csv\")\n",
    "# Column searched for names and column used as the candidate id.\n",
    "SEARCHCOL = os.getenv('SEARCHCOL', 'DisplayAs')\n",
    "IDCOL = os.getenv('IDCOL', 'Member_Id')\n",
    "\n",
    "# Matching threshold: only scores strictly above this value are returned.\n",
    "match_threshold = 70\n",
    "\n",
    "# Basic service metadata. There are a number of other documented options\n",
    "# but this is all we need for a simple service.\n",
    "metadata = {\n",
    "    \"name\": \"Person Reconciliation Service\",\n",
    "    \"identifierSpace\": \"http://rdf.freebase.com/ns/type.object.id\",\n",
    "    \"schemaSpace\": \"http://rdf.freebase.com/ns/type.object.id\",\n",
    "    \"view\": {\n",
    "        \"url\": \"http://127.0.0.1:8889{{id}}\"\n",
    "    },\n",
    "    \"preview\": {\n",
    "        \"url\": \"http://127.0.0.1:8889{{id}}/preview\",\n",
    "        \"width\": 430,\n",
    "        \"height\": 300\n",
    "    },\n",
    "    \"defaultTypes\": [{\"id\": \"/people/person\", \"name\": \"Person\"}],\n",
    "}\n",
    "\n",
    "# Read in person records from csv file, one dict per row.\n",
    "reader = read_csv(RECONFILE)\n",
    "records = reader.to_dict(orient='records')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def search(query):\n",
    "    \"\"\"Return reconciliation candidates for ``query``.\n",
    "\n",
    "    Each row of ``records`` is scored against ``query`` with\n",
    "    ``fuzz.token_set_ratio`` on its ``SEARCHCOL`` value. Rows scoring strictly\n",
    "    above ``match_threshold`` are returned in file order as dicts with the keys\n",
    "    ``id``, ``name``, ``score``, ``match`` and ``type``; ``match`` is True only\n",
    "    when ``query`` equals the stored name exactly.\n",
    "    \"\"\"\n",
    "    matches = []\n",
    "\n",
    "    # Search person records for matches.\n",
    "    for r in records:\n",
    "        candidate = r[SEARCHCOL]\n",
    "        score = fuzz.token_set_ratio(query, candidate)\n",
    "\n",
    "        if score > match_threshold:\n",
    "            matches.append({\n",
    "                \"id\": r[IDCOL],\n",
    "                \"name\": candidate,\n",
    "                \"score\": score,\n",
    "                \"match\": query == candidate,\n",
    "                \"type\": [{\"id\": \"/people/person\", \"name\": \"Person\"}]\n",
    "            })\n",
    "\n",
    "    return matches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# GET /parliament/reconcile/\n",
    "request = json.loads(REQUEST)\n",
    "\n",
    "# Example: http://foo.com/bar/reconcile?queries={ \"q0\" : { \"query\" : \"foo\" }, \"q1\" : { \"query\" : \"bar\" } }\n",
    "\n",
    "# Both arguments are indexed with [0] below, i.e. they arrive as lists of values.\n",
    "callback = request['args'].get('callback')\n",
    "queries = request['args'].get('queries')\n",
    "\n",
    "if callback:\n",
    "    # JSONP: wrap the service metadata in the caller-supplied function name.\n",
    "    print(\"%s(%s)\" % (callback[0], json.dumps(metadata)))\n",
    "elif queries:\n",
    "    try:\n",
    "        queries = json.loads(urllib.parse.unquote(queries[0]))\n",
    "    except ValueError:\n",
    "        # An undecodable payload gets the same empty answer as a non-object one.\n",
    "        queries = None\n",
    "\n",
    "    if isinstance(queries, dict):\n",
    "        results = {}\n",
    "        for key, spec in queries.items():\n",
    "            # Entries that are not objects, or lack a 'query', yield no candidates.\n",
    "            text = spec.get('query') if isinstance(spec, dict) else None\n",
    "            results[key] = {\"result\": search(text) if text is not None else []}\n",
    "        print(json.dumps(results))\n",
    "    else:\n",
    "        print(json.dumps({\"result\": []}))\n",
    "else:\n",
    "    print(json.dumps(metadata))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}