{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "kgtk-browser-overview",
   "metadata": {},
   "source": [
    "# KGTK Browser setup\n",
    "\n",
    "This notebook clones `kgtk-browser`, loads the example KGTK files into a graph cache, adds upper-cased columns and indexes to the label table for case-insensitive searches, and starts the browser's Flask app.\n",
    "\n",
    "The browser link cell uses `google.colab` when it is available and falls back to a localhost URL otherwise."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "objective-choir",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the latest kgtk-browser from GitHub\n",
    "!rm -rf kgtk-browser && git clone https://github.com/usc-isi-i2/kgtk-browser\n",
    "\n",
    "# Install the requirements (i.e. kgtk).\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install -r kgtk-browser/requirements.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "adjusted-moldova",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import sqlite3\n",
    "from contextlib import closing\n",
    "\n",
    "from kgtk.functions import kgtk, kypher\n",
    "from kgtk.configure_kgtk_notebooks import ConfigureKGTK\n",
    "\n",
    "# google.colab can only be imported inside a Google Colab runtime. Fall back to\n",
    "# None elsewhere so that this cell does not fail on a fresh local kernel.\n",
    "try:\n",
    "    from google.colab.output import eval_js\n",
    "except ImportError:\n",
    "    eval_js = None\n",
    "\n",
    "# Port the Flask app listens on; also used to build the browser link\n",
    "BROWSER_PORT = 8000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "inclusive-assurance",
   "metadata": {},
   "outputs": [],
   "source": [
    "extra_files_config = {\n",
    "    \"claims\": \"claims.tsv.gz\",\n",
    "    \"labels\": \"labels.tsv.gz\",\n",
    "    \"aliases\": \"aliases.tsv.gz\",\n",
    "    \"descriptions\": \"descriptions.tsv.gz\",\n",
    "    \"qualifiers\": \"qualifiers.tsv.gz\",\n",
    "    \"metadata\": \"metadata.tsv.gz\"\n",
    "}\n",
    "\n",
    "extra_files_json = '/tmp/extra_files.json'\n",
    "\n",
    "# The context manager flushes and closes the file before it is read again\n",
    "with open(extra_files_json, 'w') as config_file:\n",
    "    json.dump(extra_files_config, config_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "direct-element",
   "metadata": {},
   "outputs": [],
   "source": [
    "files = ['claims', 'labels', 'aliases', 'descriptions', 'qualifiers', 'metadata']\n",
    "\n",
    "input_files_url = \"https://github.com/usc-isi-i2/kgtk-tutorial-files/raw/main/datasets/kgtk-browser\"\n",
    "\n",
    "input_path = None\n",
    "output_path = '/tmp/projects'\n",
    "project_name = 'kgtk-browser'\n",
    "\n",
    "# Minimal KGTK configuration for this example\n",
    "ck = ConfigureKGTK(files, input_files_url=input_files_url)\n",
    "ck.configure_kgtk(input_graph_path=input_path,\n",
    "                  output_path=output_path,\n",
    "                  project_name=project_name,\n",
    "                  json_config_file=extra_files_json)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "compound-number",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "USE_CASES_DIR: /Users/amandeep/Github/kgtk-notebooks/use-cases\n",
      "OUT: /tmp/projects/kgtk-browser\n",
      "TEMP: /tmp/projects/kgtk-browser/temp.kgtk-browser\n",
      "STORE: /tmp/projects/kgtk-browser/temp.kgtk-browser/wikidata.sqlite3.db\n",
      "GRAPH: /Users/amandeep/isi-kgtk-tutorial/input\n",
      "kgtk: kgtk\n",
      "kypher: kgtk query --graph-cache /tmp/projects/kgtk-browser/temp.kgtk-browser/wikidata.sqlite3.db\n",
      "EXAMPLES_DIR: /Users/amandeep/Github/kgtk-notebooks/examples\n",
      "claims: /Users/amandeep/isi-kgtk-tutorial/input/claims.tsv.gz\n",
      "labels: /Users/amandeep/isi-kgtk-tutorial/input/labels.tsv.gz\n",
      "aliases: /Users/amandeep/isi-kgtk-tutorial/input/aliases.tsv.gz\n",
      "descriptions: /Users/amandeep/isi-kgtk-tutorial/input/descriptions.tsv.gz\n",
      "qualifiers: /Users/amandeep/isi-kgtk-tutorial/input/qualifiers.tsv.gz\n",
      "metadata: /Users/amandeep/isi-kgtk-tutorial/input/metadata.tsv.gz\n"
     ]
    }
   ],
   "source": [
    "# Review all of the environment variables used:\n",
    "\n",
    "ck.print_env_variables()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "configured-processor",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "kgtk query --graph-cache /tmp/projects/kgtk-browser/temp.kgtk-browser/wikidata.sqlite3.db -i \"/Users/amandeep/isi-kgtk-tutorial/input/claims.tsv.gz\" --as claims -i \"/Users/amandeep/isi-kgtk-tutorial/input/labels.tsv.gz\" --as labels -i \"/Users/amandeep/isi-kgtk-tutorial/input/aliases.tsv.gz\" --as aliases -i \"/Users/amandeep/isi-kgtk-tutorial/input/descriptions.tsv.gz\" --as descriptions -i \"/Users/amandeep/isi-kgtk-tutorial/input/qualifiers.tsv.gz\" --as qualifiers -i \"/Users/amandeep/isi-kgtk-tutorial/input/metadata.tsv.gz\" --as metadata --limit 3\n",
      "node1\tlabel\tnode2\tid\n",
      "P10\tP31\tQ18610173\tP10-P31-Q18610173-85ef4d24-0\n",
      "P10\tPdirected_pagerank\t2.2996980087906936e-06\tP10-Pdirected_pagerank-2\n",
      "P10\tPout_degree\t1\tP10-Pout_degree-6b86b2\n"
     ]
    }
   ],
   "source": [
    "ck.load_files_into_cache()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "considerable-schema",
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_upper_column(conn, table, column):\n",
    "    \"\"\"Add, fill, index and analyze an upper-cased copy of `column` in `table`.\n",
    "\n",
    "    The copy is stored in a text column named '<column>;upper' and indexed as\n",
    "    '<table>_<column>upper_idx'. It supports case-insensitive searches on item labels.\n",
    "\n",
    "    The function can be called repeatedly: the column and the index are only\n",
    "    created when they do not exist yet, and the values are refreshed on every call.\n",
    "\n",
    "    Args:\n",
    "        conn: an open sqlite3 connection to the graph cache.\n",
    "        table: name of the graph table to modify.\n",
    "        column: name of the existing column to upper-case.\n",
    "    \"\"\"\n",
    "    upper_column = f'{column};upper'\n",
    "    index_name = f'{table}_{column}upper_idx'\n",
    "\n",
    "    # ALTER TABLE fails on a duplicate column name, so look at the schema first.\n",
    "    # PRAGMA table_info returns one row per column; the name is the second field.\n",
    "    existing_columns = {row[1] for row in conn.execute(f'PRAGMA table_info(\"{table}\")')}\n",
    "    if upper_column not in existing_columns:\n",
    "        conn.execute(f'ALTER TABLE \"{table}\" ADD COLUMN \"{upper_column}\" text')\n",
    "        conn.commit()\n",
    "\n",
    "    conn.execute(f'UPDATE \"{table}\" SET \"{upper_column}\" = upper(\"{column}\")')\n",
    "    conn.commit()\n",
    "\n",
    "    conn.execute(f'CREATE INDEX IF NOT EXISTS \"{index_name}\" on \"{table}\" (\"{upper_column}\")')\n",
    "    conn.commit()\n",
    "\n",
    "    conn.execute(f'ANALYZE \"{index_name}\"')\n",
    "    conn.commit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dimensional-bench",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Graph table that holds the labels file. The recorded `--show-cache` output in the\n",
    "# next cell lists labels as graph_2; adjust this if your cache reports another graph.\n",
    "LABEL_TABLE = 'graph_2'\n",
    "\n",
    "# Build and index the \"node1;upper\" and \"node2;upper\" columns in the label table.\n",
    "# closing() releases the connection even if one of the statements fails.\n",
    "with closing(sqlite3.connect(os.environ['STORE'])) as conn:\n",
    "    for column in ('node1', 'node2'):\n",
    "        add_upper_column(conn, LABEL_TABLE, column)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "fuzzy-directive",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Graph Cache:\n",
      "DB file: /tmp/projects/kgtk-browser/temp.kgtk-browser/wikidata.sqlite3.db\n",
      " size: 171.91 MB \tfree: 0 Bytes \tmodified: 2021-10-23 14:08:00\n",
      "\n",
      "KGTK File Information:\n",
      "aliases:\n",
      " size: 1.28 MB \tmodified: 2021-10-23 14:06:10 \tgraph: graph_3\n",
      "claims:\n",
      " size: 31.25 MB \tmodified: 2021-10-23 14:06:06 \tgraph: graph_1\n",
      "descriptions:\n",
      " size: 1.28 MB \tmodified: 2021-10-23 14:06:11 \tgraph: graph_4\n",
      "labels:\n",
      " size: 1.02 MB \tmodified: 2021-10-23 14:06:08 \tgraph: graph_2\n",
      "metadata:\n",
      " size: 20.69 KB \tmodified: 2021-10-23 14:06:14 \tgraph: graph_6\n",
      "qualifiers:\n",
      " size: 5.73 MB \tmodified: 2021-10-14 16:04:41 \tgraph: graph_5\n",
      "\n",
      "Graph Table Information:\n",
      "graph_1:\n",
      " size: 118.56 MB \tcreated: 2021-10-23 14:07:06\n",
      " header: ['node1', 'label', 'node2', 'id']\n",
      "graph_2:\n",
      " size: 4.09 MB \tcreated: 2021-10-23 14:07:07\n",
      " header: ['node1', 'label', 'node2', 'id']\n",
      "graph_3:\n",
      " size: 5.27 MB \tcreated: 2021-10-23 14:07:08\n",
      " header: ['node1', 'label', 'node2', 'id']\n",
      "graph_4:\n",
      " size: 5.80 MB \tcreated: 2021-10-23 14:07:08\n",
      " header: ['node1', 'label', 'node2', 'id']\n",
      "graph_5:\n",
      " size: 32.21 MB \tcreated: 2021-10-23 14:07:14\n",
      " header: ['node1', 'label', 'node2', 'id']\n",
      "graph_6:\n",
      " size: 184.00 KB \tcreated: 2021-10-23 14:07:14\n",
      " header: ['node1', 'label', 'node2', 'id']\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Verify that the graph cache has loaded as expected\n",
    "kgtk(\"\"\"\n",
    " query --show-cache \\\n",
    " --graph-cache ${STORE}\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "objective-immigration",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get a link to the browser: a unique proxy URL pointing to this google colab\n",
    "# environment when running there, a localhost URL otherwise\n",
    "if eval_js is not None:\n",
    "    browser_url = eval_js(f\"google.colab.kernel.proxyPort({BROWSER_PORT})\") + \"browser\"\n",
    "else:\n",
    "    browser_url = f\"http://localhost:{BROWSER_PORT}/browser\"\n",
    "\n",
    "print(browser_url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "viral-diving",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the environment variables for flask\n",
    "os.environ['FLASK_ENV'] = 'production'\n",
    "os.environ['FLASK_APP'] = 'kgtk-browser/kgtk_browser_app.py'\n",
    "os.environ['KGTK_BROWSER_CONFIG'] = 'kgtk_browser_config.py'\n",
    "\n",
    "# Run the kgtk-browser app ({BROWSER_PORT} is interpolated by IPython)\n",
    "!flask run --port {BROWSER_PORT}"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "kgtk-env",
   "language": "python",
   "name": "kgtk-env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}