{ "cells": [ { "cell_type": "markdown", "metadata": { "ExecuteTime": { "end_time": "2019-09-23T18:50:19.036357Z", "start_time": "2019-09-23T18:50:19.031896Z" } }, "source": [ "# Querying\n", "\n", "This notebook demonstrates how to retrieve, query and search data using the Forge." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-09-23T18:50:20.068658Z", "start_time": "2019-09-23T18:50:19.054054Z" } }, "outputs": [], "source": [ "from kgforge.core import KnowledgeGraphForge" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "forge = KnowledgeGraphForge(\"../../configurations/demo-forge.yml\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from kgforge.core import Resource" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Retrieval" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "jane = Resource(type=\"Person\", name=\"Jane Doe\")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " _register_one\n", " True\n" ] } ], "source": [ "forge.register(jane)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "resource = forge.retrieve(jane.id)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "resource == jane" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### specific version" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "jane = Resource(type=\"Person\", name=\"Jane Doe\")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": 
"stdout", "output_type": "stream", "text": [ " _register_one\n", " True\n" ] } ], "source": [ "forge.register(jane)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " _tag_one\n", " True\n" ] } ], "source": [ "forge.tag(jane, \"v1\")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "jane.email = \"jane.doe@epfl.ch\"" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " _update_one\n", " True\n" ] } ], "source": [ "forge.update(jane)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2019-09-23T18:50:21.317601Z", "start_time": "2019-09-23T18:50:21.310418Z" } }, "outputs": [ { "data": { "text/plain": [ "2" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "jane._store_metadata.version" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2019-09-23T18:50:21.332678Z", "start_time": "2019-09-23T18:50:21.322025Z" } }, "outputs": [], "source": [ "jane_v1 = forge.retrieve(jane.id, version=1)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "ExecuteTime": { "end_time": "2019-09-23T18:50:21.370051Z", "start_time": "2019-09-23T18:50:21.363782Z" } }, "outputs": [], "source": [ "jane_v1_tag = forge.retrieve(jane.id, version=\"v1\")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "ExecuteTime": { "end_time": "2019-09-23T18:50:21.379911Z", "start_time": "2019-09-23T18:50:21.373539Z" } }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "jane_v1 == jane_v1_tag" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " id: 
9e689d89-3425-46d8-95e7-7058e9724ea4\n", " type: Person\n", " name: Jane Doe\n", "}\n" ] } ], "source": [ "print(jane_v1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### error handling" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " retrieve\n", " RetrievalError: resource not found\n", "\n" ] } ], "source": [ "resource = forge.retrieve(\"123\")" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "None\n" ] } ], "source": [ "print(resource)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Searching" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "jane = Resource(type=\"Person\", name=\"Jane Doe\")" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "john = Resource(type=\"Person\", name=\"John Smith\")" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "association_jane = Resource(type=\"Association\", agent=jane)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "association_john = Resource(type=\"Association\", agent=john)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "associations = [association_jane, association_john]" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 2\n", " _register_one\n", " True\n" ] } ], "source": [ "forge.register(associations)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`Association` is a type known to the Model, and the `paths` method loads the data structure of the given type. Refer to the `11 - Modeling.ipynb` notebook to learn about Modeling and Types."
] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " DemoModel does not distinguish values and constraints in templates for now.\n", " DemoModel does not automatically include nested schemas for now.\n" ] } ], "source": [ "p = forge.paths(\"Association\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You have autocompletion on `p` and this can be used to build a search." ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " DemoStore does not support handling of errors with QueryingError for now.\n", " DemoStore does not support traversing lists for now.\n" ] } ], "source": [ "resources = forge.search(p.type == \"Association\")" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "list" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(resources)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(resources)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "kgforge.core.resource.Resource" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(resources[0])" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtypeagent.typeagent.name
040cf0989-175a-4948-ae49-014546efbd8eAssociationPersonJane Doe
1b5c39b73-cc11-4339-8c70-47f802f8ddafAssociationPersonJohn Smith
\n", "
" ], "text/plain": [ " id type agent.type agent.name\n", "0 40cf0989-175a-4948-ae49-014546efbd8e Association Person Jane Doe\n", "1 b5c39b73-cc11-4339-8c70-47f802f8ddaf Association Person John Smith" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "forge.as_dataframe(resources)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtypeagent.typeagent.namedeprecatedversion
040cf0989-175a-4948-ae49-014546efbd8eAssociationPersonJane DoeFalse1
1b5c39b73-cc11-4339-8c70-47f802f8ddafAssociationPersonJohn SmithFalse1
\n", "
" ], "text/plain": [ " id type agent.type agent.name \\\n", "0 40cf0989-175a-4948-ae49-014546efbd8e Association Person Jane Doe \n", "1 b5c39b73-cc11-4339-8c70-47f802f8ddaf Association Person John Smith \n", "\n", " deprecated version \n", "0 False 1 \n", "1 False 1 " ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "forge.as_dataframe(resources, store_metadata=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### nested field querying" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You have autocompletion on `p` but also on nested properties like `p.agent`." ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " DemoStore does not support handling of errors with QueryingError for now.\n", " DemoStore does not support traversing lists for now.\n" ] } ], "source": [ "resources = forge.search(p.type == \"Association\", p.agent.name == \"John Smith\")" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(resources)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "# print(resources[0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Graph traversing\n", "\n", "SPARQL is used to traverse the graph. The user can provide simplified queries: neither prefixes, compacted URIs, nor full URIs are required, only the terms (properties) made available to the user by the Model. Refer to the `11 - Modeling.ipynb` notebook to learn about Templates."
] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "jane = Resource(type=\"Person\", name=\"Jane Doe\")" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "john = Resource(type=\"Person\", name=\"John Smith\")" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "association = Resource(type=\"Association\", agent=[jane, john])" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " _register_one\n", " True\n" ] } ], "source": [ "forge.register(association)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " DemoModel does not distinguish values and constraints in templates for now.\n", " DemoModel does not automatically include nested schemas for now.\n", "{\n", " type: Association\n", " agent:\n", " {\n", " type: Person\n", " name: hasattr\n", " }\n", "}\n" ] } ], "source": [ "forge.template(\"Association\")" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "query = \"\"\"\n", " SELECT ?x ?name\n", " WHERE {\n", " ?x a Association ;\n", " agent ?agent .\n", " ?agent name ?name .\n", " }\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "# resources = forge.sparql(query)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "# type(resources)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "# len(resources)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "# type(resources[0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### rewritten query display" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": 
[ { "name": "stdout", "output_type": "stream", "text": [ "Submitted query:\n", " PREFIX prov: \n", " PREFIX rdf: \n", " PREFIX schema: \n", " \n", " SELECT ?x ?name\n", " WHERE {\n", " ?x rdf:type prov:Association ;\n", " prov:agent ?agent .\n", " ?agent schema:name ?name .\n", " }\n", "\n", " not_supported\n", " NotSupportedError: DemoStore is not supporting _sparql()\n", "\n" ] } ], "source": [ "forge.sparql(query, debug=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Downloading" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "jane = Resource(type=\"Person\", name=\"Jane Doe\")" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "associations.tsv\n", "persons.csv\n" ] } ], "source": [ "! ls -p ../../data | egrep -v /$" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "distribution = forge.attach(\"../../data\")" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "association = Resource(type=\"Association\", agent=jane, distribution=distribution)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "# forge.register(association)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "# forge.download(association, \"distribution.contentUrl\", \"./downloaded/\")" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "# ! 
ls ./downloaded/" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "kgforge(v2)", "language": "python", "name": "kgforge" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.5" } }, "nbformat": 4, "nbformat_minor": 4 }