{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-09-23T18:50:19.036357Z",
     "start_time": "2019-09-23T18:50:19.031896Z"
    }
   },
   "source": [
    "# Querying\n",
    "\n",
    "This notebook demonstrates Nexus Forge data [querying features](https://nexus-forge.readthedocs.io/en/latest/interaction.html#querying)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-09-23T18:50:20.068658Z",
     "start_time": "2019-09-23T18:50:19.054054Z"
    }
   },
   "outputs": [],
   "source": [
    "from kgforge.core import KnowledgeGraphForge"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A configuration file is needed in order to create a KnowledgeGraphForge session. A configuration can be generated using the notebook [00-Initialization.ipynb](00%20-%20Initialization.ipynb)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "forge = KnowledgeGraphForge(\"../../configurations/forge.yml\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from kgforge.core import Resource\n",
    "from kgforge.specializations.resources import Dataset\n",
    "from kgforge.core.wrappings.paths import Filter, FilterOperator"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Retrieval"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### latest version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane = Resource(type=\"Person\", name=\"Jane Doe\", award=[\"Nobel\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _register_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.register(jane)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "resource = forge.retrieve(jane.id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource == jane"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### specific version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane = Resource(type=\"Person\", name=\"Jane Doe\", award=[\"Nobel\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _register_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.register(jane)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _tag_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.tag(jane, \"v1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane.email = [\"jane.doe@epfl.ch\", \"jane.doe@example.org\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _update_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.update(jane)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-09-23T18:50:21.317601Z",
     "start_time": "2019-09-23T18:50:21.310418Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3\n"
     ]
    }
   ],
   "source": [
    "# Print the current version of `jane`; the metadata attribute holding it differs per store.\n",
    "try:\n",
    "    # DemoStore\n",
    "    print(jane._store_metadata.version)\n",
    "except AttributeError:\n",
    "    # BlueBrainNexus\n",
    "    # Only a missing `version` attribute triggers this fallback; any other error still surfaces.\n",
    "    print(jane._store_metadata._rev)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-09-23T18:50:21.332678Z",
     "start_time": "2019-09-23T18:50:21.322025Z"
    }
   },
   "outputs": [],
   "source": [
    "jane_v1 = forge.retrieve(jane.id, version=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-09-23T18:50:21.370051Z",
     "start_time": "2019-09-23T18:50:21.363782Z"
    }
   },
   "outputs": [],
   "source": [
    "jane_v1_tag = forge.retrieve(jane.id, version=\"v1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane_v1_rev = forge.retrieve(jane.id+\"?rev=1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-09-23T18:50:21.379911Z",
     "start_time": "2019-09-23T18:50:21.373539Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "jane_v1 == jane_v1_tag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "jane_v1 == jane_v1_rev"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "jane_v1 != jane"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n"
     ]
    }
   ],
   "source": [
    "# Print the version of the retrieved `jane_v1`; the metadata attribute holding it differs per store.\n",
    "try:\n",
    "    # DemoStore\n",
    "    print(jane_v1._store_metadata.version)\n",
    "except AttributeError:\n",
    "    # BlueBrainNexus\n",
    "    # Only a missing `version` attribute triggers this fallback; any other error still surfaces.\n",
    "    print(jane_v1._store_metadata._rev)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### cross-bucket retrieval\n",
    "It is possible to retrieve resources stored in buckets different than the configured one. The configured store should of course support it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "resource = forge.retrieve(jane.id, cross_bucket=True) # cross_bucket defaults to False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/crisely09/_/4088d652-7b1c-42a9-9570-97ca23f4ec98',\n",
       " '_constrainedBy': 'https://bluebrain.github.io/nexus/schemas/unconstrained.json',\n",
       " '_createdAt': '2023-05-22T08:57:55.754Z',\n",
       " '_createdBy': 'https://sandbox.bluebrainnexus.io/v1/realms/github/users/crisely09',\n",
       " '_deprecated': False,\n",
       " '_incoming': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/crisely09/_/4088d652-7b1c-42a9-9570-97ca23f4ec98/incoming',\n",
       " '_outgoing': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/crisely09/_/4088d652-7b1c-42a9-9570-97ca23f4ec98/outgoing',\n",
       " '_project': 'https://sandbox.bluebrainnexus.io/v1/projects/github-users/crisely09',\n",
       " '_rev': 3,\n",
       " '_schemaProject': 'https://sandbox.bluebrainnexus.io/v1/projects/github-users/crisely09',\n",
       " '_self': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/crisely09/_/4088d652-7b1c-42a9-9570-97ca23f4ec98',\n",
       " '_updatedAt': '2023-05-22T08:57:56.380Z',\n",
       " '_updatedBy': 'https://sandbox.bluebrainnexus.io/v1/realms/github/users/crisely09'}"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource._store_metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Action(error=None, message=None, operation='retrieve', succeeded=True)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource._last_action"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource._synchronized"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Retrieving using the resource url\n",
    "One can also use the value of `_self` from `._store_metadata` to retrieve a resource."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "import copy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "other_resource = copy.deepcopy(resource)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "other_resource.id = \"https://myincreadibleid-987654321\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _register_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.register(other_resource)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = other_resource._store_metadata['_self']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "same_resource_url = forge.retrieve(id=url)\n",
    "same_resource_id = forge.retrieve(id=other_resource.id)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Confirm they are the same"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "same_resource_id == same_resource_url"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Original source retrieval\n",
    "When using BlueBrainNexusStore, it is possible to retrieve a resource's payload as it was registered (`retrieve_source=True`), without any changes related to store-added metadata or JSON-LD framing. Note that the example below passes `retrieve_source=False` instead."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "resource = forge.retrieve(jane.id, retrieve_source=False) # retrieve_source defaults to True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/crisely09/_/4088d652-7b1c-42a9-9570-97ca23f4ec98',\n",
       " 'type': 'Person',\n",
       " 'award': 'Nobel',\n",
       " 'email': ['jane.doe@epfl.ch', 'jane.doe@example.org'],\n",
       " 'name': 'Jane Doe'}"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_json(resource)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/crisely09/_/4088d652-7b1c-42a9-9570-97ca23f4ec98',\n",
       " '_constrainedBy': 'https://bluebrain.github.io/nexus/schemas/unconstrained.json',\n",
       " '_createdAt': '2023-05-22T08:57:55.754Z',\n",
       " '_createdBy': 'https://sandbox.bluebrainnexus.io/v1/realms/github/users/crisely09',\n",
       " '_deprecated': False,\n",
       " '_incoming': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/crisely09/_/4088d652-7b1c-42a9-9570-97ca23f4ec98/incoming',\n",
       " '_outgoing': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/crisely09/_/4088d652-7b1c-42a9-9570-97ca23f4ec98/outgoing',\n",
       " '_project': 'https://sandbox.bluebrainnexus.io/v1/projects/github-users/crisely09',\n",
       " '_rev': 3,\n",
       " '_schemaProject': 'https://sandbox.bluebrainnexus.io/v1/projects/github-users/crisely09',\n",
       " '_self': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/crisely09/_/4088d652-7b1c-42a9-9570-97ca23f4ec98',\n",
       " '_updatedAt': '2023-05-22T08:57:56.380Z',\n",
       " '_updatedBy': 'https://sandbox.bluebrainnexus.io/v1/realms/github/users/crisely09'}"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource._store_metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Action(error=None, message=None, operation='retrieve', succeeded=True)"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource._last_action"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource._synchronized"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### error handling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> raise_for_status\n",
      "<error> HTTPError: 404 Client Error: Not Found for url: https://sandbox.bluebrainnexus.io/v1/resources/github-users/crisely09/_/%3A%2F%2F123\n",
      "\n"
     ]
    }
   ],
   "source": [
    "resource = forge.retrieve(\"123\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource is None"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Searching"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: DemoModel and RdfModel schemas have not been synchronized yet. This section is to be run with RdfModel. Commented lines are for DemoModel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane = Resource(type=\"Person\", name=\"Jane Doe\")\n",
    "contribution_jane = Resource(type=\"Contribution\", agent=jane)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "john = Resource(type=\"Person\", name=\"John Smith\")\n",
    "contribution_john = Resource(type=\"Contribution\", agent=john)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = Dataset(forge, type=\"Dataset\", contribution=[contribution_jane, contribution_john])\n",
    "dataset.add_distribution(\"../../data/associations.tsv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _register_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.register(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/crisely09/_/5b59d793-d0de-42ec-a841-eac6e06707db',\n",
       " 'type': 'Dataset',\n",
       " 'contribution': [{'type': 'Contribution',\n",
       "   'agent': {'type': 'Person', 'name': 'Jane Doe'}},\n",
       "  {'type': 'Contribution', 'agent': {'type': 'Person', 'name': 'John Smith'}}],\n",
       " 'distribution': {'type': 'DataDownload',\n",
       "  'atLocation': {'type': 'Location',\n",
       "   'store': {'id': 'https://bluebrain.github.io/nexus/vocabulary/diskStorageDefault',\n",
       "    'type': 'DiskStorage',\n",
       "    '_rev': 1}},\n",
       "  'contentSize': {'unitCode': 'bytes', 'value': 477},\n",
       "  'contentUrl': 'https://sandbox.bluebrainnexus.io/v1/files/github-users/crisely09/1f6d9e0d-a4e3-46de-b742-3665af2c6b53',\n",
       "  'digest': {'algorithm': 'SHA-256',\n",
       "   'value': '789aa07948683fe036ac29811814a826b703b562f7d168eb70dee1fabde26859'},\n",
       "  'encodingFormat': 'text/tab-separated-values',\n",
       "  'name': 'associations.tsv'}}"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_json(dataset)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using resource paths as filters"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `paths` method loads the template or property paths (i.e. expected properties) for a given type.\n",
    "\n",
    "Please refer to the [Modeling.ipynb](11%20-%20Modeling.ipynb) notebook to learn about templates and types."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "p = forge.paths(\"Dataset\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Autocompletion is enabled on `p` and this can be used to create search filters."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: There is a known issue for RdfModel which requires using `p.type.id` instead of `p.type`."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "All [python comparison operators](https://www.w3schools.com/python/gloss_python_comparison_operators.asp) are supported."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "resources = forge.search(p.type.id==\"Person\", limit=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>name</th>\n",
       "      <th>distribution.type</th>\n",
       "      <th>distribution.atLocation.type</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>distribution.contentUrl</th>\n",
       "      <th>distribution.digest.algorithm</th>\n",
       "      <th>distribution.digest.value</th>\n",
       "      <th>distribution.encodingFormat</th>\n",
       "      <th>distribution.name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Albert Einstein</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Marie Curie</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Jane Doe</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477.0</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>789aa07948683fe036ac29811814a826b703b562f7d168...</td>\n",
       "      <td>text/tab-separated-values</td>\n",
       "      <td>associations.tsv</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id    type             name  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Person  Albert Einstein   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  Person      Marie Curie   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  Person         Jane Doe   \n",
       "\n",
       "  distribution.type distribution.atLocation.type  \\\n",
       "0               NaN                          NaN   \n",
       "1               NaN                          NaN   \n",
       "2      DataDownload                     Location   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "\n",
       "  distribution.atLocation.store.type  distribution.atLocation.store._rev  \\\n",
       "0                                NaN                                 NaN   \n",
       "1                                NaN                                 NaN   \n",
       "2                        DiskStorage                                 1.0   \n",
       "\n",
       "  distribution.contentSize.unitCode  distribution.contentSize.value  \\\n",
       "0                               NaN                             NaN   \n",
       "1                               NaN                             NaN   \n",
       "2                             bytes                           477.0   \n",
       "\n",
       "                             distribution.contentUrl  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "\n",
       "  distribution.digest.algorithm  \\\n",
       "0                           NaN   \n",
       "1                           NaN   \n",
       "2                       SHA-256   \n",
       "\n",
       "                           distribution.digest.value  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  789aa07948683fe036ac29811814a826b703b562f7d168...   \n",
       "\n",
       "  distribution.encodingFormat distribution.name  \n",
       "0                         NaN               NaN  \n",
       "1                         NaN               NaN  \n",
       "2   text/tab-separated-values  associations.tsv  "
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>name</th>\n",
       "      <th>_constrainedBy</th>\n",
       "      <th>_createdAt</th>\n",
       "      <th>_createdBy</th>\n",
       "      <th>_deprecated</th>\n",
       "      <th>_incoming</th>\n",
       "      <th>_outgoing</th>\n",
       "      <th>_project</th>\n",
       "      <th>...</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>distribution.contentUrl</th>\n",
       "      <th>distribution.digest.algorithm</th>\n",
       "      <th>distribution.digest.value</th>\n",
       "      <th>distribution.encodingFormat</th>\n",
       "      <th>distribution.name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Albert Einstein</td>\n",
       "      <td>https://bluebrain.github.io/nexus/schemas/unco...</td>\n",
       "      <td>2022-07-20T15:30:01.482Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Marie Curie</td>\n",
       "      <td>https://bluebrain.github.io/nexus/schemas/unco...</td>\n",
       "      <td>2022-07-20T15:30:01.467Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Jane Doe</td>\n",
       "      <td>https://bluebrain.github.io/nexus/schemas/unco...</td>\n",
       "      <td>2022-07-20T15:27:50.435Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>...</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477.0</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>789aa07948683fe036ac29811814a826b703b562f7d168...</td>\n",
       "      <td>text/tab-separated-values</td>\n",
       "      <td>associations.tsv</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id    type             name  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Person  Albert Einstein   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  Person      Marie Curie   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  Person         Jane Doe   \n",
       "\n",
       "                                      _constrainedBy  \\\n",
       "0  https://bluebrain.github.io/nexus/schemas/unco...   \n",
       "1  https://bluebrain.github.io/nexus/schemas/unco...   \n",
       "2  https://bluebrain.github.io/nexus/schemas/unco...   \n",
       "\n",
       "                 _createdAt  \\\n",
       "0  2022-07-20T15:30:01.482Z   \n",
       "1  2022-07-20T15:30:01.467Z   \n",
       "2  2022-07-20T15:27:50.435Z   \n",
       "\n",
       "                                          _createdBy  _deprecated  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/realms/gi...        False   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/realms/gi...        False   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/realms/gi...        False   \n",
       "\n",
       "                                           _incoming  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                                           _outgoing  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                                            _project  ...  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/projects/...  ...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/projects/...  ...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/projects/...  ...   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "\n",
       "  distribution.atLocation.store.type distribution.atLocation.store._rev  \\\n",
       "0                                NaN                                NaN   \n",
       "1                                NaN                                NaN   \n",
       "2                        DiskStorage                                1.0   \n",
       "\n",
       "  distribution.contentSize.unitCode distribution.contentSize.value  \\\n",
       "0                               NaN                            NaN   \n",
       "1                               NaN                            NaN   \n",
       "2                             bytes                          477.0   \n",
       "\n",
       "                             distribution.contentUrl  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "\n",
       "  distribution.digest.algorithm  \\\n",
       "0                           NaN   \n",
       "1                           NaN   \n",
       "2                       SHA-256   \n",
       "\n",
       "                           distribution.digest.value  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  789aa07948683fe036ac29811814a826b703b562f7d168...   \n",
       "\n",
       "  distribution.encodingFormat  distribution.name  \n",
       "0                         NaN                NaN  \n",
       "1                         NaN                NaN  \n",
       "2   text/tab-separated-values   associations.tsv  \n",
       "\n",
       "[3 rows x 27 columns]"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources, store_metadata=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Search results are not synchronized\n",
    "resources[0]._synchronized"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Using nested resource property"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Property autocompletion is available on a path `p`, including for nested properties such as `p.distribution.encodingFormat`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search for resources of type Dataset and with text/tab-separated-values as distribution.encodingFormat\n",
    "resources = forge.search(p.type.id == \"Dataset\", p.distribution.encodingFormat == \"text/tab-separated-values\", limit=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>contribution</th>\n",
       "      <th>distribution.type</th>\n",
       "      <th>distribution.atLocation.type</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>distribution.contentUrl</th>\n",
       "      <th>distribution.digest.algorithm</th>\n",
       "      <th>distribution.digest.value</th>\n",
       "      <th>distribution.encodingFormat</th>\n",
       "      <th>distribution.name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Dataset</td>\n",
       "      <td>[{'type': 'Contribution', 'agent': {'type': 'P...</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>789aa07948683fe036ac29811814a826b703b562f7d168...</td>\n",
       "      <td>text/tab-separated-values</td>\n",
       "      <td>associations.tsv</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Dataset</td>\n",
       "      <td>[{'type': 'Contribution', 'agent': {'type': 'P...</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>789aa07948683fe036ac29811814a826b703b562f7d168...</td>\n",
       "      <td>text/tab-separated-values</td>\n",
       "      <td>associations.tsv</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Dataset</td>\n",
       "      <td>[{'type': 'Contribution', 'agent': {'type': 'P...</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>789aa07948683fe036ac29811814a826b703b562f7d168...</td>\n",
       "      <td>text/tab-separated-values</td>\n",
       "      <td>associations.tsv</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id     type  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Dataset   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  Dataset   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  Dataset   \n",
       "\n",
       "                                        contribution distribution.type  \\\n",
       "0  [{'type': 'Contribution', 'agent': {'type': 'P...      DataDownload   \n",
       "1  [{'type': 'Contribution', 'agent': {'type': 'P...      DataDownload   \n",
       "2  [{'type': 'Contribution', 'agent': {'type': 'P...      DataDownload   \n",
       "\n",
       "  distribution.atLocation.type  \\\n",
       "0                     Location   \n",
       "1                     Location   \n",
       "2                     Location   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "1  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "2  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "\n",
       "  distribution.atLocation.store.type  distribution.atLocation.store._rev  \\\n",
       "0                        DiskStorage                                   1   \n",
       "1                        DiskStorage                                   1   \n",
       "2                        DiskStorage                                   1   \n",
       "\n",
       "  distribution.contentSize.unitCode  distribution.contentSize.value  \\\n",
       "0                             bytes                             477   \n",
       "1                             bytes                             477   \n",
       "2                             bytes                             477   \n",
       "\n",
       "                             distribution.contentUrl  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "\n",
       "  distribution.digest.algorithm  \\\n",
       "0                       SHA-256   \n",
       "1                       SHA-256   \n",
       "2                       SHA-256   \n",
       "\n",
       "                           distribution.digest.value  \\\n",
       "0  789aa07948683fe036ac29811814a826b703b562f7d168...   \n",
       "1  789aa07948683fe036ac29811814a826b703b562f7d168...   \n",
       "2  789aa07948683fe036ac29811814a826b703b562f7d168...   \n",
       "\n",
       "  distribution.encodingFormat distribution.name  \n",
       "0   text/tab-separated-values  associations.tsv  \n",
       "1   text/tab-separated-values  associations.tsv  \n",
       "2   text/tab-separated-values  associations.tsv  "
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using dictionaries as filters\n",
    "A dictionary can be provided for filters:\n",
    "* `{'type': {'id':'Dataset'}}` is equivalent to `p.type.id == \"Dataset\"`\n",
    "* only the `==` operator is supported\n",
    "* nested dictionaries are supported\n",
    "* it is not mandatory for the provided properties and values to be defined in the forge model. Results will be retrieved if there is corresponding data in the store.\n",
    "\n",
    "This feature is not supported when using the DemoStore.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search for resources of type Dataset and with text/tab-separated-values as distribution.encodingFormat\n",
    "# and created at a given dateTime (by default, dateTime values should be signaled by the suffix \"^^xsd:dateTime\")\n",
    "filters = {\n",
    "           \"type\": \"Dataset\", \n",
    "           \"distribution\":{\"encodingFormat\":\"text/tab-separated-values\"},\n",
    "           \"_createdAt\":dataset._store_metadata._createdAt+\"^^xsd:dateTime\"\n",
    "          }\n",
    "resources = forge.search(filters, limit=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>contribution</th>\n",
       "      <th>distribution.type</th>\n",
       "      <th>distribution.atLocation.type</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>...</th>\n",
       "      <th>_createdBy</th>\n",
       "      <th>_deprecated</th>\n",
       "      <th>_incoming</th>\n",
       "      <th>_outgoing</th>\n",
       "      <th>_project</th>\n",
       "      <th>_rev</th>\n",
       "      <th>_schemaProject</th>\n",
       "      <th>_self</th>\n",
       "      <th>_updatedAt</th>\n",
       "      <th>_updatedBy</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Dataset</td>\n",
       "      <td>[{'type': 'Contribution', 'agent': {'type': 'P...</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477</td>\n",
       "      <td>...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>1</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>2023-05-22T08:58:03.175Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id     type  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Dataset   \n",
       "\n",
       "                                        contribution distribution.type  \\\n",
       "0  [{'type': 'Contribution', 'agent': {'type': 'P...      DataDownload   \n",
       "\n",
       "  distribution.atLocation.type  \\\n",
       "0                     Location   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "\n",
       "  distribution.atLocation.store.type  distribution.atLocation.store._rev  \\\n",
       "0                        DiskStorage                                   1   \n",
       "\n",
       "  distribution.contentSize.unitCode  distribution.contentSize.value  ...  \\\n",
       "0                             bytes                             477  ...   \n",
       "\n",
       "                                          _createdBy _deprecated  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/realms/gi...       False   \n",
       "\n",
       "                                           _incoming  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                                           _outgoing  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                                            _project _rev  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/projects/...    1   \n",
       "\n",
       "                                      _schemaProject  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/projects/...   \n",
       "\n",
       "                                               _self  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                 _updatedAt                                         _updatedBy  \n",
       "0  2023-05-22T08:58:03.175Z  https://sandbox.bluebrainnexus.io/v1/realms/gi...  \n",
       "\n",
       "[1 rows x 27 columns]"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources, store_metadata=True)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using built-in Filter objects"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Supported filter operators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['__eq__ (EQUAL)',\n",
       " '__ne__ (NOT_EQUAL)',\n",
       " '__lt__ (LOWER_THAN)',\n",
       " '__le__ (LOWER_OR_Equal_Than)',\n",
       " '__gt__ (GREATER_Than)',\n",
       " '__ge__ (GREATER_OR_Equal_Than)']"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[f\"{op.value} ({op.name})\" for op in FilterOperator] # These are equivalent to the Python comparison operators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search for resources of type Dataset and with text/tab-separated-values as distribution.encodingFormat\n",
    "# and created at or before a given dateTime (dateTime values should be signaled by the suffix \"^^xsd:dateTime\")\n",
    "filter_1 = Filter(operator=FilterOperator.EQUAL, path=[\"type\"], value=\"Dataset\")\n",
    "filter_2 = Filter(operator=FilterOperator.EQUAL, path=[\"distribution\",\"encodingFormat\"], value=\"text/tab-separated-values\")\n",
    "filter_3 = Filter(operator=FilterOperator.LOWER_OR_Equal_Than, path=[\"_createdAt\"], value=dataset._store_metadata._createdAt+\"^^xsd:dateTime\")\n",
    "\n",
    "resources = forge.search(filter_1, filter_2, filter_3, limit=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>contribution</th>\n",
       "      <th>distribution.type</th>\n",
       "      <th>distribution.atLocation.type</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>...</th>\n",
       "      <th>_createdBy</th>\n",
       "      <th>_deprecated</th>\n",
       "      <th>_incoming</th>\n",
       "      <th>_outgoing</th>\n",
       "      <th>_project</th>\n",
       "      <th>_rev</th>\n",
       "      <th>_schemaProject</th>\n",
       "      <th>_self</th>\n",
       "      <th>_updatedAt</th>\n",
       "      <th>_updatedBy</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Dataset</td>\n",
       "      <td>[{'type': 'Contribution', 'agent': {'type': 'P...</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477</td>\n",
       "      <td>...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>1</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>2023-03-29T12:57:29.375Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Dataset</td>\n",
       "      <td>[{'type': 'Contribution', 'agent': {'type': 'P...</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477</td>\n",
       "      <td>...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>1</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>2023-05-22T08:24:43.121Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Dataset</td>\n",
       "      <td>[{'type': 'Contribution', 'agent': {'type': 'P...</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477</td>\n",
       "      <td>...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>1</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>2023-05-22T08:58:03.175Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id     type  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Dataset   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  Dataset   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  Dataset   \n",
       "\n",
       "                                        contribution distribution.type  \\\n",
       "0  [{'type': 'Contribution', 'agent': {'type': 'P...      DataDownload   \n",
       "1  [{'type': 'Contribution', 'agent': {'type': 'P...      DataDownload   \n",
       "2  [{'type': 'Contribution', 'agent': {'type': 'P...      DataDownload   \n",
       "\n",
       "  distribution.atLocation.type  \\\n",
       "0                     Location   \n",
       "1                     Location   \n",
       "2                     Location   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "1  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "2  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "\n",
       "  distribution.atLocation.store.type  distribution.atLocation.store._rev  \\\n",
       "0                        DiskStorage                                   1   \n",
       "1                        DiskStorage                                   1   \n",
       "2                        DiskStorage                                   1   \n",
       "\n",
       "  distribution.contentSize.unitCode  distribution.contentSize.value  ...  \\\n",
       "0                             bytes                             477  ...   \n",
       "1                             bytes                             477  ...   \n",
       "2                             bytes                             477  ...   \n",
       "\n",
       "                                          _createdBy _deprecated  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/realms/gi...       False   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/realms/gi...       False   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/realms/gi...       False   \n",
       "\n",
       "                                           _incoming  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                                           _outgoing  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                                            _project _rev  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/projects/...    1   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/projects/...    1   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/projects/...    1   \n",
       "\n",
       "                                      _schemaProject  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/projects/...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/projects/...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/projects/...   \n",
       "\n",
       "                                               _self  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                 _updatedAt                                         _updatedBy  \n",
       "0  2023-03-29T12:57:29.375Z  https://sandbox.bluebrainnexus.io/v1/realms/gi...  \n",
       "1  2023-05-22T08:24:43.121Z  https://sandbox.bluebrainnexus.io/v1/realms/gi...  \n",
       "2  2023-05-22T08:58:03.175Z  https://sandbox.bluebrainnexus.io/v1/realms/gi...  \n",
       "\n",
       "[3 rows x 27 columns]"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources, store_metadata=True)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using search endpoints\n",
    "\n",
    "Two types of search endpoints are supported: 'sparql' (default) for graph queries and 'elastic' for document-oriented queries. The available search endpoints can be configured (see [00-Initialization.ipynb](00%20-%20Initialization.ipynb) for an example of search endpoints config) or set when creating a KnowledgeGraphForge session using the 'searchendpoints' argument.\n",
    "\n",
    "The search endpoint to hit when calling forge.search(...) is 'sparql' by default but can be specified using the 'search_endpoint' argument."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### SPARQL Search Endpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search for resources of type Person\n",
    "filters = {\"type\": \"Person\"}\n",
    "resources = forge.search(filters, limit=3, search_endpoint='sparql')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>name</th>\n",
       "      <th>_constrainedBy</th>\n",
       "      <th>_createdAt</th>\n",
       "      <th>_createdBy</th>\n",
       "      <th>_deprecated</th>\n",
       "      <th>_incoming</th>\n",
       "      <th>_outgoing</th>\n",
       "      <th>_project</th>\n",
       "      <th>...</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>distribution.contentUrl</th>\n",
       "      <th>distribution.digest.algorithm</th>\n",
       "      <th>distribution.digest.value</th>\n",
       "      <th>distribution.encodingFormat</th>\n",
       "      <th>distribution.name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Albert Einstein</td>\n",
       "      <td>https://bluebrain.github.io/nexus/schemas/unco...</td>\n",
       "      <td>2022-07-20T15:30:01.482Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Marie Curie</td>\n",
       "      <td>https://bluebrain.github.io/nexus/schemas/unco...</td>\n",
       "      <td>2022-07-20T15:30:01.467Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Jane Doe</td>\n",
       "      <td>https://bluebrain.github.io/nexus/schemas/unco...</td>\n",
       "      <td>2022-07-20T15:27:50.435Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>...</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477.0</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>789aa07948683fe036ac29811814a826b703b562f7d168...</td>\n",
       "      <td>text/tab-separated-values</td>\n",
       "      <td>associations.tsv</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id    type             name  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Person  Albert Einstein   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  Person      Marie Curie   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  Person         Jane Doe   \n",
       "\n",
       "                                      _constrainedBy  \\\n",
       "0  https://bluebrain.github.io/nexus/schemas/unco...   \n",
       "1  https://bluebrain.github.io/nexus/schemas/unco...   \n",
       "2  https://bluebrain.github.io/nexus/schemas/unco...   \n",
       "\n",
       "                 _createdAt  \\\n",
       "0  2022-07-20T15:30:01.482Z   \n",
       "1  2022-07-20T15:30:01.467Z   \n",
       "2  2022-07-20T15:27:50.435Z   \n",
       "\n",
       "                                          _createdBy  _deprecated  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/realms/gi...        False   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/realms/gi...        False   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/realms/gi...        False   \n",
       "\n",
       "                                           _incoming  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                                           _outgoing  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                                            _project  ...  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/projects/...  ...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/projects/...  ...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/projects/...  ...   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "\n",
       "  distribution.atLocation.store.type distribution.atLocation.store._rev  \\\n",
       "0                                NaN                                NaN   \n",
       "1                                NaN                                NaN   \n",
       "2                        DiskStorage                                1.0   \n",
       "\n",
       "  distribution.contentSize.unitCode distribution.contentSize.value  \\\n",
       "0                               NaN                            NaN   \n",
       "1                               NaN                            NaN   \n",
       "2                             bytes                          477.0   \n",
       "\n",
       "                             distribution.contentUrl  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "\n",
       "  distribution.digest.algorithm  \\\n",
       "0                           NaN   \n",
       "1                           NaN   \n",
       "2                       SHA-256   \n",
       "\n",
       "                           distribution.digest.value  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  789aa07948683fe036ac29811814a826b703b562f7d168...   \n",
       "\n",
       "  distribution.encodingFormat  distribution.name  \n",
       "0                         NaN                NaN  \n",
       "1                         NaN                NaN  \n",
       "2   text/tab-separated-values   associations.tsv  \n",
       "\n",
       "[3 rows x 27 columns]"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources, store_metadata=True)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### ElasticSearch Endpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search for resources of type Person and retrieve their ids and types.\n",
    "filters = {\"@type\": \"http://schema.org/Person\"}\n",
    "resources = forge.search(filters, limit=3, \n",
    "                         search_endpoint='elastic', \n",
    "                         includes=[\"@id\", \"@type\"]) # fields can also be excluded with 'excludes'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>http://schema.org/Person</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>http://schema.org/Person</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>http://schema.org/Person</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id                      type\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  http://schema.org/Person\n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  http://schema.org/Person\n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  http://schema.org/Person"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources, store_metadata=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Search results are not synchronized\n",
    "resources[0]._synchronized"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'https://sandbox.bluebrainnexus.io/v1/resources/github-users/crisely09/_/e491b538-0fc4-4fae-b5f7-aaf80839dbe5'"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resources[0].id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'http://schema.org/Person'"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resources[0].type"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Crossbucket search\n",
    "It is possible to search for resources stored in buckets different from the configured one. The configured store must, of course, support cross-bucket search."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "resources = forge.search(p.type.id == \"Association\", limit=3, cross_bucket=True)  # cross_bucket defaults to False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>agent.type</th>\n",
       "      <th>agent.gender.id</th>\n",
       "      <th>agent.gender.type</th>\n",
       "      <th>agent.gender.label</th>\n",
       "      <th>agent.name</th>\n",
       "      <th>distribution.type</th>\n",
       "      <th>distribution.atLocation.type</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>distribution.contentUrl</th>\n",
       "      <th>distribution.digest.algorithm</th>\n",
       "      <th>distribution.digest.value</th>\n",
       "      <th>distribution.encodingFormat</th>\n",
       "      <th>distribution.name</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Association</td>\n",
       "      <td>Person</td>\n",
       "      <td>http://purl.obolibrary.org/obo/PATO_0000383</td>\n",
       "      <td>LabeledOntologyEntity</td>\n",
       "      <td>female</td>\n",
       "      <td>Marie Curie</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bytes</td>\n",
       "      <td>46.0</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>e0fe65f725bf28fe2b88c7bafb51fb5ef1df0ab14c68a3...</td>\n",
       "      <td>text/plain</td>\n",
       "      <td>marie_curie.txt</td>\n",
       "      <td>Curie Association</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Association</td>\n",
       "      <td>Person</td>\n",
       "      <td>http://purl.obolibrary.org/obo/PATO_0000384</td>\n",
       "      <td>LabeledOntologyEntity</td>\n",
       "      <td>male</td>\n",
       "      <td>Albert Einstein</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bytes</td>\n",
       "      <td>50.0</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>91a5ce5c84dc5bead730a4b49d0698b4aaef4bc06ce164...</td>\n",
       "      <td>text/plain</td>\n",
       "      <td>albert_einstein.txt</td>\n",
       "      <td>Einstein Association</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Association</td>\n",
       "      <td>Person</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>John Smith</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id         type agent.type  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Association     Person   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  Association     Person   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  Association     Person   \n",
       "\n",
       "                               agent.gender.id      agent.gender.type  \\\n",
       "0  http://purl.obolibrary.org/obo/PATO_0000383  LabeledOntologyEntity   \n",
       "1  http://purl.obolibrary.org/obo/PATO_0000384  LabeledOntologyEntity   \n",
       "2                                          NaN                    NaN   \n",
       "\n",
       "  agent.gender.label       agent.name distribution.type  \\\n",
       "0             female      Marie Curie      DataDownload   \n",
       "1               male  Albert Einstein      DataDownload   \n",
       "2                NaN       John Smith               NaN   \n",
       "\n",
       "  distribution.atLocation.type  \\\n",
       "0                     Location   \n",
       "1                     Location   \n",
       "2                          NaN   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "1  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "2                                                NaN   \n",
       "\n",
       "  distribution.atLocation.store.type  distribution.atLocation.store._rev  \\\n",
       "0                        DiskStorage                                 1.0   \n",
       "1                        DiskStorage                                 1.0   \n",
       "2                                NaN                                 NaN   \n",
       "\n",
       "  distribution.contentSize.unitCode  distribution.contentSize.value  \\\n",
       "0                             bytes                            46.0   \n",
       "1                             bytes                            50.0   \n",
       "2                               NaN                             NaN   \n",
       "\n",
       "                             distribution.contentUrl  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "2                                                NaN   \n",
       "\n",
       "  distribution.digest.algorithm  \\\n",
       "0                       SHA-256   \n",
       "1                       SHA-256   \n",
       "2                           NaN   \n",
       "\n",
       "                           distribution.digest.value  \\\n",
       "0  e0fe65f725bf28fe2b88c7bafb51fb5ef1df0ab14c68a3...   \n",
       "1  91a5ce5c84dc5bead730a4b49d0698b4aaef4bc06ce164...   \n",
       "2                                                NaN   \n",
       "\n",
       "  distribution.encodingFormat    distribution.name                  name  \n",
       "0                  text/plain      marie_curie.txt     Curie Association  \n",
       "1                  text/plain  albert_einstein.txt  Einstein Association  \n",
       "2                         NaN                  NaN                   NaN  "
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Furthermore, it is possible to filter by bucket when cross_bucket is set to True. Setting a bucket value when cross_bucket is False will trigger a not_supported exception.\n",
    "resources = forge.search(p.type.id == \"Person\", limit=3, cross_bucket=True, bucket=\"dke/kgforge\")  # add a bucket"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: []\n",
       "Index: []"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Searching original source\n",
    "When using BlueBrainNexusStore, it is possible to retrieve the resources' payloads as they were registered (retrieve_source=True), without any changes related to store-added metadata or JSON-LD framing. The cell below passes retrieve_source=False to turn this off."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "resources = forge.search(p.type.id == \"Association\", limit=3, retrieve_source=False)  # retrieve_source defaults to True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>agent.type</th>\n",
       "      <th>agent.gender.id</th>\n",
       "      <th>agent.gender.type</th>\n",
       "      <th>agent.gender.label</th>\n",
       "      <th>agent.name</th>\n",
       "      <th>distribution.type</th>\n",
       "      <th>distribution.atLocation.type</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>distribution.contentUrl</th>\n",
       "      <th>distribution.digest.algorithm</th>\n",
       "      <th>distribution.digest.value</th>\n",
       "      <th>distribution.encodingFormat</th>\n",
       "      <th>distribution.name</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Association</td>\n",
       "      <td>Person</td>\n",
       "      <td>http://purl.obolibrary.org/obo/PATO_0000383</td>\n",
       "      <td>LabeledOntologyEntity</td>\n",
       "      <td>female</td>\n",
       "      <td>Marie Curie</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bytes</td>\n",
       "      <td>46.0</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>e0fe65f725bf28fe2b88c7bafb51fb5ef1df0ab14c68a3...</td>\n",
       "      <td>text/plain</td>\n",
       "      <td>marie_curie.txt</td>\n",
       "      <td>Curie Association</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Association</td>\n",
       "      <td>Person</td>\n",
       "      <td>http://purl.obolibrary.org/obo/PATO_0000384</td>\n",
       "      <td>LabeledOntologyEntity</td>\n",
       "      <td>male</td>\n",
       "      <td>Albert Einstein</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bytes</td>\n",
       "      <td>50.0</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>91a5ce5c84dc5bead730a4b49d0698b4aaef4bc06ce164...</td>\n",
       "      <td>text/plain</td>\n",
       "      <td>albert_einstein.txt</td>\n",
       "      <td>Einstein Association</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Association</td>\n",
       "      <td>Person</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>John Smith</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id         type agent.type  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Association     Person   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  Association     Person   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  Association     Person   \n",
       "\n",
       "                               agent.gender.id      agent.gender.type  \\\n",
       "0  http://purl.obolibrary.org/obo/PATO_0000383  LabeledOntologyEntity   \n",
       "1  http://purl.obolibrary.org/obo/PATO_0000384  LabeledOntologyEntity   \n",
       "2                                          NaN                    NaN   \n",
       "\n",
       "  agent.gender.label       agent.name distribution.type  \\\n",
       "0             female      Marie Curie      DataDownload   \n",
       "1               male  Albert Einstein      DataDownload   \n",
       "2                NaN       John Smith               NaN   \n",
       "\n",
       "  distribution.atLocation.type  \\\n",
       "0                     Location   \n",
       "1                     Location   \n",
       "2                          NaN   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "1  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "2                                                NaN   \n",
       "\n",
       "  distribution.atLocation.store.type  distribution.atLocation.store._rev  \\\n",
       "0                        DiskStorage                                 1.0   \n",
       "1                        DiskStorage                                 1.0   \n",
       "2                                NaN                                 NaN   \n",
       "\n",
       "  distribution.contentSize.unitCode  distribution.contentSize.value  \\\n",
       "0                             bytes                            46.0   \n",
       "1                             bytes                            50.0   \n",
       "2                               NaN                             NaN   \n",
       "\n",
       "                             distribution.contentUrl  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "2                                                NaN   \n",
       "\n",
       "  distribution.digest.algorithm  \\\n",
       "0                       SHA-256   \n",
       "1                       SHA-256   \n",
       "2                           NaN   \n",
       "\n",
       "                           distribution.digest.value  \\\n",
       "0  e0fe65f725bf28fe2b88c7bafb51fb5ef1df0ab14c68a3...   \n",
       "1  91a5ce5c84dc5bead730a4b49d0698b4aaef4bc06ce164...   \n",
       "2                                                NaN   \n",
       "\n",
       "  distribution.encodingFormat    distribution.name                  name  \n",
       "0                  text/plain      marie_curie.txt     Curie Association  \n",
       "1                  text/plain  albert_einstein.txt  Einstein Association  \n",
       "2                         NaN                  NaN                   NaN  "
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Graph traversing\n",
    "\n",
    "SPARQL is used as a query language to perform graph traversing.\n",
    "\n",
    "Nexus Forge implements a SPARQL query rewriting strategy leveraging a configured RDFModel that lets users write SPARQL queries without adding prefix declarations, prefix names or long IRIs. With this strategy, only type and property names can be provided.\n",
    "\n",
    "Please refer to the [Modeling.ipynb](11%20-%20Modeling.ipynb) notebook to learn about templates."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: DemoStore doesn't implement SPARQL operations yet. Please use another store for this section."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: DemoModel and RdfModel schemas have not been synchronized yet. This section is to be run with RdfModel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane = Resource(type=\"Person\", name=\"Jane Doe\")\n",
    "contribution_jane = Resource(type=\"Contribution\", agent=jane)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "john = Resource(type=\"Person\", name=\"John Smith\")\n",
    "contribution_john = Resource(type=\"Contribution\", agent=john)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "association = Resource(type=\"Dataset\", contribution=[contribution_jane, contribution_john])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _register_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.register(association)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "    id: \"\"\n",
      "    type:\n",
      "    {\n",
      "        id: \"\"\n",
      "    }\n",
      "    annotation:\n",
      "    {\n",
      "        id: \"\"\n",
      "        type: Annotation\n",
      "        hasBody:\n",
      "        {\n",
      "            id: \"\"\n",
      "            type:\n",
      "            {\n",
      "                id: \"\"\n",
      "            }\n",
      "            label: \"\"\n",
      "            note: \"\"\n",
      "        }\n",
      "        hasTarget:\n",
      "        {\n",
      "            id: \"\"\n",
      "            type: AnnotationTarget\n",
      "        }\n",
      "        note: \"\"\n",
      "    }\n",
      "    brainLocation:\n",
      "    {\n",
      "        id: \"\"\n",
      "        type: BrainLocation\n",
      "        atlasSpatialReferenceSystem:\n",
      "        {\n",
      "            id: \"\"\n",
      "            type: AtlasSpatialReferenceSystem\n",
      "        }\n",
      "        brainRegion:\n",
      "        {\n",
      "            id: \"\"\n",
      "            label: \"\"\n",
      "        }\n",
      "        coordinatesInBrainAtlas:\n",
      "        {\n",
      "            id: \"\"\n",
      "            valueX: 0.0\n",
      "            valueY: 0.0\n",
      "            valueZ: 0.0\n",
      "        }\n",
      "        coordinatesInSlice:\n",
      "        {\n",
      "            spatialReferenceSystem:\n",
      "            {\n",
      "                id: \"\"\n",
      "                type: SpatialReferenceSystem\n",
      "            }\n",
      "            valueX: 0.0\n",
      "            valueY: 0.0\n",
      "            valueZ: 0.0\n",
      "        }\n",
      "        distanceToBoundary:\n",
      "        {\n",
      "            boundary:\n",
      "            {\n",
      "                id: \"\"\n",
      "                label: \"\"\n",
      "            }\n",
      "            distance:\n",
      "            {\n",
      "                unitCode: \"\"\n",
      "                value:\n",
      "                [\n",
      "                    0.0\n",
      "                    0\n",
      "                ]\n",
      "            }\n",
      "        }\n",
      "        layer:\n",
      "        {\n",
      "            id: \"\"\n",
      "            label: \"\"\n",
      "        }\n",
      "        longitudinalAxis:\n",
      "        [\n",
      "            Dorsal\n",
      "            Ventral\n",
      "        ]\n",
      "        positionInLayer:\n",
      "        [\n",
      "            Deep\n",
      "            Superficial\n",
      "        ]\n",
      "    }\n",
      "    contribution:\n",
      "    {\n",
      "        id: \"\"\n",
      "    }\n",
      "    distribution:\n",
      "    {\n",
      "        id: \"\"\n",
      "        type: DataDownload\n",
      "        contentSize:\n",
      "        {\n",
      "            unitCode: \"\"\n",
      "            value:\n",
      "            [\n",
      "                0.0\n",
      "                0\n",
      "            ]\n",
      "        }\n",
      "        digest:\n",
      "        {\n",
      "            algorithm: \"\"\n",
      "            value: \"\"\n",
      "        }\n",
      "        encodingFormat: \"\"\n",
      "        license: \"\"\n",
      "        name: \"\"\n",
      "    }\n",
      "    objectOfStudy:\n",
      "    {\n",
      "        id: \"\"\n",
      "        type: ObjectOfStudy\n",
      "    }\n",
      "    releaseDate: 9999-12-31T00:00:00\n",
      "    subject:\n",
      "    {\n",
      "        id: \"\"\n",
      "        type: Subject\n",
      "    }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "forge.template(\"Dataset\") # Templates help identify which property to use when writing a query to search for a given type"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prefix and namespace free SPARQL query\n",
    "\n",
    "When a forge RDFModel is configured, then there is no need to provide prefixes and namespaces when writing a SPARQL query. Prefixes and namespaces will be automatically inferred from the provided schemas and/or JSON-LD context and the query rewritten accordingly."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"\"\"\n",
    "    SELECT ?id ?name ?contributor\n",
    "    WHERE {\n",
    "        ?id a Dataset ;\n",
    "        contribution/agent ?contributor.\n",
    "        ?contributor name ?name.\n",
    "    }\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "resources = forge.sparql(query, limit=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "    id: https://sandbox.bluebrainnexus.io/v1/resources/github-users/crisely09/_/40162355-af23-44a9-9c9a-36b6a4d35dcf\n",
      "    contributor: t3318\n",
      "    name: Jane Doe\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "print(resources[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>contributor</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>t3318</td>\n",
       "      <td>Jane Doe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>t3319</td>\n",
       "      <td>John Smith</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>t3325</td>\n",
       "      <td>Jane Doe</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id contributor        name\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...       t3318    Jane Doe\n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...       t3319  John Smith\n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...       t3325    Jane Doe"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Display rewritten SPARQL query"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Submitted query:\n",
      "   PREFIX dc: <http://purl.org/dc/elements/1.1/>\n",
      "   PREFIX dcat: <http://www.w3.org/ns/dcat#>\n",
      "   PREFIX dcterms: <http://purl.org/dc/terms/>\n",
      "   PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/>\n",
      "   PREFIX nsg: <https://neuroshapes.org/>\n",
      "   PREFIX owl: <http://www.w3.org/2002/07/owl#>\n",
      "   PREFIX prov: <http://www.w3.org/ns/prov#>\n",
      "   PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
      "   PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
      "   PREFIX schema: <http://schema.org/>\n",
      "   PREFIX sh: <http://www.w3.org/ns/shacl#>\n",
      "   PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#>\n",
      "   PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n",
      "   PREFIX vann: <http://purl.org/vocab/vann/>\n",
      "   PREFIX void: <http://rdfs.org/ns/void#>\n",
      "   PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n",
      "   PREFIX : <https://neuroshapes.org/>\n",
      "   \n",
      "       SELECT ?id ?name ?contributor\n",
      "       WHERE {\n",
      "           ?id a schema:Dataset ;\n",
      "           nsg:contribution/prov:agent ?contributor.\n",
      "           ?contributor schema:name ?name.\n",
      "       }\n",
      "     LIMIT 3\n",
      "\n"
     ]
    }
   ],
   "source": [
    "resources = forge.sparql(query, limit=3, debug=True)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Full SPARQL query\n",
    "\n",
    "A regular SPARQL query can also be provided. When provided, the limit and offset arguments supersede any in-query limit or offset values."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"\"\"\n",
    "PREFIX dc: <http://purl.org/dc/elements/1.1/>\n",
    "   PREFIX dcat: <http://www.w3.org/ns/dcat#>\n",
    "   PREFIX dcterms: <http://purl.org/dc/terms/>\n",
    "   PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/>\n",
    "   PREFIX nsg: <https://neuroshapes.org/>\n",
    "   PREFIX owl: <http://www.w3.org/2002/07/owl#>\n",
    "   PREFIX prov: <http://www.w3.org/ns/prov#>\n",
    "   PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
    "   PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
    "   PREFIX schema: <http://schema.org/>\n",
    "   PREFIX sh: <http://www.w3.org/ns/shacl#>\n",
    "   PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#>\n",
    "   PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n",
    "   PREFIX vann: <http://purl.org/vocab/vann/>\n",
    "   PREFIX void: <http://rdfs.org/ns/void#>\n",
    "   PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n",
    "   PREFIX : <https://neuroshapes.org/>\n",
    "   SELECT ?id ?name\n",
    "   WHERE {\n",
    "       ?id a schema:Dataset ;\n",
    "       nsg:contribution/prov:agent ?contributor.\n",
    "       ?contributor schema:name ?name.\n",
    "   }\n",
    "   ORDER BY ?id\n",
    "   LIMIT 1\n",
    "   OFFSET 0\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Submitted query:\n",
      "   \n",
      "   PREFIX dc: <http://purl.org/dc/elements/1.1/>\n",
      "      PREFIX dcat: <http://www.w3.org/ns/dcat#>\n",
      "      PREFIX dcterms: <http://purl.org/dc/terms/>\n",
      "      PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/>\n",
      "      PREFIX nsg: <https://neuroshapes.org/>\n",
      "      PREFIX owl: <http://www.w3.org/2002/07/owl#>\n",
      "      PREFIX prov: <http://www.w3.org/ns/prov#>\n",
      "      PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
      "      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
      "      PREFIX schema: <http://schema.org/>\n",
      "      PREFIX sh: <http://www.w3.org/ns/shacl#>\n",
      "      PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#>\n",
      "      PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n",
      "      PREFIX vann: <http://purl.org/vocab/vann/>\n",
      "      PREFIX void: <http://rdfs.org/ns/void#>\n",
      "      PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n",
      "      PREFIX : <https://neuroshapes.org/>\n",
      "      SELECT ?id ?name\n",
      "      WHERE {\n",
      "          ?id a schema:Dataset ;\n",
      "          nsg:contribution/prov:agent ?contributor.\n",
      "          ?contributor schema:name ?name.\n",
      "      }\n",
      "      ORDER BY ?id\n",
      "      LIMIT 3\n",
      "      OFFSET 1\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# it is recommended to set 'rewrite' to 'False' to prevent the sparql query rewriting when a syntactically correct SPARQL query is provided.\n",
    "resources = forge.sparql(query, rewrite=False, limit=3, offset=1, debug=True) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "kgforge.core.resource.Resource"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>John Smith</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>John Smith</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Jane Doe</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id        name\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  John Smith\n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  John Smith\n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...    Jane Doe"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Avoid rewriting the query"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To not assign any limit or offset, one can pass `None` to those parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "query_without_limit = \"\"\"\n",
    "   PREFIX dc: <http://purl.org/dc/elements/1.1/>\n",
    "   PREFIX dcat: <http://www.w3.org/ns/dcat#>\n",
    "   PREFIX dcterms: <http://purl.org/dc/terms/>\n",
    "   PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/>\n",
    "   PREFIX nsg: <https://neuroshapes.org/>\n",
    "   PREFIX owl: <http://www.w3.org/2002/07/owl#>\n",
    "   PREFIX prov: <http://www.w3.org/ns/prov#>\n",
    "   PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
    "   PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
    "   PREFIX schema: <http://schema.org/>\n",
    "   PREFIX sh: <http://www.w3.org/ns/shacl#>\n",
    "   PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#>\n",
    "   PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n",
    "   PREFIX vann: <http://purl.org/vocab/vann/>\n",
    "   PREFIX void: <http://rdfs.org/ns/void#>\n",
    "   PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n",
    "   PREFIX : <https://neuroshapes.org/>\n",
    "   SELECT ?id ?name\n",
    "   WHERE {\n",
    "       ?id a schema:Dataset ;\n",
    "       nsg:contribution/prov:agent ?contributor.\n",
    "       ?contributor schema:name ?name.\n",
    "   }\n",
    "   ORDER BY ?id\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Submitted query:\n",
      "   \n",
      "      PREFIX dc: <http://purl.org/dc/elements/1.1/>\n",
      "      PREFIX dcat: <http://www.w3.org/ns/dcat#>\n",
      "      PREFIX dcterms: <http://purl.org/dc/terms/>\n",
      "      PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/>\n",
      "      PREFIX nsg: <https://neuroshapes.org/>\n",
      "      PREFIX owl: <http://www.w3.org/2002/07/owl#>\n",
      "      PREFIX prov: <http://www.w3.org/ns/prov#>\n",
      "      PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
      "      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
      "      PREFIX schema: <http://schema.org/>\n",
      "      PREFIX sh: <http://www.w3.org/ns/shacl#>\n",
      "      PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#>\n",
      "      PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n",
      "      PREFIX vann: <http://purl.org/vocab/vann/>\n",
      "      PREFIX void: <http://rdfs.org/ns/void#>\n",
      "      PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n",
      "      PREFIX : <https://neuroshapes.org/>\n",
      "      SELECT ?id ?name\n",
      "      WHERE {\n",
      "          ?id a schema:Dataset ;\n",
      "          nsg:contribution/prov:agent ?contributor.\n",
      "          ?contributor schema:name ?name.\n",
      "      }\n",
      "      ORDER BY ?id\n",
      "\n"
     ]
    }
   ],
   "source": [
    "\n",
    "resources = forge.sparql(query_without_limit, rewrite=False, limit=None, offset=None, debug=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "100"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you only want the context (prefix declarations and resolved type/property names) to be added, without any limit being appended to the query, leave the `rewrite` parameter at its default (enabled) and pass `None` as `limit`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [],
   "source": [
    "query_without_context = \"\"\"\n",
    "    SELECT ?id ?name ?contributor\n",
    "    WHERE {\n",
    "        ?id a Dataset ;\n",
    "        contribution/agent ?contributor.\n",
    "        ?contributor name ?name.\n",
    "    }\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Submitted query:\n",
      "   PREFIX dc: <http://purl.org/dc/elements/1.1/>\n",
      "   PREFIX dcat: <http://www.w3.org/ns/dcat#>\n",
      "   PREFIX dcterms: <http://purl.org/dc/terms/>\n",
      "   PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/>\n",
      "   PREFIX nsg: <https://neuroshapes.org/>\n",
      "   PREFIX owl: <http://www.w3.org/2002/07/owl#>\n",
      "   PREFIX prov: <http://www.w3.org/ns/prov#>\n",
      "   PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
      "   PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
      "   PREFIX schema: <http://schema.org/>\n",
      "   PREFIX sh: <http://www.w3.org/ns/shacl#>\n",
      "   PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#>\n",
      "   PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n",
      "   PREFIX vann: <http://purl.org/vocab/vann/>\n",
      "   PREFIX void: <http://rdfs.org/ns/void#>\n",
      "   PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n",
      "   PREFIX : <https://neuroshapes.org/>\n",
      "   \n",
      "       SELECT ?id ?name ?contributor\n",
      "       WHERE {\n",
      "           ?id a schema:Dataset ;\n",
      "           nsg:contribution/prov:agent ?contributor.\n",
      "           ?contributor schema:name ?name.\n",
      "       }\n",
      "\n"
     ]
    }
   ],
   "source": [
    "resources = forge.sparql(query_without_context, limit=None, debug=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "100"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ElasticSearch DSL Query\n",
    "\n",
    "ElasticSearch DSL can be used as a query language to search for resources, provided that the configured store supports it. The 'BlueBrainNexusStore' supports ElasticSearch."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: DemoStore doesn't implement ElasticSearch DSL operations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane = Resource(type=\"Person\", name=\"Jane Doe\")\n",
    "contribution_jane = Resource(type=\"Contribution\", agent=jane)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "john = Resource(type=\"Person\", name=\"John Smith\")\n",
    "contribution_john = Resource(type=\"Contribution\", agent=john)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [],
   "source": [
    "association = Resource(type=\"Dataset\", contribution=[contribution_jane, contribution_john])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _register_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.register(association)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Plain ElasticSearch DSL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"\"\"\n",
    "        {\n",
    "          \"_source\": {\n",
    "            \"includes\": [\n",
    "              \"@id\",\n",
    "              \"name\"\n",
    "            ]\n",
    "          },\n",
    "          \"query\": {\n",
    "            \"term\": {\n",
    "              \"@type\": \"http://schema.org/Dataset\"\n",
    "            }\n",
    "          }\n",
    "        }\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [],
   "source": [
    "# limit and offset (when provided in this method call) supersede 'size' and 'from' values provided in the query\n",
    "resources = forge.elastic(query, limit=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 118,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "kgforge.core.resource.Resource"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Interesting associations</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Interesting people</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Interesting people</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id                      name\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Interesting associations\n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...        Interesting people\n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...        Interesting people"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Downloading"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: DemoStore doesn't implement file operations yet. Please use another store for this section."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane = Resource(type=\"Person\", name=\"Jane Doe\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "associations.tsv\n",
      "my_data.xwz\n",
      "my_data_derived.txt\n",
      "persons-with-id.csv\n",
      "persons.csv\n",
      "tfidfvectorizer_model_schemaorg_linking\n"
     ]
    }
   ],
   "source": [
    "! ls -p ../../data | egrep -v /$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [],
   "source": [
    "distribution = forge.attach(\"../../data\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "association = Resource(type=\"Association\", agent=jane, distribution=distribution)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _register_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.register(association)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
    "# By default, the downloadable file URLs are collected from the JSON path \"distribution.contentUrl\" (follow=\"distribution.contentUrl\") and\n",
    "# the files are downloaded into the current path (path=\".\").\n",
    "# The argument overwrite: bool can be provided to decide whether to overwrite (True) existing files with the same name or\n",
    "# to create new ones (False) with their names suffixed with a timestamp.\n",
    "# A cross_bucket argument can be provided to download data from the configured bucket (cross_bucket=False - the default value)\n",
    "# or from a bucket different from the configured one (cross_bucket=True). The configured store should support crossing buckets for this to work.\n",
    "forge.download(association)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Files of a specific content type can be downloaded.\n",
    "forge.download(association, content_type=\"text/tab-separated-values\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The URLs of the files to download can be collected from a different JSON path (by setting a value for \"follow\") and\n",
    "# the files can be downloaded to a different path (by setting a value for \"path\").\n",
    "forge.download(association, follow=\"distribution.contentUrl\", path=\"./downloaded/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "total 896\n",
      "-rw-r--r--  1 cgonzale  10067     477 May 22 10:24 associations.tsv\n",
      "-rw-r--r--  1 cgonzale  10067     477 May 22 10:58 associations.tsv.20230522105817\n",
      "-rw-r--r--  1 cgonzale  10067      16 May 22 10:24 my_data.xwz\n",
      "-rw-r--r--  1 cgonzale  10067      16 May 22 10:58 my_data.xwz.20230522105817\n",
      "-rw-r--r--  1 cgonzale  10067      24 May 22 10:24 my_data_derived.txt\n",
      "-rw-r--r--  1 cgonzale  10067      24 May 22 10:58 my_data_derived.txt.20230522105817\n",
      "-rw-r--r--  1 cgonzale  10067     126 May 22 10:24 persons-with-id.csv\n",
      "-rw-r--r--  1 cgonzale  10067     126 May 22 10:58 persons-with-id.csv.20230522105817\n",
      "-rw-r--r--  1 cgonzale  10067      52 May 22 10:24 persons.csv\n",
      "-rw-r--r--  1 cgonzale  10067      52 May 22 10:58 persons.csv.20230522105817\n",
      "-rw-r--r--  1 cgonzale  10067  204848 May 22 10:24 tfidfvectorizer_model_schemaorg_linking\n",
      "-rw-r--r--  1 cgonzale  10067  204848 May 22 10:58 tfidfvectorizer_model_schemaorg_linking.20230522105817\n"
     ]
    }
   ],
   "source": [
    "! ls -l ./downloaded/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [],
   "source": [
    "#! rm -R ./downloaded/"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "kgforge",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  },
  "vscode": {
   "interpreter": {
    "hash": "9ac393a5ddd595f2c78ea58b15bf8d269850a4413729cbea5c5fae9013762763"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}