{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-09-23T18:50:19.036357Z",
     "start_time": "2019-09-23T18:50:19.031896Z"
    }
   },
   "source": [
    "# Querying\n",
    "\n",
    "This notebook demonstrates Nexus Forge data [querying features](https://nexus-forge.readthedocs.io/en/latest/interaction.html#querying)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-09-23T18:50:20.068658Z",
     "start_time": "2019-09-23T18:50:19.054054Z"
    }
   },
   "outputs": [],
   "source": [
    "from kgforge.core import KnowledgeGraphForge"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A configuration file is needed in order to create a KnowledgeGraphForge session. A configuration can be generated using the notebook [00-Initialization.ipynb](00%20-%20Initialization.ipynb)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "forge = KnowledgeGraphForge(\"../../configurations/forge.yml\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from kgforge.core import Resource\n",
    "from kgforge.specializations.resources import Dataset\n",
    "from kgforge.core.wrappings.paths import Filter, FilterOperator"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Retrieval"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### latest version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane = Resource(type=\"Person\", name=\"Jane Doe\", award=[\"Nobel\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _register_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.register(jane)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "resource = forge.retrieve(jane.id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource == jane"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### specific version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane = Resource(type=\"Person\", name=\"Jane Doe\", award=[\"Nobel\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _register_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.register(jane)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _tag_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.tag(jane, \"v1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane.email = [\"jane.doe@epfl.ch\", \"jane.doe@example.org\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _update_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.update(jane)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-09-23T18:50:21.317601Z",
     "start_time": "2019-09-23T18:50:21.310418Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3\n"
     ]
    }
   ],
   "source": [
    "try:\n",
    "    # DemoStore\n",
    "    print(jane._store_metadata.version)\n",
    "except:\n",
    "    # BlueBrainNexus\n",
    "    print(jane._store_metadata._rev)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-09-23T18:50:21.332678Z",
     "start_time": "2019-09-23T18:50:21.322025Z"
    }
   },
   "outputs": [],
   "source": [
    "jane_v1 = forge.retrieve(jane.id, version=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-09-23T18:50:21.370051Z",
     "start_time": "2019-09-23T18:50:21.363782Z"
    }
   },
   "outputs": [],
   "source": [
    "jane_v1_tag = forge.retrieve(jane.id, version=\"v1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane_v1_rev = forge.retrieve(jane.id+\"?rev=1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-09-23T18:50:21.379911Z",
     "start_time": "2019-09-23T18:50:21.373539Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "jane_v1 == jane_v1_tag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "jane_v1 == jane_v1_rev"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "jane_v1 != jane"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n"
     ]
    }
   ],
   "source": [
    "try:\n",
    "    # DemoStore\n",
    "    print(jane_v1._store_metadata.version)\n",
    "except:\n",
    "    # BlueBrainNexus\n",
    "    print(jane_v1._store_metadata._rev)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### crossbucket retrieval\n",
    "It is possible to retrieve resources stored in buckets different then the configured one. The configured store should of course support it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "resource = forge.retrieve(jane.id, cross_bucket=True) # cross_bucket defaults to False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/91246fb9-3193-4c56-b9b4-8e88d828273c',\n",
       " '_constrainedBy': 'https://bluebrain.github.io/nexus/schemas/unconstrained.json',\n",
       " '_createdAt': '2024-02-15T13:28:29.759Z',\n",
       " '_createdBy': 'https://sandbox.bluebrainnexus.io/v1/realms/github/users/ssssarah',\n",
       " '_deprecated': False,\n",
       " '_incoming': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/https:%2F%2Fsandbox.bluebrainnexus.io%2Fv1%2Fresources%2Fgithub-users%2Fssssarah%2F_%2F91246fb9-3193-4c56-b9b4-8e88d828273c/incoming',\n",
       " '_outgoing': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/https:%2F%2Fsandbox.bluebrainnexus.io%2Fv1%2Fresources%2Fgithub-users%2Fssssarah%2F_%2F91246fb9-3193-4c56-b9b4-8e88d828273c/outgoing',\n",
       " '_project': 'https://sandbox.bluebrainnexus.io/v1/projects/github-users/ssssarah',\n",
       " '_rev': 3,\n",
       " '_schemaProject': 'https://sandbox.bluebrainnexus.io/v1/projects/github-users/ssssarah',\n",
       " '_self': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/https:%2F%2Fsandbox.bluebrainnexus.io%2Fv1%2Fresources%2Fgithub-users%2Fssssarah%2F_%2F91246fb9-3193-4c56-b9b4-8e88d828273c',\n",
       " '_updatedAt': '2024-02-15T13:28:29.960Z',\n",
       " '_updatedBy': 'https://sandbox.bluebrainnexus.io/v1/realms/github/users/ssssarah'}"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource._store_metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Action(error=None, message=None, operation='retrieve', succeeded=True)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource._last_action"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource._synchronized"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Retrieving using the resource url\n",
    "One can also use the value of `_self` from ._stote_metadata to retrieve a resource"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "import copy\n",
    "import string\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "other_resource = copy.deepcopy(resource)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "other_resource.id = f\"https://my-incredible-id-{''.join(random.choices(string.digits, k=5))}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _register_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.register(other_resource)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = other_resource._store_metadata['_self']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "same_resource_url = forge.retrieve(id=url)\n",
    "same_resource_id = forge.retrieve(id=other_resource.id)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Confirm they are the same"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "same_resource_id == same_resource_url"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Original source retrieval\n",
    "When using BlueBrainNexusStore, it is possible to retrieve resources' payload as they were registered (retrieve_source=True) without any changes related to store added metadata or JSONLD framing."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "resource = forge.retrieve(jane.id, retrieve_source=False) # retrieve_source defaults to True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/91246fb9-3193-4c56-b9b4-8e88d828273c',\n",
       " 'type': 'Person',\n",
       " 'award': 'Nobel',\n",
       " 'email': ['jane.doe@epfl.ch', 'jane.doe@example.org'],\n",
       " 'name': 'Jane Doe'}"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_json(resource)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/91246fb9-3193-4c56-b9b4-8e88d828273c',\n",
       " '_constrainedBy': 'https://bluebrain.github.io/nexus/schemas/unconstrained.json',\n",
       " '_createdAt': '2024-02-15T13:28:29.759Z',\n",
       " '_createdBy': 'https://sandbox.bluebrainnexus.io/v1/realms/github/users/ssssarah',\n",
       " '_deprecated': False,\n",
       " '_incoming': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/https:%2F%2Fsandbox.bluebrainnexus.io%2Fv1%2Fresources%2Fgithub-users%2Fssssarah%2F_%2F91246fb9-3193-4c56-b9b4-8e88d828273c/incoming',\n",
       " '_outgoing': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/https:%2F%2Fsandbox.bluebrainnexus.io%2Fv1%2Fresources%2Fgithub-users%2Fssssarah%2F_%2F91246fb9-3193-4c56-b9b4-8e88d828273c/outgoing',\n",
       " '_project': 'https://sandbox.bluebrainnexus.io/v1/projects/github-users/ssssarah',\n",
       " '_rev': 3,\n",
       " '_schemaProject': 'https://sandbox.bluebrainnexus.io/v1/projects/github-users/ssssarah',\n",
       " '_self': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/https:%2F%2Fsandbox.bluebrainnexus.io%2Fv1%2Fresources%2Fgithub-users%2Fssssarah%2F_%2F91246fb9-3193-4c56-b9b4-8e88d828273c',\n",
       " '_updatedAt': '2024-02-15T13:28:29.960Z',\n",
       " '_updatedBy': 'https://sandbox.bluebrainnexus.io/v1/realms/github/users/ssssarah'}"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource._store_metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "resource._last_action"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource._synchronized"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### error handling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> catch_http_error\n",
      "<error> RetrievalError: 404 Client Error: Not Found for url: https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/%3A%2F%2F123\n",
      "\n"
     ]
    }
   ],
   "source": [
    "resource = forge.retrieve(\"123\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resource is None"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Searching"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: DemoModel and RdfModel schemas have not been synchronized yet. This section is to be run with RdfModel. Commented lines are for DemoModel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane = Resource(type=\"Person\", name=\"Jane Doe\")\n",
    "contribution_jane = Resource(type=\"Contribution\", agent=jane)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "john = Resource(type=\"Person\", name=\"John Smith\")\n",
    "contribution_john = Resource(type=\"Contribution\", agent=john)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = Dataset(forge, type=\"Dataset\", contribution=[contribution_jane, contribution_john])\n",
    "dataset.add_distribution(\"../../data/associations.tsv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _register_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.register(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'id': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/e593f0b8-09f4-4438-b7a0-4024e06b47b6',\n",
       " 'type': 'Dataset',\n",
       " 'contribution': [{'type': 'Contribution',\n",
       "   'agent': {'type': 'Person', 'name': 'Jane Doe'}},\n",
       "  {'type': 'Contribution', 'agent': {'type': 'Person', 'name': 'John Smith'}}],\n",
       " 'distribution': {'type': 'DataDownload',\n",
       "  'atLocation': {'type': 'Location',\n",
       "   'store': {'id': 'https://bluebrain.github.io/nexus/vocabulary/diskStorageDefault',\n",
       "    'type': 'DiskStorage',\n",
       "    '_rev': 1}},\n",
       "  'contentSize': {'unitCode': 'bytes', 'value': 477},\n",
       "  'contentUrl': 'https://sandbox.bluebrainnexus.io/v1/files/github-users/ssssarah/https%3A%2F%2Fsandbox.bluebrainnexus.io%2Fv1%2Fresources%2Fgithub-users%2Fssssarah%2F_%2F9863c9c1-6005-4634-9989-199ab4ed8bf1',\n",
       "  'digest': {'algorithm': 'SHA-256',\n",
       "   'value': '789aa07948683fe036ac29811814a826b703b562f7d168eb70dee1fabde26859'},\n",
       "  'encodingFormat': 'text/tab-separated-values',\n",
       "  'name': 'associations.tsv'}}"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_json(dataset)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using resource paths as filters"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `paths` method load the template or property paths (ie. expected properties) for a given type.\n",
    "\n",
    "Please refer to the [Modeling.ipynb](11%20-%20Modeling.ipynb) notebook to learn about templates and types."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "p = forge.paths(\"Dataset\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Autocompletion is enabled on `p` and this can be used to create search filters."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: There is a known issue for RdfModel which requires using `p.type.id` instead of `p.type`."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "All [python comparison operators](https://www.w3schools.com/python/gloss_python_comparison_operators.asp) are supported."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "resources = forge.search(p.type.id==\"Person\", limit=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>name</th>\n",
       "      <th>distribution.type</th>\n",
       "      <th>distribution.atLocation.type</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>distribution.contentUrl</th>\n",
       "      <th>distribution.digest.algorithm</th>\n",
       "      <th>distribution.digest.value</th>\n",
       "      <th>distribution.encodingFormat</th>\n",
       "      <th>distribution.name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>John Smith</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Jane Doe</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Jane Doe</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bytes</td>\n",
       "      <td>52.0</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>1dacd765946963fda4949753659089c5f532714b418d30...</td>\n",
       "      <td>text/csv</td>\n",
       "      <td>persons.csv</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id    type        name  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Person  John Smith   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  Person    Jane Doe   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  Person    Jane Doe   \n",
       "\n",
       "  distribution.type distribution.atLocation.type  \\\n",
       "0               NaN                          NaN   \n",
       "1               NaN                          NaN   \n",
       "2      DataDownload                     Location   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "\n",
       "  distribution.atLocation.store.type  distribution.atLocation.store._rev  \\\n",
       "0                                NaN                                 NaN   \n",
       "1                                NaN                                 NaN   \n",
       "2                        DiskStorage                                 1.0   \n",
       "\n",
       "  distribution.contentSize.unitCode  distribution.contentSize.value  \\\n",
       "0                               NaN                             NaN   \n",
       "1                               NaN                             NaN   \n",
       "2                             bytes                            52.0   \n",
       "\n",
       "                             distribution.contentUrl  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "\n",
       "  distribution.digest.algorithm  \\\n",
       "0                           NaN   \n",
       "1                           NaN   \n",
       "2                       SHA-256   \n",
       "\n",
       "                           distribution.digest.value  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  1dacd765946963fda4949753659089c5f532714b418d30...   \n",
       "\n",
       "  distribution.encodingFormat distribution.name  \n",
       "0                         NaN               NaN  \n",
       "1                         NaN               NaN  \n",
       "2                    text/csv       persons.csv  "
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>name</th>\n",
       "      <th>_constrainedBy</th>\n",
       "      <th>_createdAt</th>\n",
       "      <th>_createdBy</th>\n",
       "      <th>_deprecated</th>\n",
       "      <th>_incoming</th>\n",
       "      <th>_outgoing</th>\n",
       "      <th>_project</th>\n",
       "      <th>...</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>distribution.contentUrl</th>\n",
       "      <th>distribution.digest.algorithm</th>\n",
       "      <th>distribution.digest.value</th>\n",
       "      <th>distribution.encodingFormat</th>\n",
       "      <th>distribution.name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>John Smith</td>\n",
       "      <td>https://bluebrain.github.io/nexus/schemas/unco...</td>\n",
       "      <td>2024-02-13T13:04:27.840Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Jane Doe</td>\n",
       "      <td>https://bluebrain.github.io/nexus/schemas/unco...</td>\n",
       "      <td>2024-02-13T13:04:27.842Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Jane Doe</td>\n",
       "      <td>https://bluebrain.github.io/nexus/schemas/unco...</td>\n",
       "      <td>2024-02-13T13:04:28.270Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>...</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bytes</td>\n",
       "      <td>52.0</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>1dacd765946963fda4949753659089c5f532714b418d30...</td>\n",
       "      <td>text/csv</td>\n",
       "      <td>persons.csv</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id    type        name  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Person  John Smith   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  Person    Jane Doe   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  Person    Jane Doe   \n",
       "\n",
       "                                      _constrainedBy  \\\n",
       "0  https://bluebrain.github.io/nexus/schemas/unco...   \n",
       "1  https://bluebrain.github.io/nexus/schemas/unco...   \n",
       "2  https://bluebrain.github.io/nexus/schemas/unco...   \n",
       "\n",
       "                 _createdAt  \\\n",
       "0  2024-02-13T13:04:27.840Z   \n",
       "1  2024-02-13T13:04:27.842Z   \n",
       "2  2024-02-13T13:04:28.270Z   \n",
       "\n",
       "                                          _createdBy  _deprecated  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/realms/gi...        False   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/realms/gi...        False   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/realms/gi...        False   \n",
       "\n",
       "                                           _incoming  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                                           _outgoing  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                                            _project  ...  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/projects/...  ...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/projects/...  ...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/projects/...  ...   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "\n",
       "  distribution.atLocation.store.type distribution.atLocation.store._rev  \\\n",
       "0                                NaN                                NaN   \n",
       "1                                NaN                                NaN   \n",
       "2                        DiskStorage                                1.0   \n",
       "\n",
       "  distribution.contentSize.unitCode distribution.contentSize.value  \\\n",
       "0                               NaN                            NaN   \n",
       "1                               NaN                            NaN   \n",
       "2                             bytes                           52.0   \n",
       "\n",
       "                             distribution.contentUrl  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "\n",
       "  distribution.digest.algorithm  \\\n",
       "0                           NaN   \n",
       "1                           NaN   \n",
       "2                       SHA-256   \n",
       "\n",
       "                           distribution.digest.value  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  1dacd765946963fda4949753659089c5f532714b418d30...   \n",
       "\n",
       "  distribution.encodingFormat  distribution.name  \n",
       "0                         NaN                NaN  \n",
       "1                         NaN                NaN  \n",
       "2                    text/csv        persons.csv  \n",
       "\n",
       "[3 rows x 27 columns]"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources, store_metadata=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Search results are not synchronized\n",
    "resources[0]._synchronized"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Using nested resource property"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Property autocompletion is available on a path `p` even for nested properties like `p.contribution`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search for resources of type Dataset and with text/tab-separated-values as distribution.encodingFormat\n",
    "resources = forge.search(p.type.id == \"Dataset\", p.distribution.encodingFormat == \"text/tab-separated-values\", limit=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>contribution</th>\n",
       "      <th>distribution.type</th>\n",
       "      <th>distribution.atLocation.type</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>distribution.contentUrl</th>\n",
       "      <th>distribution.digest.algorithm</th>\n",
       "      <th>distribution.digest.value</th>\n",
       "      <th>distribution.encodingFormat</th>\n",
       "      <th>distribution.name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Dataset</td>\n",
       "      <td>[{'type': 'Contribution', 'agent': {'type': 'P...</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>789aa07948683fe036ac29811814a826b703b562f7d168...</td>\n",
       "      <td>text/tab-separated-values</td>\n",
       "      <td>associations.tsv</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Dataset</td>\n",
       "      <td>[{'type': 'Contribution', 'agent': {'type': 'P...</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>789aa07948683fe036ac29811814a826b703b562f7d168...</td>\n",
       "      <td>text/tab-separated-values</td>\n",
       "      <td>associations.tsv</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Dataset</td>\n",
       "      <td>[{'type': 'Contribution', 'agent': {'type': 'P...</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>789aa07948683fe036ac29811814a826b703b562f7d168...</td>\n",
       "      <td>text/tab-separated-values</td>\n",
       "      <td>associations.tsv</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id     type  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Dataset   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  Dataset   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  Dataset   \n",
       "\n",
       "                                        contribution distribution.type  \\\n",
       "0  [{'type': 'Contribution', 'agent': {'type': 'P...      DataDownload   \n",
       "1  [{'type': 'Contribution', 'agent': {'type': 'P...      DataDownload   \n",
       "2  [{'type': 'Contribution', 'agent': {'type': 'P...      DataDownload   \n",
       "\n",
       "  distribution.atLocation.type  \\\n",
       "0                     Location   \n",
       "1                     Location   \n",
       "2                     Location   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "1  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "2  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "\n",
       "  distribution.atLocation.store.type  distribution.atLocation.store._rev  \\\n",
       "0                        DiskStorage                                   1   \n",
       "1                        DiskStorage                                   1   \n",
       "2                        DiskStorage                                   1   \n",
       "\n",
       "  distribution.contentSize.unitCode  distribution.contentSize.value  \\\n",
       "0                             bytes                             477   \n",
       "1                             bytes                             477   \n",
       "2                             bytes                             477   \n",
       "\n",
       "                             distribution.contentUrl  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "\n",
       "  distribution.digest.algorithm  \\\n",
       "0                       SHA-256   \n",
       "1                       SHA-256   \n",
       "2                       SHA-256   \n",
       "\n",
       "                           distribution.digest.value  \\\n",
       "0  789aa07948683fe036ac29811814a826b703b562f7d168...   \n",
       "1  789aa07948683fe036ac29811814a826b703b562f7d168...   \n",
       "2  789aa07948683fe036ac29811814a826b703b562f7d168...   \n",
       "\n",
       "  distribution.encodingFormat distribution.name  \n",
       "0   text/tab-separated-values  associations.tsv  \n",
       "1   text/tab-separated-values  associations.tsv  \n",
       "2   text/tab-separated-values  associations.tsv  "
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using dictionaries as filters\n",
    "A dictionary can be provided for filters:\n",
    "* {'type': {'id':'Dataset'}} is equivalent to p.type.id==\"Dataset\"\n",
    "* only the '==' operator is supported\n",
    "* nested dict are supported\n",
    "* it is not mandatory for the provided properties and values to be defined in the forge model. Results will be retrieved if there are corresponding data in the store.\n",
    "\n",
    "This feature is not supported when using the DemoStore\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search for resources of type Dataset and with text/tab-separated-values as distribution.encodingFormat\n",
    "# and created a given dateTime (by default, dateTime values should be signaled by the suffix \"^^xsd:dateTime\")\n",
    "filters = {\n",
    "           \"type\": \"Dataset\", \n",
    "           \"distribution\":{\"encodingFormat\":\"text/tab-separated-values\"},\n",
    "           \"_createdAt\":dataset._store_metadata._createdAt+\"^^xsd:dateTime\"\n",
    "          }\n",
    "resources = forge.search(filters, limit=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: []\n",
       "Index: []"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources, store_metadata=True)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using built-in Filter objects"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Supported filter operators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['__eq__ (EQUAL)',\n",
       " '__ne__ (NOT_EQUAL)',\n",
       " '__lt__ (LOWER_THAN)',\n",
       " '__le__ (LOWER_OR_Equal_Than)',\n",
       " '__gt__ (GREATER_Than)',\n",
       " '__ge__ (GREATER_OR_Equal_Than)']"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[f\"{op.value} ({op.name})\" for op in FilterOperator] # These are equivalent to the Python comparison operators"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search for resources of type Dataset and with text/tab-separated-values as distribution.encodingFormat\n",
    "# and created a given dateTime (dateTime values should be signaled by the suffix \"^^xsd:dateTime\")\n",
    "filter_1 = Filter(operator=FilterOperator.EQUAL, path=[\"type\"], value=\"Dataset\")\n",
    "filter_2 = Filter(operator=FilterOperator.EQUAL, path=[\"distribution\",\"encodingFormat\"], value=\"text/tab-separated-values\")\n",
    "filter_3 = Filter(operator=FilterOperator.LOWER_OR_Equal_Than, path=[\"_createdAt\"], value=dataset._store_metadata._createdAt+\"^^xsd:dateTime\")\n",
    "\n",
    "resources = forge.search(filter_1, filter_2, filter_3, limit=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>contribution</th>\n",
       "      <th>distribution.type</th>\n",
       "      <th>distribution.atLocation.type</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>...</th>\n",
       "      <th>_createdBy</th>\n",
       "      <th>_deprecated</th>\n",
       "      <th>_incoming</th>\n",
       "      <th>_outgoing</th>\n",
       "      <th>_project</th>\n",
       "      <th>_rev</th>\n",
       "      <th>_schemaProject</th>\n",
       "      <th>_self</th>\n",
       "      <th>_updatedAt</th>\n",
       "      <th>_updatedBy</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Dataset</td>\n",
       "      <td>[{'type': 'Contribution', 'agent': {'type': 'P...</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477</td>\n",
       "      <td>...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>1</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>2024-02-02T09:47:31.662Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Dataset</td>\n",
       "      <td>[{'type': 'Contribution', 'agent': {'type': 'P...</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477</td>\n",
       "      <td>...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>1</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>2024-02-02T09:59:31.740Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Dataset</td>\n",
       "      <td>[{'type': 'Contribution', 'agent': {'type': 'P...</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1</td>\n",
       "      <td>bytes</td>\n",
       "      <td>477</td>\n",
       "      <td>...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>1</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>2024-02-02T15:21:32.898Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id     type  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Dataset   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  Dataset   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  Dataset   \n",
       "\n",
       "                                        contribution distribution.type  \\\n",
       "0  [{'type': 'Contribution', 'agent': {'type': 'P...      DataDownload   \n",
       "1  [{'type': 'Contribution', 'agent': {'type': 'P...      DataDownload   \n",
       "2  [{'type': 'Contribution', 'agent': {'type': 'P...      DataDownload   \n",
       "\n",
       "  distribution.atLocation.type  \\\n",
       "0                     Location   \n",
       "1                     Location   \n",
       "2                     Location   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "1  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "2  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "\n",
       "  distribution.atLocation.store.type  distribution.atLocation.store._rev  \\\n",
       "0                        DiskStorage                                   1   \n",
       "1                        DiskStorage                                   1   \n",
       "2                        DiskStorage                                   1   \n",
       "\n",
       "  distribution.contentSize.unitCode  distribution.contentSize.value  ...  \\\n",
       "0                             bytes                             477  ...   \n",
       "1                             bytes                             477  ...   \n",
       "2                             bytes                             477  ...   \n",
       "\n",
       "                                          _createdBy _deprecated  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/realms/gi...       False   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/realms/gi...       False   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/realms/gi...       False   \n",
       "\n",
       "                                           _incoming  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                                           _outgoing  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                                            _project _rev  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/projects/...    1   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/projects/...    1   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/projects/...    1   \n",
       "\n",
       "                                      _schemaProject  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/projects/...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/projects/...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/projects/...   \n",
       "\n",
       "                                               _self  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                 _updatedAt                                         _updatedBy  \n",
       "0  2024-02-02T09:47:31.662Z  https://sandbox.bluebrainnexus.io/v1/realms/gi...  \n",
       "1  2024-02-02T09:59:31.740Z  https://sandbox.bluebrainnexus.io/v1/realms/gi...  \n",
       "2  2024-02-02T15:21:32.898Z  https://sandbox.bluebrainnexus.io/v1/realms/gi...  \n",
       "\n",
       "[3 rows x 27 columns]"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources, store_metadata=True)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using search endpoints\n",
    "\n",
    "Two types of search endpoints are supported: 'sparql' (default) for graph queries and 'elastic' for document oriented queries. The types of available search endpoint can be configured (see [00-Initialization.ipynb](00%20-%20Initialization.ipynb) for an example of search endpoints config) or set when creating a KnowledgeGraphForge session using the 'searchendpoints' arguments.\n",
    "\n",
    "The search endpoint to hit when calling forge.search(...) is 'sparql' by default but can be specified using the 'search_endpoint' argument.\n",
    "\n",
    "The data that is available through these searchendpoints is limited to data indexed in specific indices that are configured through views. The project is set-up with default a elastic search view that indexes all data, and a default sparql view that indexes all data.\n",
    "You may define a view that targets a subset of the data based on some filters."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### SPARQL Search Endpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search for resources of type Person\n",
    "filters = {\"type\": \"Person\"}\n",
    "resources = forge.search(filters, limit=3, search_endpoint='sparql')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>name</th>\n",
       "      <th>_constrainedBy</th>\n",
       "      <th>_createdAt</th>\n",
       "      <th>_createdBy</th>\n",
       "      <th>_deprecated</th>\n",
       "      <th>_incoming</th>\n",
       "      <th>_outgoing</th>\n",
       "      <th>_project</th>\n",
       "      <th>...</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>distribution.contentUrl</th>\n",
       "      <th>distribution.digest.algorithm</th>\n",
       "      <th>distribution.digest.value</th>\n",
       "      <th>distribution.encodingFormat</th>\n",
       "      <th>distribution.name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>John Smith</td>\n",
       "      <td>https://bluebrain.github.io/nexus/schemas/unco...</td>\n",
       "      <td>2024-02-13T13:04:27.840Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Jane Doe</td>\n",
       "      <td>https://bluebrain.github.io/nexus/schemas/unco...</td>\n",
       "      <td>2024-02-13T13:04:27.842Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Person</td>\n",
       "      <td>Jane Doe</td>\n",
       "      <td>https://bluebrain.github.io/nexus/schemas/unco...</td>\n",
       "      <td>2024-02-13T13:04:28.270Z</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/realms/gi...</td>\n",
       "      <td>False</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/projects/...</td>\n",
       "      <td>...</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bytes</td>\n",
       "      <td>52.0</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>1dacd765946963fda4949753659089c5f532714b418d30...</td>\n",
       "      <td>text/csv</td>\n",
       "      <td>persons.csv</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id    type        name  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Person  John Smith   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  Person    Jane Doe   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  Person    Jane Doe   \n",
       "\n",
       "                                      _constrainedBy  \\\n",
       "0  https://bluebrain.github.io/nexus/schemas/unco...   \n",
       "1  https://bluebrain.github.io/nexus/schemas/unco...   \n",
       "2  https://bluebrain.github.io/nexus/schemas/unco...   \n",
       "\n",
       "                 _createdAt  \\\n",
       "0  2024-02-13T13:04:27.840Z   \n",
       "1  2024-02-13T13:04:27.842Z   \n",
       "2  2024-02-13T13:04:28.270Z   \n",
       "\n",
       "                                          _createdBy  _deprecated  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/realms/gi...        False   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/realms/gi...        False   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/realms/gi...        False   \n",
       "\n",
       "                                           _incoming  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                                           _outgoing  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...   \n",
       "\n",
       "                                            _project  ...  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/projects/...  ...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/projects/...  ...   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/projects/...  ...   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "\n",
       "  distribution.atLocation.store.type distribution.atLocation.store._rev  \\\n",
       "0                                NaN                                NaN   \n",
       "1                                NaN                                NaN   \n",
       "2                        DiskStorage                                1.0   \n",
       "\n",
       "  distribution.contentSize.unitCode distribution.contentSize.value  \\\n",
       "0                               NaN                            NaN   \n",
       "1                               NaN                            NaN   \n",
       "2                             bytes                           52.0   \n",
       "\n",
       "                             distribution.contentUrl  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "\n",
       "  distribution.digest.algorithm  \\\n",
       "0                           NaN   \n",
       "1                           NaN   \n",
       "2                       SHA-256   \n",
       "\n",
       "                           distribution.digest.value  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2  1dacd765946963fda4949753659089c5f532714b418d30...   \n",
       "\n",
       "  distribution.encodingFormat  distribution.name  \n",
       "0                         NaN                NaN  \n",
       "1                         NaN                NaN  \n",
       "2                    text/csv        persons.csv  \n",
       "\n",
       "[3 rows x 27 columns]"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources, store_metadata=True)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### ElasticSearch Endpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Search for resources of type Person and retrieve their ids and names.\n",
    "filters = {\"@type\": \"http://schema.org/Person\"}\n",
    "resources = forge.search(\n",
    "    filters, limit=3, search_endpoint='elastic', includes=[\"@id\", \"@type\"]\n",
    ") # fields can also be excluded with 'excludes'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>http://schema.org/Person</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>http://schema.org/Person</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>http://schema.org/Person</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id                      type\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  http://schema.org/Person\n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  http://schema.org/Person\n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  http://schema.org/Person"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources, store_metadata=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Search results are not synchronized\n",
    "resources[0]._synchronized"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/e01663b6-604b-42db-b958-da9fbc1baca2'"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resources[0].id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'http://schema.org/Person'"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resources[0].type"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### ElasticSearch Endpoint - Alternative view"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- Create a view that only indexes resources of type http://schema.org/Person"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests \n",
    "\n",
    "payload = {\n",
    "    \"@type\": [\"View\", \"ElasticSearchView\"],\n",
    "    \"pipeline\": [{\n",
    "                \"name\": \"filterByType\",\n",
    "                \"config\": {\"types\": [\"http://schema.org/Person\"]}\n",
    "            }],\n",
    "    \"mapping\": {\n",
    "        \"dynamic\": True,\n",
    "        \"properties\": {\n",
    "          \"@id\": {\n",
    "            \"type\": \"keyword\"\n",
    "          },\n",
    "          \"@type\": {\n",
    "            \"type\": \"keyword\"\n",
    "          }\n",
    "        }\n",
    "    }\n",
    "}\n",
    "\n",
    "url = f\"{forge._store.endpoint}/views/{forge._store.bucket}\"\n",
    "\n",
    "headers = {\n",
    "    \"mode\": \"cors\",\n",
    "    \"Content-Type\": \"application/json\",\n",
    "    \"Accept\": \"application/ld+json, application/json\",\n",
    "    \"Authorization\": \"Bearer \" + forge._store.token\n",
    "}\n",
    "response = requests.post(url=url, headers=headers, json=payload)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/69311bb0-7509-4e39-933d-6ea1865ed9ed'"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "view_id = response.json()[\"@id\"]\n",
    "view_id"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- Query the view (may take a few seconds to get a response due to indexing time)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Submitted query: {'query': {'bool': {'filter': [{'term': {'@type': 'Dataset'}}], 'must': [{'match': {'_deprecated': False}}], 'must_not': []}}, 'size': 100}\n",
      "Submitted query: {'query': {'bool': {'filter': [{'term': {'@type': 'Person'}}], 'must': [{'match': {'_deprecated': False}}], 'must_not': []}}, 'size': 100}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(0, 0)"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result_1 = forge.search({\"@type\": \"Dataset\"}, search_endpoint='elastic', view=view_id, cross_bucket=True, debug=True)\n",
    "\n",
    "result_2 =  forge.search({\"@type\": \"Person\"}, search_endpoint='elastic', view=view_id, cross_bucket=True, debug=True)\n",
    "\n",
    "len(result_1), len(result_2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Providing a view is feature that is also available through the forge.elastic, and forge.sparql calls"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Crossbucket search\n",
    "It is possible to search for resources stored in buckets different than the configured one. The configured store should of course support it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "resources = forge.search(p.type.id == \"Association\", limit=3, cross_bucket=True)  # cross_bucket defaults to False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>agent.type</th>\n",
       "      <th>agent.gender.id</th>\n",
       "      <th>agent.gender.type</th>\n",
       "      <th>agent.gender.label</th>\n",
       "      <th>agent.name</th>\n",
       "      <th>distribution.type</th>\n",
       "      <th>distribution.atLocation.type</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>distribution.contentUrl</th>\n",
       "      <th>distribution.digest.algorithm</th>\n",
       "      <th>distribution.digest.value</th>\n",
       "      <th>distribution.encodingFormat</th>\n",
       "      <th>distribution.name</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Association</td>\n",
       "      <td>Person</td>\n",
       "      <td>http://purl.obolibrary.org/obo/PATO_0000383</td>\n",
       "      <td>LabeledOntologyEntity</td>\n",
       "      <td>female</td>\n",
       "      <td>Marie Curie</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bytes</td>\n",
       "      <td>46.0</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>e0fe65f725bf28fe2b88c7bafb51fb5ef1df0ab14c68a3...</td>\n",
       "      <td>text/plain</td>\n",
       "      <td>marie_curie.txt</td>\n",
       "      <td>Curie Association</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Association</td>\n",
       "      <td>Person</td>\n",
       "      <td>http://purl.obolibrary.org/obo/PATO_0000384</td>\n",
       "      <td>LabeledOntologyEntity</td>\n",
       "      <td>male</td>\n",
       "      <td>Albert Einstein</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bytes</td>\n",
       "      <td>50.0</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>91a5ce5c84dc5bead730a4b49d0698b4aaef4bc06ce164...</td>\n",
       "      <td>text/plain</td>\n",
       "      <td>albert_einstein.txt</td>\n",
       "      <td>Einstein Association</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Association</td>\n",
       "      <td>Person</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Jane Doe</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id         type agent.type  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Association     Person   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  Association     Person   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  Association     Person   \n",
       "\n",
       "                               agent.gender.id      agent.gender.type  \\\n",
       "0  http://purl.obolibrary.org/obo/PATO_0000383  LabeledOntologyEntity   \n",
       "1  http://purl.obolibrary.org/obo/PATO_0000384  LabeledOntologyEntity   \n",
       "2                                          NaN                    NaN   \n",
       "\n",
       "  agent.gender.label       agent.name distribution.type  \\\n",
       "0             female      Marie Curie      DataDownload   \n",
       "1               male  Albert Einstein      DataDownload   \n",
       "2                NaN         Jane Doe               NaN   \n",
       "\n",
       "  distribution.atLocation.type  \\\n",
       "0                     Location   \n",
       "1                     Location   \n",
       "2                          NaN   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "1  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "2                                                NaN   \n",
       "\n",
       "  distribution.atLocation.store.type  distribution.atLocation.store._rev  \\\n",
       "0                        DiskStorage                                 1.0   \n",
       "1                        DiskStorage                                 1.0   \n",
       "2                                NaN                                 NaN   \n",
       "\n",
       "  distribution.contentSize.unitCode  distribution.contentSize.value  \\\n",
       "0                             bytes                            46.0   \n",
       "1                             bytes                            50.0   \n",
       "2                               NaN                             NaN   \n",
       "\n",
       "                             distribution.contentUrl  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "2                                                NaN   \n",
       "\n",
       "  distribution.digest.algorithm  \\\n",
       "0                       SHA-256   \n",
       "1                       SHA-256   \n",
       "2                           NaN   \n",
       "\n",
       "                           distribution.digest.value  \\\n",
       "0  e0fe65f725bf28fe2b88c7bafb51fb5ef1df0ab14c68a3...   \n",
       "1  91a5ce5c84dc5bead730a4b49d0698b4aaef4bc06ce164...   \n",
       "2                                                NaN   \n",
       "\n",
       "  distribution.encodingFormat    distribution.name                  name  \n",
       "0                  text/plain      marie_curie.txt     Curie Association  \n",
       "1                  text/plain  albert_einstein.txt  Einstein Association  \n",
       "2                         NaN                  NaN                   NaN  "
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Furthermore it is possible to filter by bucket when cross_bucket is set to True. Setting a bucket value when cross_bucket is False will trigger a not_supported exception.\n",
    "resources = forge.search(p.type.id == \"Person\", limit=3, cross_bucket=True, bucket=\"dke/kgforge\")  # add a bucket"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: []\n",
       "Index: []"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Searching original source\n",
    "When using BlueBrainNexusStore, it is possible to retrieve resources' payload as they were registered (retrieve_source=True) without any changes related to store added metadata or JSONLD framing."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "resources = forge.search(p.type.id == \"Association\", limit=3, retrieve_source=False)  # retrieve_source defaults to True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>type</th>\n",
       "      <th>agent.type</th>\n",
       "      <th>agent.gender.id</th>\n",
       "      <th>agent.gender.type</th>\n",
       "      <th>agent.gender.label</th>\n",
       "      <th>agent.name</th>\n",
       "      <th>distribution.type</th>\n",
       "      <th>distribution.atLocation.type</th>\n",
       "      <th>distribution.atLocation.store.id</th>\n",
       "      <th>distribution.atLocation.store.type</th>\n",
       "      <th>distribution.atLocation.store._rev</th>\n",
       "      <th>distribution.contentSize.unitCode</th>\n",
       "      <th>distribution.contentSize.value</th>\n",
       "      <th>distribution.contentUrl</th>\n",
       "      <th>distribution.digest.algorithm</th>\n",
       "      <th>distribution.digest.value</th>\n",
       "      <th>distribution.encodingFormat</th>\n",
       "      <th>distribution.name</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Association</td>\n",
       "      <td>Person</td>\n",
       "      <td>http://purl.obolibrary.org/obo/PATO_0000383</td>\n",
       "      <td>LabeledOntologyEntity</td>\n",
       "      <td>female</td>\n",
       "      <td>Marie Curie</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bytes</td>\n",
       "      <td>46.0</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>e0fe65f725bf28fe2b88c7bafb51fb5ef1df0ab14c68a3...</td>\n",
       "      <td>text/plain</td>\n",
       "      <td>marie_curie.txt</td>\n",
       "      <td>Curie Association</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Association</td>\n",
       "      <td>Person</td>\n",
       "      <td>http://purl.obolibrary.org/obo/PATO_0000384</td>\n",
       "      <td>LabeledOntologyEntity</td>\n",
       "      <td>male</td>\n",
       "      <td>Albert Einstein</td>\n",
       "      <td>DataDownload</td>\n",
       "      <td>Location</td>\n",
       "      <td>https://bluebrain.github.io/nexus/vocabulary/d...</td>\n",
       "      <td>DiskStorage</td>\n",
       "      <td>1.0</td>\n",
       "      <td>bytes</td>\n",
       "      <td>50.0</td>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/files/git...</td>\n",
       "      <td>SHA-256</td>\n",
       "      <td>91a5ce5c84dc5bead730a4b49d0698b4aaef4bc06ce164...</td>\n",
       "      <td>text/plain</td>\n",
       "      <td>albert_einstein.txt</td>\n",
       "      <td>Einstein Association</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Association</td>\n",
       "      <td>Person</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Jane Doe</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id         type agent.type  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...  Association     Person   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  Association     Person   \n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...  Association     Person   \n",
       "\n",
       "                               agent.gender.id      agent.gender.type  \\\n",
       "0  http://purl.obolibrary.org/obo/PATO_0000383  LabeledOntologyEntity   \n",
       "1  http://purl.obolibrary.org/obo/PATO_0000384  LabeledOntologyEntity   \n",
       "2                                          NaN                    NaN   \n",
       "\n",
       "  agent.gender.label       agent.name distribution.type  \\\n",
       "0             female      Marie Curie      DataDownload   \n",
       "1               male  Albert Einstein      DataDownload   \n",
       "2                NaN         Jane Doe               NaN   \n",
       "\n",
       "  distribution.atLocation.type  \\\n",
       "0                     Location   \n",
       "1                     Location   \n",
       "2                          NaN   \n",
       "\n",
       "                    distribution.atLocation.store.id  \\\n",
       "0  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "1  https://bluebrain.github.io/nexus/vocabulary/d...   \n",
       "2                                                NaN   \n",
       "\n",
       "  distribution.atLocation.store.type  distribution.atLocation.store._rev  \\\n",
       "0                        DiskStorage                                 1.0   \n",
       "1                        DiskStorage                                 1.0   \n",
       "2                                NaN                                 NaN   \n",
       "\n",
       "  distribution.contentSize.unitCode  distribution.contentSize.value  \\\n",
       "0                             bytes                            46.0   \n",
       "1                             bytes                            50.0   \n",
       "2                               NaN                             NaN   \n",
       "\n",
       "                             distribution.contentUrl  \\\n",
       "0  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "1  https://sandbox.bluebrainnexus.io/v1/files/git...   \n",
       "2                                                NaN   \n",
       "\n",
       "  distribution.digest.algorithm  \\\n",
       "0                       SHA-256   \n",
       "1                       SHA-256   \n",
       "2                           NaN   \n",
       "\n",
       "                           distribution.digest.value  \\\n",
       "0  e0fe65f725bf28fe2b88c7bafb51fb5ef1df0ab14c68a3...   \n",
       "1  91a5ce5c84dc5bead730a4b49d0698b4aaef4bc06ce164...   \n",
       "2                                                NaN   \n",
       "\n",
       "  distribution.encodingFormat    distribution.name                  name  \n",
       "0                  text/plain      marie_curie.txt     Curie Association  \n",
       "1                  text/plain  albert_einstein.txt  Einstein Association  \n",
       "2                         NaN                  NaN                   NaN  "
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Graph traversing\n",
    "\n",
    "SPARQL is used as a query language to perform graph traversing.\n",
    "\n",
    "Nexus Forge implements a SPARQL query rewriting strategy leveraging a configured RDFModel that lets users write SPARQL queries without adding prefix declarations, prefix names or long IRIs. With this strategy, only type and property names can be provided.\n",
    "\n",
    "Please refer to the [Modeling.ipynb](11%20-%20Modeling.ipynb) notebook to learn about templates."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: DemoStore doesn't implement SPARQL operations yet. Please use another store for this section."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: DemoModel and RdfModel schemas have not been synchronized yet. This section is to be run with RdfModel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane = Resource(type=\"Person\", name=\"Jane Doe\")\n",
    "contribution_jane = Resource(type=\"Contribution\", agent=jane)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "john = Resource(type=\"Person\", name=\"John Smith\")\n",
    "contribution_john = Resource(type=\"Contribution\", agent=john)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "association = Resource(type=\"Dataset\", contribution=[contribution_jane, contribution_john])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _register_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.register(association)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "    id: \"\"\n",
      "    type:\n",
      "    {\n",
      "        id: \"\"\n",
      "    }\n",
      "    annotation:\n",
      "    {\n",
      "        id: \"\"\n",
      "        type: Annotation\n",
      "        hasBody:\n",
      "        {\n",
      "            id: \"\"\n",
      "            type:\n",
      "            {\n",
      "                id: \"\"\n",
      "            }\n",
      "            label: \"\"\n",
      "            note: \"\"\n",
      "        }\n",
      "        hasTarget:\n",
      "        {\n",
      "            id: \"\"\n",
      "            type: AnnotationTarget\n",
      "        }\n",
      "        note: \"\"\n",
      "    }\n",
      "    brainLocation:\n",
      "    {\n",
      "        id: \"\"\n",
      "        type: BrainLocation\n",
      "        atlasSpatialReferenceSystem:\n",
      "        {\n",
      "            id: \"\"\n",
      "            type: AtlasSpatialReferenceSystem\n",
      "        }\n",
      "        brainRegion:\n",
      "        {\n",
      "            id: \"\"\n",
      "            label: \"\"\n",
      "        }\n",
      "        coordinatesInBrainAtlas:\n",
      "        {\n",
      "            id: \"\"\n",
      "            valueX: 0.0\n",
      "            valueY: 0.0\n",
      "            valueZ: 0.0\n",
      "        }\n",
      "        coordinatesInSlice:\n",
      "        {\n",
      "            spatialReferenceSystem:\n",
      "            {\n",
      "                id: \"\"\n",
      "                type: SpatialReferenceSystem\n",
      "            }\n",
      "            valueX: 0.0\n",
      "            valueY: 0.0\n",
      "            valueZ: 0.0\n",
      "        }\n",
      "        distanceToBoundary:\n",
      "        {\n",
      "            boundary:\n",
      "            {\n",
      "                id: \"\"\n",
      "                label: \"\"\n",
      "            }\n",
      "            distance:\n",
      "            {\n",
      "                unitCode: \"\"\n",
      "                value:\n",
      "                [\n",
      "                    0.0\n",
      "                    0\n",
      "                ]\n",
      "            }\n",
      "        }\n",
      "        layer:\n",
      "        {\n",
      "            id: \"\"\n",
      "            label: \"\"\n",
      "        }\n",
      "        longitudinalAxis:\n",
      "        [\n",
      "            Dorsal\n",
      "            Ventral\n",
      "        ]\n",
      "        positionInLayer:\n",
      "        [\n",
      "            Deep\n",
      "            Superficial\n",
      "        ]\n",
      "    }\n",
      "    contribution:\n",
      "    {\n",
      "        id: \"\"\n",
      "    }\n",
      "    distribution:\n",
      "    {\n",
      "        id: \"\"\n",
      "        type: DataDownload\n",
      "        contentSize:\n",
      "        {\n",
      "            unitCode: \"\"\n",
      "            value:\n",
      "            [\n",
      "                0.0\n",
      "                0\n",
      "            ]\n",
      "        }\n",
      "        digest:\n",
      "        {\n",
      "            algorithm: \"\"\n",
      "            value: \"\"\n",
      "        }\n",
      "        encodingFormat: \"\"\n",
      "        license: \"\"\n",
      "        name: \"\"\n",
      "    }\n",
      "    objectOfStudy:\n",
      "    {\n",
      "        id: \"\"\n",
      "        type: ObjectOfStudy\n",
      "    }\n",
      "    releaseDate: 9999-12-31T00:00:00\n",
      "    subject:\n",
      "    {\n",
      "        id: \"\"\n",
      "        type: Subject\n",
      "    }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "forge.template(\"Dataset\") # Templates help know which property to use when writing a query to serach for a given type"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prefix and namespace free SPARQL query\n",
    "\n",
    "When a forge RDFModel is configured, then there is no need to provide prefixes and namespaces when writing a SPARQL query. Prefixes and namespaces will be automatically inferred from the provided schemas and/or JSON-LD context and the query rewritten accordingly."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"\"\"\n",
    "    SELECT ?id ?name ?contributor\n",
    "    WHERE {\n",
    "        ?id a Dataset ;\n",
    "        contribution/agent ?contributor.\n",
    "        ?contributor name ?name.\n",
    "    }\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "resources = forge.sparql(query, limit=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "    id: https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/ca722ef8-2295-4cf7-a35b-3c1cf3037232\n",
      "    contributor: t3532\n",
      "    name: John Smith\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "print(resources[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>contributor</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>t3532</td>\n",
       "      <td>John Smith</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>t3533</td>\n",
       "      <td>Jane Doe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>t3549</td>\n",
       "      <td>John Smith</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id contributor        name\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...       t3532  John Smith\n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...       t3533    Jane Doe\n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...       t3549  John Smith"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### display rewritten SPARQL query "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Submitted query:\n",
      "   PREFIX dc: <http://purl.org/dc/elements/1.1/>\n",
      "   PREFIX dcat: <http://www.w3.org/ns/dcat#>\n",
      "   PREFIX dcterms: <http://purl.org/dc/terms/>\n",
      "   PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/>\n",
      "   PREFIX nsg: <https://neuroshapes.org/>\n",
      "   PREFIX owl: <http://www.w3.org/2002/07/owl#>\n",
      "   PREFIX prov: <http://www.w3.org/ns/prov#>\n",
      "   PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
      "   PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
      "   PREFIX schema: <http://schema.org/>\n",
      "   PREFIX sh: <http://www.w3.org/ns/shacl#>\n",
      "   PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#>\n",
      "   PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n",
      "   PREFIX vann: <http://purl.org/vocab/vann/>\n",
      "   PREFIX void: <http://rdfs.org/ns/void#>\n",
      "   PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n",
      "   PREFIX : <https://neuroshapes.org/>\n",
      "   \n",
      "       SELECT ?id ?name ?contributor\n",
      "       WHERE {\n",
      "           ?id a schema:Dataset ;\n",
      "           nsg:contribution/prov:agent ?contributor.\n",
      "           ?contributor schema:name ?name.\n",
      "       }\n",
      "     LIMIT 3\n"
     ]
    }
   ],
   "source": [
    "resources = forge.sparql(query, limit=3, debug=True)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Full SPARQL query\n",
    "\n",
    "Regular SPARQL query can also be provided. When provided, the limit and offset arguments superseed any in query limit or offset values."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"\"\"\n",
    "PREFIX dc: <http://purl.org/dc/elements/1.1/>\n",
    "   PREFIX dcat: <http://www.w3.org/ns/dcat#>\n",
    "   PREFIX dcterms: <http://purl.org/dc/terms/>\n",
    "   PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/>\n",
    "   PREFIX nsg: <https://neuroshapes.org/>\n",
    "   PREFIX owl: <http://www.w3.org/2002/07/owl#>\n",
    "   PREFIX prov: <http://www.w3.org/ns/prov#>\n",
    "   PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
    "   PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
    "   PREFIX schema: <http://schema.org/>\n",
    "   PREFIX sh: <http://www.w3.org/ns/shacl#>\n",
    "   PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#>\n",
    "   PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n",
    "   PREFIX vann: <http://purl.org/vocab/vann/>\n",
    "   PREFIX void: <http://rdfs.org/ns/void#>\n",
    "   PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n",
    "   PREFIX : <https://neuroshapes.org/>\n",
    "   SELECT ?id ?name\n",
    "   WHERE {\n",
    "       ?id a schema:Dataset ;\n",
    "       nsg:contribution/prov:agent ?contributor.\n",
    "       ?contributor schema:name ?name.\n",
    "   }\n",
    "   ORDER BY ?id\n",
    "   LIMIT 1\n",
    "   OFFSET 0\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Submitted query:\n",
      "   \n",
      "   PREFIX dc: <http://purl.org/dc/elements/1.1/>\n",
      "      PREFIX dcat: <http://www.w3.org/ns/dcat#>\n",
      "      PREFIX dcterms: <http://purl.org/dc/terms/>\n",
      "      PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/>\n",
      "      PREFIX nsg: <https://neuroshapes.org/>\n",
      "      PREFIX owl: <http://www.w3.org/2002/07/owl#>\n",
      "      PREFIX prov: <http://www.w3.org/ns/prov#>\n",
      "      PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
      "      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
      "      PREFIX schema: <http://schema.org/>\n",
      "      PREFIX sh: <http://www.w3.org/ns/shacl#>\n",
      "      PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#>\n",
      "      PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n",
      "      PREFIX vann: <http://purl.org/vocab/vann/>\n",
      "      PREFIX void: <http://rdfs.org/ns/void#>\n",
      "      PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n",
      "      PREFIX : <https://neuroshapes.org/>\n",
      "      SELECT ?id ?name\n",
      "      WHERE {\n",
      "          ?id a schema:Dataset ;\n",
      "          nsg:contribution/prov:agent ?contributor.\n",
      "          ?contributor schema:name ?name.\n",
      "      }\n",
      "      ORDER BY ?id\n",
      "      LIMIT 3\n",
      "      OFFSET 1\n"
     ]
    }
   ],
   "source": [
    "# it is recommended to set 'rewrite' to 'False' to prevent the sparql query rewriting when a syntactically correct SPARQL query is provided.\n",
    "resources = forge.sparql(query, rewrite=False, limit=3, offset=1, debug=True) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "kgforge.core.resource.Resource"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Jane Doe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>John Smith</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://sandbox.bluebrainnexus.io/v1/resources...</td>\n",
       "      <td>Jane Doe</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id        name\n",
       "0  https://sandbox.bluebrainnexus.io/v1/resources...    Jane Doe\n",
       "1  https://sandbox.bluebrainnexus.io/v1/resources...  John Smith\n",
       "2  https://sandbox.bluebrainnexus.io/v1/resources...    Jane Doe"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Avoid rewriting the query"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To not assign any limit or offset, one can pass `None` to those parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [],
   "source": [
    "query_without_limit = \"\"\"\n",
    "   PREFIX dc: <http://purl.org/dc/elements/1.1/>\n",
    "   PREFIX dcat: <http://www.w3.org/ns/dcat#>\n",
    "   PREFIX dcterms: <http://purl.org/dc/terms/>\n",
    "   PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/>\n",
    "   PREFIX nsg: <https://neuroshapes.org/>\n",
    "   PREFIX owl: <http://www.w3.org/2002/07/owl#>\n",
    "   PREFIX prov: <http://www.w3.org/ns/prov#>\n",
    "   PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
    "   PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
    "   PREFIX schema: <http://schema.org/>\n",
    "   PREFIX sh: <http://www.w3.org/ns/shacl#>\n",
    "   PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#>\n",
    "   PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n",
    "   PREFIX vann: <http://purl.org/vocab/vann/>\n",
    "   PREFIX void: <http://rdfs.org/ns/void#>\n",
    "   PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n",
    "   PREFIX : <https://neuroshapes.org/>\n",
    "   SELECT ?id ?name\n",
    "   WHERE {\n",
    "       ?id a schema:Dataset ;\n",
    "       nsg:contribution/prov:agent ?contributor.\n",
    "       ?contributor schema:name ?name.\n",
    "   }\n",
    "   ORDER BY ?id\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Submitted query:\n",
      "   \n",
      "      PREFIX dc: <http://purl.org/dc/elements/1.1/>\n",
      "      PREFIX dcat: <http://www.w3.org/ns/dcat#>\n",
      "      PREFIX dcterms: <http://purl.org/dc/terms/>\n",
      "      PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/>\n",
      "      PREFIX nsg: <https://neuroshapes.org/>\n",
      "      PREFIX owl: <http://www.w3.org/2002/07/owl#>\n",
      "      PREFIX prov: <http://www.w3.org/ns/prov#>\n",
      "      PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
      "      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
      "      PREFIX schema: <http://schema.org/>\n",
      "      PREFIX sh: <http://www.w3.org/ns/shacl#>\n",
      "      PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#>\n",
      "      PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n",
      "      PREFIX vann: <http://purl.org/vocab/vann/>\n",
      "      PREFIX void: <http://rdfs.org/ns/void#>\n",
      "      PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n",
      "      PREFIX : <https://neuroshapes.org/>\n",
      "      SELECT ?id ?name\n",
      "      WHERE {\n",
      "          ?id a schema:Dataset ;\n",
      "          nsg:contribution/prov:agent ?contributor.\n",
      "          ?contributor schema:name ?name.\n",
      "      }\n",
      "      ORDER BY ?id\n"
     ]
    }
   ],
   "source": [
    "\n",
    "resources = forge.sparql(query_without_limit, rewrite=False, limit=None, offset=None, debug=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "108"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you only want to add the context, but keep the content of the query the same, you need to set the `rewrite` parameter to `False`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [],
   "source": [
    "query_without_context = \"\"\"\n",
    "    SELECT ?id ?name ?contributor\n",
    "    WHERE {\n",
    "        ?id a Dataset ;\n",
    "        contribution/agent ?contributor.\n",
    "        ?contributor name ?name.\n",
    "    }\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Submitted query:\n",
      "   PREFIX dc: <http://purl.org/dc/elements/1.1/>\n",
      "   PREFIX dcat: <http://www.w3.org/ns/dcat#>\n",
      "   PREFIX dcterms: <http://purl.org/dc/terms/>\n",
      "   PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/>\n",
      "   PREFIX nsg: <https://neuroshapes.org/>\n",
      "   PREFIX owl: <http://www.w3.org/2002/07/owl#>\n",
      "   PREFIX prov: <http://www.w3.org/ns/prov#>\n",
      "   PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n",
      "   PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n",
      "   PREFIX schema: <http://schema.org/>\n",
      "   PREFIX sh: <http://www.w3.org/ns/shacl#>\n",
      "   PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#>\n",
      "   PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n",
      "   PREFIX vann: <http://purl.org/vocab/vann/>\n",
      "   PREFIX void: <http://rdfs.org/ns/void#>\n",
      "   PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n",
      "   PREFIX : <https://neuroshapes.org/>\n",
      "   \n",
      "       SELECT ?id ?name ?contributor\n",
      "       WHERE {\n",
      "           ?id a schema:Dataset ;\n",
      "           nsg:contribution/prov:agent ?contributor.\n",
      "           ?contributor schema:name ?name.\n",
      "       }\n"
     ]
    }
   ],
   "source": [
    "resources = forge.sparql(query_without_context, limit=None, debug=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "108"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ElasticSearch DSL Query\n",
    "\n",
    "ElasticSearch DSL can be used as a query language search for resources provided that the configured store supports it. The 'BlueBrainNexusStore' supports ElasticSearch."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: DemoStore doesn't implement ElasaticSearch DSL operations."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane = Resource(type=\"Person\", name=\"Jane Doe\")\n",
    "contribution_jane = Resource(type=\"Contribution\", agent=jane)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [],
   "source": [
    "john = Resource(type=\"Person\", name=\"John Smith\")\n",
    "contribution_john = Resource(type=\"Contribution\", agent=john)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [],
   "source": [
    "association = Resource(type=\"Dataset\", contribution=[contribution_jane, contribution_john])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _register_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.register(association)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Plain ElasticSearch DSL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"\"\"\n",
    "        {\n",
    "          \"_source\": {\n",
    "            \"includes\": [\n",
    "              \"@id\",\n",
    "              \"name\"\n",
    "            ]\n",
    "          },\n",
    "          \"query\": {\n",
    "            \"term\": {\n",
    "              \"@type\": \"http://schema.org/Dataset\"\n",
    "            }\n",
    "          }\n",
    "        }\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [],
   "source": [
    "# limit and offset (when provided in this method call) superseed 'size' and 'from' values provided in the query\n",
    "resources = forge.elastic(query, limit=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "list"
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 120,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(resources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "kgforge.core.resource.Resource"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://bbp.epfl.ch/neurosciencegraph/data/neu...</td>\n",
       "      <td>AA1543</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://bbp.epfl.ch/neurosciencegraph/data/neu...</td>\n",
       "      <td>AA1542</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://bbp.epfl.ch/neurosciencegraph/data/neu...</td>\n",
       "      <td>AA1544</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  id    name\n",
       "0  https://bbp.epfl.ch/neurosciencegraph/data/neu...  AA1543\n",
       "1  https://bbp.epfl.ch/neurosciencegraph/data/neu...  AA1542\n",
       "2  https://bbp.epfl.ch/neurosciencegraph/data/neu...  AA1544"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "forge.as_dataframe(resources)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### ElasticSearch results as dictionaries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [],
   "source": [
    "resources_2 = forge.elastic(query, limit=3, as_resource=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(resources_2[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'_id': 'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/25ca99ad-495d-4962-99de-9d4abbc5521c',\n",
       " '_index': 'nexus_7018fa21-cf03-4b16-b603-db111f6b8527_1',\n",
       " '_score': 1.5287807,\n",
       " '_source': {'@id': 'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/25ca99ad-495d-4962-99de-9d4abbc5521c',\n",
       "  'name': 'AA1543'}}"
      ]
     },
     "execution_count": 138,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resources_2[0]"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Downloading"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: DemoStore doesn't implement file operations yet. Please use another store for this section."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [],
   "source": [
    "jane = Resource(type=\"Person\", name=\"Jane Doe\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "associations.tsv\n",
      "my_data.xwz\n",
      "my_data_derived.txt\n",
      "persons-with-id.csv\n",
      "persons.csv\n",
      "tfidfvectorizer_model_schemaorg_linking\n"
     ]
    }
   ],
   "source": [
    "! ls -p ../../data | egrep -v /$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [],
   "source": [
    "distribution = forge.attach(\"../../data\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [],
   "source": [
    "association = Resource(type=\"Association\", agent=jane, distribution=distribution)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<action> _register_one\n",
      "<succeeded> True\n"
     ]
    }
   ],
   "source": [
    "forge.register(association)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
    "# By default, the downladable file urls are collected from the json path \"distribution.contentUrl\" (follow=\"distribution.contentUrl\") and\n",
    "# the files are downloaded in the current path (path=\".\"). \n",
    "# The argument overwrite: bool can be provided to decide whether to overwrite (True) existing files with the same name or\n",
    "# to create new ones (False) with their names suffixed with a timestamp.\n",
    "# A cross_bucket argument can be provided to download data from the configured bucket (cross_bucket=False - the default value) \n",
    "# or from a bucket different than the configured one (cross_bucket=True). The configured store should support crossing buckets for this to work.\n",
    "forge.download(association)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Specific content type can be downloaded.\n",
    "forge.download(association, content_type=\"text/tab-separated-values\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The urls or the files to download can be collected from a different json path (by setting a value for \"follow\") and \n",
    "# the files downloaded to a different path (by setting a value for \"path\")\n",
    "forge.download(association, follow=\"distribution.contentUrl\", path=\"./downloaded/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "total 6352\n",
      "-rw-r--r--@ 1 mouffok  10067     477 Oct 20 10:54 associations.tsv\n",
      "-rw-r--r--@ 1 mouffok  10067     477 Nov 23 15:10 associations.tsv.20231123151033\n",
      "-rw-r--r--@ 1 mouffok  10067     477 Nov 23 15:10 associations.tsv.20231123151035\n",
      "-rw-r--r--  1 mouffok  10067     477 Nov 23 15:41 associations.tsv.20231123154107\n",
      "-rw-r--r--@ 1 mouffok  10067     477 Nov 27 14:20 associations.tsv.20231127142049\n",
      "-rw-r--r--@ 1 mouffok  10067     477 Nov 27 14:20 associations.tsv.20231127142050\n",
      "-rw-r--r--@ 1 mouffok  10067     477 Nov 27 14:24 associations.tsv.20231127142458\n",
      "-rw-r--r--@ 1 mouffok  10067     477 Dec  5 15:37 associations.tsv.20231205153754\n",
      "-rw-r--r--@ 1 mouffok  10067     477 Dec  5 15:44 associations.tsv.20231205154444\n",
      "-rw-r--r--  1 mouffok  10067     477 Feb  1 18:40 associations.tsv.20240201184048\n",
      "-rw-r--r--  1 mouffok  10067     477 Feb  2 10:47 associations.tsv.20240202104741\n",
      "-rw-r--r--  1 mouffok  10067     477 Feb  2 10:59 associations.tsv.20240202105941\n",
      "-rw-r--r--  1 mouffok  10067     477 Feb  2 16:41 associations.tsv.20240202164100\n",
      "-rw-r--r--  1 mouffok  10067     477 Feb  5 18:34 associations.tsv.20240205183409\n",
      "-rw-r--r--  1 mouffok  10067     477 Feb  5 18:34 associations.tsv.20240205183410\n",
      "-rw-r--r--  1 mouffok  10067     477 Feb  5 18:34 associations.tsv.20240205183417\n",
      "-rw-r--r--  1 mouffok  10067     477 Feb  5 18:36 associations.tsv.20240205183603\n",
      "-rw-r--r--  1 mouffok  10067     477 Feb  6 16:09 associations.tsv.20240206160920\n",
      "-rw-r--r--@ 1 mouffok  10067     477 Feb 13 14:16 associations.tsv.20240213141617\n",
      "-rw-r--r--  1 mouffok  10067     477 Feb 14 16:14 associations.tsv.20240214161402\n",
      "-rw-r--r--  1 mouffok  10067     477 Feb 15 14:28 associations.tsv.20240215142837\n",
      "-rw-r--r--@ 1 mouffok  10067      16 Oct 20 10:54 my_data.xwz\n",
      "-rw-r--r--  1 mouffok  10067      16 Nov 23 15:41 my_data.xwz.20231123154107\n",
      "-rw-r--r--@ 1 mouffok  10067      16 Nov 27 14:24 my_data.xwz.20231127142458\n",
      "-rw-r--r--@ 1 mouffok  10067      16 Dec  5 15:37 my_data.xwz.20231205153754\n",
      "-rw-r--r--@ 1 mouffok  10067      16 Dec  5 15:44 my_data.xwz.20231205154444\n",
      "-rw-r--r--  1 mouffok  10067      16 Feb  1 18:40 my_data.xwz.20240201184048\n",
      "-rw-r--r--  1 mouffok  10067      16 Feb  2 10:47 my_data.xwz.20240202104741\n",
      "-rw-r--r--  1 mouffok  10067      16 Feb  2 10:59 my_data.xwz.20240202105941\n",
      "-rw-r--r--  1 mouffok  10067      16 Feb  2 16:41 my_data.xwz.20240202164100\n",
      "-rw-r--r--  1 mouffok  10067      16 Feb  5 18:36 my_data.xwz.20240205183603\n",
      "-rw-r--r--  1 mouffok  10067      16 Feb  6 16:09 my_data.xwz.20240206160920\n",
      "-rw-r--r--@ 1 mouffok  10067      16 Feb 13 14:16 my_data.xwz.20240213141617\n",
      "-rw-r--r--  1 mouffok  10067      16 Feb 14 16:14 my_data.xwz.20240214161402\n",
      "-rw-r--r--  1 mouffok  10067      16 Feb 15 14:28 my_data.xwz.20240215142837\n",
      "-rw-r--r--@ 1 mouffok  10067      24 Oct 20 10:54 my_data_derived.txt\n",
      "-rw-r--r--  1 mouffok  10067      24 Nov 23 15:41 my_data_derived.txt.20231123154107\n",
      "-rw-r--r--@ 1 mouffok  10067      24 Nov 27 14:24 my_data_derived.txt.20231127142458\n",
      "-rw-r--r--@ 1 mouffok  10067      24 Dec  5 15:37 my_data_derived.txt.20231205153754\n",
      "-rw-r--r--@ 1 mouffok  10067      24 Dec  5 15:44 my_data_derived.txt.20231205154444\n",
      "-rw-r--r--  1 mouffok  10067      24 Feb  1 18:40 my_data_derived.txt.20240201184048\n",
      "-rw-r--r--  1 mouffok  10067      24 Feb  2 10:47 my_data_derived.txt.20240202104741\n",
      "-rw-r--r--  1 mouffok  10067      24 Feb  2 10:59 my_data_derived.txt.20240202105941\n",
      "-rw-r--r--  1 mouffok  10067      24 Feb  2 16:41 my_data_derived.txt.20240202164100\n",
      "-rw-r--r--  1 mouffok  10067      24 Feb  5 18:36 my_data_derived.txt.20240205183603\n",
      "-rw-r--r--  1 mouffok  10067      24 Feb  6 16:09 my_data_derived.txt.20240206160920\n",
      "-rw-r--r--@ 1 mouffok  10067      24 Feb 13 14:16 my_data_derived.txt.20240213141617\n",
      "-rw-r--r--  1 mouffok  10067      24 Feb 14 16:14 my_data_derived.txt.20240214161402\n",
      "-rw-r--r--  1 mouffok  10067      24 Feb 15 14:28 my_data_derived.txt.20240215142837\n",
      "-rw-r--r--@ 1 mouffok  10067     126 Oct 20 10:54 persons-with-id.csv\n",
      "-rw-r--r--  1 mouffok  10067     126 Nov 23 15:41 persons-with-id.csv.20231123154107\n",
      "-rw-r--r--@ 1 mouffok  10067     126 Nov 27 14:24 persons-with-id.csv.20231127142458\n",
      "-rw-r--r--@ 1 mouffok  10067     126 Dec  5 15:37 persons-with-id.csv.20231205153754\n",
      "-rw-r--r--@ 1 mouffok  10067     126 Dec  5 15:44 persons-with-id.csv.20231205154444\n",
      "-rw-r--r--  1 mouffok  10067     126 Feb  1 18:40 persons-with-id.csv.20240201184048\n",
      "-rw-r--r--  1 mouffok  10067     126 Feb  2 10:47 persons-with-id.csv.20240202104741\n",
      "-rw-r--r--  1 mouffok  10067     126 Feb  2 10:59 persons-with-id.csv.20240202105941\n",
      "-rw-r--r--  1 mouffok  10067     126 Feb  2 16:41 persons-with-id.csv.20240202164100\n",
      "-rw-r--r--  1 mouffok  10067     126 Feb  5 18:36 persons-with-id.csv.20240205183603\n",
      "-rw-r--r--  1 mouffok  10067     126 Feb  6 16:09 persons-with-id.csv.20240206160920\n",
      "-rw-r--r--@ 1 mouffok  10067     126 Feb 13 14:16 persons-with-id.csv.20240213141617\n",
      "-rw-r--r--  1 mouffok  10067     126 Feb 14 16:14 persons-with-id.csv.20240214161402\n",
      "-rw-r--r--  1 mouffok  10067     126 Feb 15 14:28 persons-with-id.csv.20240215142837\n",
      "-rw-r--r--@ 1 mouffok  10067      52 Oct 20 10:54 persons.csv\n",
      "-rw-r--r--@ 1 mouffok  10067      52 Nov 23 15:10 persons.csv.20231123151035\n",
      "-rw-r--r--  1 mouffok  10067      52 Nov 23 15:41 persons.csv.20231123154107\n",
      "-rw-r--r--@ 1 mouffok  10067      52 Nov 27 14:20 persons.csv.20231127142050\n",
      "-rw-r--r--@ 1 mouffok  10067      52 Nov 27 14:24 persons.csv.20231127142458\n",
      "-rw-r--r--@ 1 mouffok  10067      52 Dec  5 15:37 persons.csv.20231205153754\n",
      "-rw-r--r--@ 1 mouffok  10067      52 Dec  5 15:44 persons.csv.20231205154444\n",
      "-rw-r--r--  1 mouffok  10067      52 Feb  1 18:40 persons.csv.20240201184048\n",
      "-rw-r--r--  1 mouffok  10067      52 Feb  2 10:47 persons.csv.20240202104741\n",
      "-rw-r--r--  1 mouffok  10067      52 Feb  2 10:59 persons.csv.20240202105941\n",
      "-rw-r--r--  1 mouffok  10067      52 Feb  2 16:41 persons.csv.20240202164100\n",
      "-rw-r--r--  1 mouffok  10067      52 Feb  5 18:34 persons.csv.20240205183417\n",
      "-rw-r--r--  1 mouffok  10067      52 Feb  5 18:36 persons.csv.20240205183603\n",
      "-rw-r--r--  1 mouffok  10067      52 Feb  6 16:09 persons.csv.20240206160920\n",
      "-rw-r--r--@ 1 mouffok  10067      52 Feb 13 14:16 persons.csv.20240213141617\n",
      "-rw-r--r--  1 mouffok  10067      52 Feb 14 16:14 persons.csv.20240214161402\n",
      "-rw-r--r--  1 mouffok  10067      52 Feb 15 14:28 persons.csv.20240215142837\n",
      "-rw-r--r--@ 1 mouffok  10067  204848 Oct 20 10:54 tfidfvectorizer_model_schemaorg_linking\n",
      "-rw-r--r--  1 mouffok  10067  204848 Nov 23 15:41 tfidfvectorizer_model_schemaorg_linking.20231123154107\n",
      "-rw-r--r--@ 1 mouffok  10067  204848 Nov 27 14:24 tfidfvectorizer_model_schemaorg_linking.20231127142458\n",
      "-rw-r--r--@ 1 mouffok  10067  204848 Dec  5 15:37 tfidfvectorizer_model_schemaorg_linking.20231205153754\n",
      "-rw-r--r--@ 1 mouffok  10067  204848 Dec  5 15:44 tfidfvectorizer_model_schemaorg_linking.20231205154444\n",
      "-rw-r--r--  1 mouffok  10067  204848 Feb  1 18:40 tfidfvectorizer_model_schemaorg_linking.20240201184048\n",
      "-rw-r--r--  1 mouffok  10067  204848 Feb  2 10:47 tfidfvectorizer_model_schemaorg_linking.20240202104741\n",
      "-rw-r--r--  1 mouffok  10067  204848 Feb  2 10:59 tfidfvectorizer_model_schemaorg_linking.20240202105941\n",
      "-rw-r--r--  1 mouffok  10067  204848 Feb  2 16:41 tfidfvectorizer_model_schemaorg_linking.20240202164100\n",
      "-rw-r--r--  1 mouffok  10067  204848 Feb  5 18:36 tfidfvectorizer_model_schemaorg_linking.20240205183603\n",
      "-rw-r--r--  1 mouffok  10067  204848 Feb  6 16:09 tfidfvectorizer_model_schemaorg_linking.20240206160920\n",
      "-rw-r--r--@ 1 mouffok  10067  204848 Feb 13 14:16 tfidfvectorizer_model_schemaorg_linking.20240213141617\n",
      "-rw-r--r--  1 mouffok  10067  204848 Feb 14 16:14 tfidfvectorizer_model_schemaorg_linking.20240214161402\n",
      "-rw-r--r--  1 mouffok  10067  204848 Feb 15 14:28 tfidfvectorizer_model_schemaorg_linking.20240215142837\n"
     ]
    }
   ],
   "source": [
    "! ls -l ./downloaded/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [],
   "source": [
    "#! rm -R ./downloaded/"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "forge_venv2",
   "language": "python",
   "name": "venv"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.17"
  },
  "vscode": {
   "interpreter": {
    "hash": "9ac393a5ddd595f2c78ea58b15bf8d269850a4413729cbea5c5fae9013762763"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}