{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook demonstrates how to query associations from IMPC, including the evidence and provenance modeled with SEPIO.\n",
    "\n",
    "To set up:\n",
    "\n",
    "`pip install dipper jupyter ipython`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       ")>"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from dipper.graph.RDFGraph import RDFGraph\n",
    "from rdflib.namespace import RDF\n",
    "\n",
    "impc_graph = \"https://data.monarchinitiative.org/ttl/impc.ttl\"\n",
    "\n",
    "graph = RDFGraph()\n",
    "\n",
    "# Import the rdf file, this takes a minute or two\n",
    "graph.parse(impc_graph, format='turtle')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "19843"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How many subjects have a has_phenotype relation\n",
    "\n",
    "has_phenotype = graph._getNode(\"RO:0002200\")\n",
    "len(list(graph.subjects(predicate=has_phenotype)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "19843"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How many oban associations are in the graph\n",
    "\n",
    "association = graph._getNode(\"OBAN:association\")\n",
    "len(list(graph.subjects(RDF.type, association)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "23518"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# How many lines of mutant phenotype evidence\n",
    "\n",
    "mut_pheno_evidence = graph._getNode(\"ECO:0000015\")\n",
    "len(list(graph.subjects(RDF.type, mut_pheno_evidence)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Given a list of genotype labels and phenotype CURIEs, create a subgraph containing the 'has phenotype' relation and all of its evidence.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "@prefix OBAN: .\n",
      "@prefix OBO: .\n",
      "@prefix rdf: .\n",
      "@prefix rdfs: .\n",
      "@prefix xml: .\n",
      "@prefix xsd: .\n",
      "\n",
      " a OBAN:association ;\n",
      " OBO:RO_0002558 OBO:ECO_0000015 ;\n",
      " OBO:SEPIO_0000007 ,\n",
      " ;\n",
      " OBO:SEPIO_0000015 ;\n",
      " OBAN:association_has_object OBO:MP_0001399 ;\n",
      " OBAN:association_has_predicate OBO:RO_0002200 ;\n",
      " OBAN:association_has_subject .\n",
      "\n",
      " a ;\n",
      " rdfs:label \"International Mouse Phenotyping Consortium\" .\n",
      "\n",
      " OBO:RO_0002353 .\n",
      "\n",
      " OBO:RO_0002353 .\n",
      "\n",
      " OBO:RO_0002353 .\n",
      "\n",
      " OBO:RO_0002353 .\n",
      "\n",
      " OBO:RO_0002353 .\n",
      "\n",
      " a OBO:SEPIO_0000001 ;\n",
      " OBO:SEPIO_0000018 ;\n",
      " OBO:SEPIO_0000111 ,\n",
      " .\n",
      "\n",
      " OBO:RO_0002353 .\n",
      "\n",
      " rdfs:label \"Ankrd13a/Ankrd13a [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a] (female)\" ;\n",
      " OBO:RO_0002200 OBO:MP_0001399 .\n",
      "\n",
      " a ;\n",
      " rdfs:label \"Whole arena resting time (Open-field)\" .\n",
      "\n",
      " a ;\n",
      " rdfs:label \"Locomotor activity (Modified SHIRPA)\" .\n",
      "\n",
      " a ;\n",
      " rdfs:label \"Open-field\" .\n",
      "\n",
      " a ;\n",
      " rdfs:label \"Modified SHIRPA\" .\n",
      "\n",
      " a ;\n",
      " rdfs:label \"European Mouse Disease Clinic\" .\n",
      "\n",
      " a ;\n",
      " rdfs:label \"WTSI\" .\n",
      "\n",
      " a OBO:ECO_0000015 ;\n",
      " OBO:SEPIO_0000084 ,\n",
      " ,\n",
      " ;\n",
      " OBO:SEPIO_0000085 .\n",
      "\n",
      " a OBO:ECO_0000015 ;\n",
      " OBO:SEPIO_0000084 ,\n",
      " ,\n",
      " ;\n",
      " OBO:SEPIO_0000085 .\n",
      "\n",
      " a ;\n",
      " rdfs:label \"EUMODIC Pipeline 2\" .\n",
      "\n",
      " a OBO:OBI_0000471 ;\n",
      " OBO:BFO_0000050 ,\n",
      " ;\n",
      " OBO:BFO_0000051 OBO:STATO_0000372,\n",
      " ;\n",
      " OBO:SEPIO_0000017 ;\n",
      " OBO:SEPIO_0000114 .\n",
      "\n",
      " a OBO:OBI_0000471 ;\n",
      " OBO:BFO_0000050 ,\n",
      " ;\n",
      " OBO:BFO_0000051 OBO:STATO_0000372,\n",
      " ;\n",
      " OBO:SEPIO_0000017 ;\n",
      " OBO:SEPIO_0000114 .\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Bind all namespaces in curie_map for sparql\n",
    "graph.bind_all_namespaces()\n",
    "\n",
    "\n",
    "def escape_sparql_literal(text):\n",
    "    \"\"\"Escape backslashes and single quotes so text can sit inside a '...' SPARQL literal.\"\"\"\n",
    "    return text.replace(\"\\\\\", \"\\\\\\\\\").replace(\"'\", \"\\\\'\")\n",
    "\n",
    "\n",
    "genotype = \"Ankrd13a/Ankrd13a [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a] (female)\"\n",
    "phenotype = \"MP:0001399\"\n",
    "\n",
    "# Graph pattern shared by every query in this notebook:\n",
    "# {0} is the genotype label, {1} is the phenotype curie\n",
    "template = \"\"\"\n",
    "    ?genotype rdfs:label '{0}' .\n",
    "    ?genotype RO:0002200 {1} .\n",
    "\n",
    "    ?assoc a ?assoc_type ;\n",
    "        OBAN:association_has_object {1} ;\n",
    "        OBAN:association_has_predicate RO:0002200 ;\n",
    "        OBAN:association_has_subject ?genotype ;\n",
    "        RO:0002558 ECO:0000015 ;\n",
    "        SEPIO:0000007 ?evidenceline ;\n",
    "        SEPIO:0000015 ?assertion .\n",
    "\n",
    "    ?assertion SEPIO:0000018 ?creator ;\n",
    "        SEPIO:0000111 ?evidenceline .\n",
    "\n",
    "    ?assertion a ?assertion_type .\n",
    "\n",
    "    ?creator ?creator_predicates ?creator_objects .\n",
    "\n",
    "    ?evidenceline SEPIO:0000084 ?measure1 ;\n",
    "        SEPIO:0000085 ?study .\n",
    "\n",
    "    ?evidenceline a ?ev_type .\n",
    "\n",
    "    ?measure1 ?measure_predicates ?measure_objects .\n",
    "    ?study ?study_predicates ?study_objects .\n",
    "\n",
    "    ?study_objects ?stud_p ?stud_o .\n",
    "    ?measure_objects ?meas_p ?meas_o .\n",
    "\"\"\"\n",
    "\n",
    "query = template.format(escape_sparql_literal(genotype), phenotype)\n",
    "\n",
    "# The same pattern is used for CONSTRUCT and WHERE so that every\n",
    "# matched triple is copied into the result\n",
    "sparql_query = \"\"\"\n",
    "    CONSTRUCT {{\n",
    "        {0}\n",
    "    }}\n",
    "    WHERE {{\n",
    "        {0}\n",
    "    }}\n",
    "\"\"\".format(query)\n",
    "\n",
    "sparql_output = graph.query(sparql_query)\n",
    "\n",
    "subGraph = RDFGraph()\n",
    "\n",
    "for triple in sparql_output:\n",
    "    subGraph.add(triple)\n",
    "\n",
    "subGraph.bind(\"OBAN\", \"http://purl.org/oban/\")\n",
    "\n",
    "turtle = subGraph.serialize(format='turtle')\n",
    "# rdflib < 6 returns bytes here, rdflib >= 6 returns str\n",
    "if isinstance(turtle, bytes):\n",
    "    turtle = turtle.decode(\"utf-8\")\n",
    "print(turtle)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Turn this into a function\n",
    "\n",
    "def create_subgraph(query, graph, new_graph):\n",
    "    \"\"\"\n",
    "    Copy every triple matched by a graph pattern into another graph.\n",
    "\n",
    "    The pattern is used as both the CONSTRUCT template and the WHERE\n",
    "    clause of a SPARQL query that is run against graph.\n",
    "\n",
    "    :param query: SPARQL graph pattern, for example a formatted template\n",
    "    :param graph: graph the query is run against\n",
    "    :param new_graph: graph the resulting triples are added to\n",
    "    :return: None, new_graph is updated in place\n",
    "    \"\"\"\n",
    "    sparql_query = \"\"\"\n",
    "        CONSTRUCT {{\n",
    "            {0}\n",
    "        }}\n",
    "        WHERE {{\n",
    "            {0}\n",
    "        }}\n",
    "    \"\"\".format(query)\n",
    "\n",
    "    sparql_output = graph.query(sparql_query)\n",
    "\n",
    "    for triple in sparql_output:\n",
    "        new_graph.add(triple)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Written relative to the working directory of the notebook\n",
    "output_file = \"impc_test.ttl\"\n",
    "\n",
    "# Pairs of [genotype label, phenotype curie]\n",
    "g2p_list = [\n",
    "    [\"Ankrd13a/Ankrd13a [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a] (female)\",\n",
    "     \"MP:0001399\"\n",
    "    ],\n",
    "    [\"Ankrd13a/Ankrd13a [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a] (male)\",\n",
    "     \"MP:0001399\"\n",
    "    ],\n",
    "    [\"Mapkap1/Mapkap1<+> [C57BL/6N-BCM-Mapkap1 EPD0609_2_F05-B] (female)\",\n",
    "     \"MP:0002753\"\n",
    "    ],\n",
    "    [\"Hbs1l/Hbs1l [C57BL/6N-WTSI-METC] (female)\",\n",
    "     \"MP:0005292\"\n",
    "    ],\n",
    "    [\"Gnao1/Gnao1<+> [C57BL/6NTac-MRC Harwell-H-GNAO1-G05-TM1B] (male)\",\n",
    "     \"MP:0001399\"\n",
    "    ]\n",
    "]\n",
    "\n",
    "# Start from a fresh graph so re-running this cell gives the same file\n",
    "subGraph = RDFGraph()\n",
    "subGraph.bind(\"OBAN\", \"http://purl.org/oban/\")\n",
    "\n",
    "for genotype_label, phenotype_curie in g2p_list:\n",
    "    query = template.format(escape_sparql_literal(genotype_label), phenotype_curie)\n",
    "    create_subgraph(query, graph, subGraph)\n",
    "\n",
    "subGraph.serialize(output_file, format=\"turtle\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}