This notebook demonstrates how to query associations from IMPC, including evidence and provenance modeled with SEPIO.

To set up:

pip install dipper jupyter ipython

In [17]:
import os

from dipper.graph.RDFGraph import RDFGraph

# Start from an empty dipper graph
graph = RDFGraph()

# Location of the IMPC turtle file (hosted at data.monarchinitiative.org)
impc_graph = "https://data.monarchinitiative.org/ttl/impc.ttl"

# Load the remote RDF file into the graph; this takes a minute or two
graph.parse(impc_graph, format="turtle")

)>

In [18]:
# Count the has_phenotype (RO:0002200) statements in the graph.
# NOTE(review): this tallies one per value yielded by subjects(); presumably
# rdflib does not de-duplicate here, so a subject with several phenotypes is
# counted once per phenotype -- confirm if distinct subjects are wanted.

has_phenotype = graph._getNode("RO:0002200")
sum(1 for _ in graph.subjects(predicate=has_phenotype))

19843

In [19]:
# Count the nodes in the graph whose rdf:type is OBAN:association
from rdflib.namespace import RDF

association = graph._getNode("OBAN:association")
sum(1 for _ in graph.subjects(RDF.type, association))

19843

In [20]:
# Count the nodes typed as mutant phenotype evidence (ECO:0000015),
# i.e. the evidence lines present in the graph

mut_pheno_evidence = graph._getNode("ECO:0000015")
sum(1 for _ in graph.subjects(RDF.type, mut_pheno_evidence))

23518

Given a list of genotype labels and phenotype IDs, create a subgraph containing the 'has phenotype' relation and all of its evidence.


In [84]:
# Extract one genotype-phenotype association, together with its evidence and
# provenance, from the full graph into a small standalone graph and print it.

# Bind all namespaces in curie_map for sparql
graph.bind_all_namespaces()

genotype = "Ankrd13a/Ankrd13a [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a] (female)"
phenotype = "MP:0001399"

# Graph pattern used for both the CONSTRUCT template and the WHERE clause.
# {0} is filled with the genotype label and {1} with the phenotype CURIE.
# NOTE: the label is wrapped in single quotes, so a label that itself
# contains a single quote would have to be escaped before formatting.
query = """
 ?genotype rdfs:label '{0}' .
 ?genotype RO:0002200 {1} .
 
 ?assoc a ?assoc_type ;
 OBAN:association_has_object {1} ;
 OBAN:association_has_predicate RO:0002200 ;
 OBAN:association_has_subject ?genotype ;
 RO:0002558 ECO:0000015 ;
 SEPIO:0000007 ?evidenceline ;
 SEPIO:0000015 ?assertion .
 
 ?assertion SEPIO:0000018 ?creator ;
 SEPIO:0000111 ?evidenceline .
 
 ?assertion a ?assertion_type .
 
 ?creator ?creator_predicates ?creator_objects .
 
 ?evidenceline SEPIO:0000084 ?measure1 ;
 SEPIO:0000085 ?study .
 
 ?evidenceline a ?ev_type .
 
 ?measure1 ?measure_predicates ?measure_objects .
 ?study ?study_predicates ?study_objects .
 
 ?study_objects ?stud_p ?stud_o .
 ?measure_objects ?meas_p ?meas_o .
 
""".format(genotype, phenotype)

# The same pattern is substituted twice so that every triple matched in the
# WHERE clause is emitted by the CONSTRUCT clause.
sparql_query = """
 CONSTRUCT {{ 
 {0}
 }}
 WHERE {{
 {0}
 }}
""".format(query)

sparql_output = graph.query(sparql_query)

# Collect the constructed triples in a fresh graph
subGraph = RDFGraph()

for triple in sparql_output:
    subGraph.add(triple)

subGraph.bind("OBAN", "http://purl.org/oban/")

# serialize() returns bytes on older rdflib releases and str on newer ones;
# decode only when bytes come back so the cell runs with either.
turtle_output = subGraph.serialize(format='turtle')
if isinstance(turtle_output, bytes):
    turtle_output = turtle_output.decode("utf-8")
print(turtle_output)

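@prefix OBAN: <http://purl.org/oban/> .
@prefix OBO: <http://purl.obolibrary.org/obo/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .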

 a OBAN:association ;
 OBO:RO_0002558 OBO:ECO_0000015 ;
 OBO:SEPIO_0000007 ,
 ;
 OBO:SEPIO_0000015 ;
 OBAN:association_has_object OBO:MP_0001399 ;
 OBAN:association_has_predicate OBO:RO_0002200 ;
 OBAN:association_has_subject .

 a ;
 rdfs:label "International Mouse Phenotyping Consortium" .

 OBO:RO_0002353 .

 OBO:RO_0002353 .

 OBO:RO_0002353 .

 OBO:RO_0002353 .

 OBO:RO_0002353 .

 a OBO:SEPIO_0000001 ;
 OBO:SEPIO_0000018 ;
 OBO:SEPIO_0000111 ,
 .

 OBO:RO_0002353 .

 rdfs:label "Ankrd13a/Ankrd13a [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a] (female)" ;
 OBO:RO_0002200 OBO:MP_0001399 .

 a ;
 rdfs:label "Whole arena resting time (Open-field)" .

 a ;
 rdfs:label "Locomotor activity (Modified SHIRPA)" .

 a ;
 rdfs:label "Open-field" .

 a ;
 rdfs:label "Modified SHIRPA" .

 a ;
 rdfs:label "European Mouse Disease Clinic" .

 a ;
 rdfs:label "WTSI" .

 a OBO:ECO_0000015 ;
 OBO:SEPIO_0000084 ,
 ,
 ;

In [91]:
# Turn this into a function

# Reusable SPARQL graph pattern for one genotype-phenotype association plus
# its evidence and provenance. It is filled in with str.format:
#   {0} -> genotype label (placed between single quotes as an rdfs:label)
#   {1} -> phenotype CURIE
template = """
 ?genotype rdfs:label '{0}' .
 ?genotype RO:0002200 {1} .
 
 ?assoc a ?assoc_type ;
 OBAN:association_has_object {1} ;
 OBAN:association_has_predicate RO:0002200 ;
 OBAN:association_has_subject ?genotype ;
 RO:0002558 ECO:0000015 ;
 SEPIO:0000007 ?evidenceline ;
 SEPIO:0000015 ?assertion .
 
 ?assertion SEPIO:0000018 ?creator ;
 SEPIO:0000111 ?evidenceline .
 
 ?assertion a ?assertion_type .
 
 ?creator ?creator_predicates ?creator_objects .
 
 ?evidenceline SEPIO:0000084 ?measure1 ;
 SEPIO:0000085 ?study .
 
 ?evidenceline a ?ev_type .
 
 ?measure1 ?measure_predicates ?measure_objects .
 ?study ?study_predicates ?study_objects .
 
 ?study_objects ?stud_p ?stud_o .
 ?measure_objects ?meas_p ?meas_o .
"""

# Fresh graph that accumulates the triples extracted for each
# genotype/phenotype pair processed later in this cell
subGraph = RDFGraph()
# Register the OBAN prefix on the new graph
subGraph.bind("OBAN", "http://purl.org/oban/")

def create_subgraph(query, graph, new_graph):
    """Copy every triple matched by a graph pattern into another graph.

    The pattern is substituted into a SPARQL query as both the CONSTRUCT
    template and the WHERE clause, the query is run against ``graph``, and
    each item of the result is added to ``new_graph``.

    :param query: SPARQL graph pattern (the text that goes between the braces)
    :param graph: object whose ``query`` method executes the SPARQL string
    :param new_graph: object whose ``add`` method receives each result triple
    :return: None; ``new_graph`` is modified in place
    """
    construct_query = """
 CONSTRUCT {{ 
 {0}
 }}
 WHERE {{
 {0}
 }}
 """.format(query)

    # Iterate the query result directly and copy each triple across
    for matched_triple in graph.query(construct_query):
        new_graph.add(matched_triple)
 
# Genotype label / phenotype CURIE pairs whose associations and evidence
# should be copied into subGraph
g2p_list = [
    [
        "Ankrd13a/Ankrd13a [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a] (female)",
        "MP:0001399",
    ],
    [
        "Ankrd13a/Ankrd13a [CBA/Ca;129P2-WTSI-Ankrd13aAnkrd13a] (male)",
        "MP:0001399",
    ],
    [
        "Mapkap1/Mapkap1<+> [C57BL/6N-BCM-Mapkap1 EPD0609_2_F05-B] (female)",
        "MP:0002753",
    ],
    [
        "Hbs1l/Hbs1l [C57BL/6N-WTSI-METC] (female)",
        "MP:0005292",
    ],
    [
        "Gnao1/Gnao1<+> [C57BL/6NTac-MRC Harwell-H-GNAO1-G05-TM1B] (male)",
        "MP:0001399",
    ],
]

# Fill the pattern for each pair ({0} = label, {1} = CURIE) and accumulate
# the matching triples in subGraph
for g2p in g2p_list:
    query = template.format(*g2p)
    create_subgraph(query, graph, subGraph)

# Write into the current user's home directory rather than a path that only
# exists on one machine; for the original author this resolves to the same file.
output_path = os.path.join(os.path.expanduser("~"), "impc_test.ttl")
subGraph.serialize(output_path, format="ttl")
