{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Intermine-Python: Tutorial 10: Enrichment Calculations on Lists" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This tutorial shows how to perform enrichment calculations on lists that you have access to. " ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from intermine.webservice import Service" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "service = Service(\"https://www.flymine.org/flymine/service\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The InterMine service has various widgets that perform different functions. These widgets are stored in a dictionary in the Service class. To view all the widgets, use `service.widgets`." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'flyatlas_for_gene': {'name': 'flyatlas_for_gene',\n", " 'chartType': 'BarChart',\n", " 'description': 'For each tissue in the fly, the number of genes from this list for which the levels of expression are significantly high (Up) or low (Down) according to FlyAtlas AffyCall.',\n", " 'startClass': 'FlyAtlasResult',\n", " 'title': 'Gene Expression in the Fly (FlyAtlas)',\n", " 'targets': ['Gene'],\n", " 'widgetType': 'chart',\n", " 'labels': {'x': 'Tissue', 'y': 'Up (+) or Down (-) gene count'}},\n", " 'go_enrichment_for_gene': {'startClassDisplay': 'primaryIdentifier',\n", " 'enrichIdentifier': 'goAnnotation.ontologyTerm.parents.identifier',\n", " 'name': 'go_enrichment_for_gene',\n", " 'description': 'GO terms enriched for items in this list.',\n", " 'enrich': 'goAnnotation.ontologyTerm.parents.name',\n", " 'filters': 'biological_process,cellular_component,molecular_function',\n", " 'startClass': 'Gene',\n", " 'title': 'Gene Ontology Enrichment',\n", " 'targets': ['Gene'],\n", " 'widgetType': 'enrichment'},\n", " 'bdgp': 
{'name': 'bdgp',\n", " 'chartType': 'ColumnChart',\n", " 'description': 'Expression patterns of Drosophila mRNAs during embryogenesis - data from BGDP. Note that not all genes have been assayed by BGDP. ',\n", " 'startClass': 'Gene',\n", " 'title': 'BDGP expression patterns',\n", " 'targets': ['Gene'],\n", " 'widgetType': 'chart',\n", " 'labels': {'x': 'Stage', 'y': 'Gene count'}},\n", " 'pathway_enrichment': {'startClassDisplay': 'primaryIdentifier',\n", " 'enrichIdentifier': 'pathways.identifier',\n", " 'name': 'pathway_enrichment',\n", " 'description': 'Pathways enriched for genes in this list - data from KEGG and Reactome',\n", " 'enrich': 'pathways.name',\n", " 'filters': 'All,KEGG pathways data set,Reactome data set',\n", " 'startClass': 'Gene',\n", " 'title': 'Pathway Enrichment',\n", " 'targets': ['Gene'],\n", " 'widgetType': 'enrichment'},\n", " 'flyfish': {'name': 'flyfish',\n", " 'chartType': 'ColumnChart',\n", " 'description': 'Expression patterns of Drosophila mRNAs at the subcellular level during early embryogenesis - data from Fly-FISH. 
Note that not all genes have been assayed by Fly-FISH.',\n", " 'startClass': 'Gene',\n", " 'title': 'mRNA subcellular localisation (fly-FISH)',\n", " 'targets': ['Gene'],\n", " 'widgetType': 'chart',\n", " 'labels': {'x': 'Stage', 'y': 'Gene count'}},\n", " 'prot_dom_enrichment_for_protein': {'startClassDisplay': 'primaryIdentifier',\n", " 'enrichIdentifier': 'proteinDomainRegions.proteinDomain.primaryIdentifier',\n", " 'name': 'prot_dom_enrichment_for_protein',\n", " 'description': 'Protein Domains enriched for items in this list.',\n", " 'enrich': 'proteinDomainRegions.proteinDomain.name',\n", " 'startClass': 'Protein',\n", " 'title': 'Protein Domain Enrichment',\n", " 'targets': ['Protein'],\n", " 'widgetType': 'enrichment'},\n", " 'protein_features': {'startClassDisplay': 'primaryIdentifier',\n", " 'enrichIdentifier': 'features.type',\n", " 'name': 'protein_features',\n", " 'description': 'UniProt features enriched for proteins in this list.',\n", " 'enrich': 'features.type',\n", " 'startClass': 'Protein',\n", " 'title': 'UniProt Features',\n", " 'targets': ['Protein'],\n", " 'widgetType': 'enrichment'},\n", " 'flyatlas_for_probeset': {'name': 'flyatlas_for_probeset',\n", " 'chartType': 'BarChart',\n", " 'description': 'For each tissue, the number of genes from this list for which the levels of expression are significantly high (Up) or low (Down) according to FlyAtlas AffyCall.',\n", " 'startClass': 'FlyAtlasResult',\n", " 'title': 'Gene Expression in Fly Tissues (FlyAtlas Data)',\n", " 'targets': ['ProbeSet'],\n", " 'widgetType': 'chart',\n", " 'labels': {'x': 'Tissue', 'y': 'Up (+) or Down (-) gene count'}},\n", " 'uniprot_keywords': {'startClassDisplay': 'primaryIdentifier',\n", " 'name': 'uniprot_keywords',\n", " 'description': 'UniProt keywords enriched for proteins in this list.',\n", " 'enrich': 'keywords.name',\n", " 'startClass': 'Protein',\n", " 'title': 'UniProt Keywords',\n", " 'targets': ['Protein'],\n", " 'widgetType': 'enrichment'},\n", " 
'orthologues': {'name': 'orthologues',\n", " 'description': 'Counts of genes in list with orthologues.',\n", " 'title': 'Orthologues',\n", " 'targets': ['Gene'],\n", " 'widgetType': 'table'},\n", " 'publication_enrichment': {'startClassDisplay': 'primaryIdentifier',\n", " 'enrichIdentifier': 'publications.pubMedId',\n", " 'name': 'publication_enrichment',\n", " 'description': 'Publications enriched for genes in this list.',\n", " 'enrich': 'publications.title',\n", " 'startClass': 'Gene',\n", " 'title': 'Publication Enrichment',\n", " 'targets': ['Gene'],\n", " 'widgetType': 'enrichment'},\n", " 'bdgp_enrichment': {'startClassDisplay': 'primaryIdentifier',\n", " 'name': 'bdgp_enrichment',\n", " 'description': 'ImaGO terms enriched for genes in this list - data from BDGP. Note that not all genes have been assayed by BDGP.',\n", " 'enrich': 'mRNAExpressionResults.mRNAExpressionTerms[MRNAExpressionTerm].name',\n", " 'startClass': 'Gene',\n", " 'title': 'BDGP Enrichment',\n", " 'targets': ['Gene'],\n", " 'widgetType': 'enrichment'},\n", " 'chromosome_distribution_for_gene': {'name': 'chromosome_distribution_for_gene',\n", " 'chartType': 'ColumnChart',\n", " 'description': 'Actual: number of items in this list found on each chromosome. 
Expected: given the total number of items on the chromosome and the number of items in this list, the number of items expected to be found on each chromosome.',\n", " 'filters': 'organism.name=[list]',\n", " 'startClass': 'SequenceFeature',\n", " 'title': 'Chromosome Distribution',\n", " 'targets': ['SequenceFeature'],\n", " 'widgetType': 'chart',\n", " 'labels': {'x': 'Chromosome', 'y': 'Count'}},\n", " 'prot_dom_enrichment_for_gene': {'startClassDisplay': 'primaryIdentifier',\n", " 'enrichIdentifier': 'proteins.proteinDomainRegions.proteinDomain.primaryIdentifier',\n", " 'name': 'prot_dom_enrichment_for_gene',\n", " 'description': 'Protein Domains enriched for items in this list.',\n", " 'enrich': 'proteins.proteinDomainRegions.proteinDomain.name',\n", " 'startClass': 'Gene',\n", " 'title': 'Protein Domain Enrichment',\n", " 'targets': ['Gene'],\n", " 'widgetType': 'enrichment'},\n", " 'miranda_enrichment': {'startClassDisplay': 'symbol',\n", " 'name': 'miranda_enrichment',\n", " 'description': 'MiRNAs enriched for items in this list.',\n", " 'enrich': 'transcripts[MRNA].miRNAinteractions.mirnagene.symbol',\n", " 'startClass': 'Gene',\n", " 'title': 'MiRNA Enrichment',\n", " 'targets': ['Gene'],\n", " 'widgetType': 'enrichment'}}" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "service.widgets" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If you want to view only the widgets that are related to enrichment, you can filter on the widget type. The cell below prints every widget whose `widgetType` is `enrichment`. 
" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'startClassDisplay': 'primaryIdentifier', 'enrichIdentifier': 'goAnnotation.ontologyTerm.parents.identifier', 'name': 'go_enrichment_for_gene', 'description': 'GO terms enriched for items in this list.', 'enrich': 'goAnnotation.ontologyTerm.parents.name', 'filters': 'biological_process,cellular_component,molecular_function', 'startClass': 'Gene', 'title': 'Gene Ontology Enrichment', 'targets': ['Gene'], 'widgetType': 'enrichment'}\n", "{'startClassDisplay': 'primaryIdentifier', 'enrichIdentifier': 'pathways.identifier', 'name': 'pathway_enrichment', 'description': 'Pathways enriched for genes in this list - data from KEGG and Reactome', 'enrich': 'pathways.name', 'filters': 'All,KEGG pathways data set,Reactome data set', 'startClass': 'Gene', 'title': 'Pathway Enrichment', 'targets': ['Gene'], 'widgetType': 'enrichment'}\n", "{'startClassDisplay': 'primaryIdentifier', 'enrichIdentifier': 'proteinDomainRegions.proteinDomain.primaryIdentifier', 'name': 'prot_dom_enrichment_for_protein', 'description': 'Protein Domains enriched for items in this list.', 'enrich': 'proteinDomainRegions.proteinDomain.name', 'startClass': 'Protein', 'title': 'Protein Domain Enrichment', 'targets': ['Protein'], 'widgetType': 'enrichment'}\n", "{'startClassDisplay': 'primaryIdentifier', 'enrichIdentifier': 'features.type', 'name': 'protein_features', 'description': 'UniProt features enriched for proteins in this list.', 'enrich': 'features.type', 'startClass': 'Protein', 'title': 'UniProt Features', 'targets': ['Protein'], 'widgetType': 'enrichment'}\n", "{'startClassDisplay': 'primaryIdentifier', 'name': 'uniprot_keywords', 'description': 'UniProt keywords enriched for proteins in this list.', 'enrich': 'keywords.name', 'startClass': 'Protein', 'title': 'UniProt Keywords', 'targets': ['Protein'], 'widgetType': 'enrichment'}\n", "{'startClassDisplay': 
'primaryIdentifier', 'enrichIdentifier': 'publications.pubMedId', 'name': 'publication_enrichment', 'description': 'Publications enriched for genes in this list.', 'enrich': 'publications.title', 'startClass': 'Gene', 'title': 'Publication Enrichment', 'targets': ['Gene'], 'widgetType': 'enrichment'}\n", "{'startClassDisplay': 'primaryIdentifier', 'name': 'bdgp_enrichment', 'description': 'ImaGO terms enriched for genes in this list - data from BDGP. Note that not all genes have been assayed by BDGP.', 'enrich': 'mRNAExpressionResults.mRNAExpressionTerms[MRNAExpressionTerm].name', 'startClass': 'Gene', 'title': 'BDGP Enrichment', 'targets': ['Gene'], 'widgetType': 'enrichment'}\n", "{'startClassDisplay': 'primaryIdentifier', 'enrichIdentifier': 'proteins.proteinDomainRegions.proteinDomain.primaryIdentifier', 'name': 'prot_dom_enrichment_for_gene', 'description': 'Protein Domains enriched for items in this list.', 'enrich': 'proteins.proteinDomainRegions.proteinDomain.name', 'startClass': 'Gene', 'title': 'Protein Domain Enrichment', 'targets': ['Gene'], 'widgetType': 'enrichment'}\n", "{'startClassDisplay': 'symbol', 'name': 'miranda_enrichment', 'description': 'MiRNAs enriched for items in this list.', 'enrich': 'transcripts[MRNA].miRNAinteractions.mirnagene.symbol', 'startClass': 'Gene', 'title': 'MiRNA Enrichment', 'targets': ['Gene'], 'widgetType': 'enrichment'}\n" ] } ], "source": [ "# Print the metadata dictionary of every widget whose type is \"enrichment\".\n", "for widget in service.widgets.values():\n", "    if widget[\"widgetType\"] == \"enrichment\":\n", "        print(widget)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will now get a list on which we want to perform the analysis using the list manager. 
" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "lm = service.list_manager()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "l = lm.get_list(name=\"PL FlyAtlas_brain_top\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To perform an enrichment analysis on the list, we use the `calculate_enrichment` method. The results are stored in `r`. We will now iterate through `r` and view the results. " ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "r = l.calculate_enrichment(widget=\"publication_enrichment\", maxp=0.01)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "29671739 Drosophilamidbrain revealed by single-cell transcriptomics. 4.311080647146878e-05\n", "22683328 Mutation of Drosophila dopamine receptor DopR leads to male-male courtship behavior. 0.0004231640537470922\n", "23895060 Temporally dimorphic recruitment of dopamine neurons into stress response circuitry in Drosophila. 0.0004231640537470922\n", "24128361 Sexually dimorphic recruitment of dopamine neurons into the stress response circuitry. 0.0004231640537470922\n", "15987944 Rapid, nongenomic responses to ecdysteroids and catecholamines mediated by a novel Drosophila G-protein-coupled receptor. 0.0016901138347928336\n", "17986026 Suppression of excitatory cholinergic synaptic transmission by Drosophila dopamine D1-like receptors. 0.0016901138347928336\n", "24303109 Pharmacological analysis of dopamine modulation in the Drosophila melanogaster larval heart. 0.0016901138347928336\n", "28902472 The mushroom body D1 dopamine receptor controls innate courtship drive. 0.0016901138347928336\n", "27160913 Cell-Type-Specific Transcriptome Analysis in the Drosophila Mushroom Body Reveals Memory-Related Changes in Gene Expression. 
0.0027171143620512823\n", "27571359 Visual Attention in Flies-Dopamine in the Mushroom Bodies Mediates the After-Effect of Cueing. 0.004218770306957829\n", "29779874 A GABAergic Feedback Shapes Dopaminergic Input on the Drosophila Mushroom Body to Promote Appetitive Long-Term Memory. 0.004218770306957829\n", "30397017 DrosophilaMushroom Bodies. 0.004460444207028984\n", "27487216 Operation of a homeostatic sleep switch. 0.005793537089956694\n", "21286249 Sleep deprivation during early-adult development results in long-lasting learning deficits in adult Drosophila. 0.008424700608528294\n", "25632118 Behavioral and circuit basis of sucrose rejection by Drosophila females in a simple decision-making task. 0.008424700608528294\n", "27292538 Dopaminergic Circuitry Underlying Mating Drive. 0.008424700608528294\n", "28473588 Branch-specific plasticity of a bifunctional dopamine circuit encodes protein hunger. 0.008424700608528294\n" ] } ], "source": [ "# Show the identifier, description and p-value of every enrichment result in r.\n", "for result in r:\n", "    print(result.identifier, result.description, result.p_value)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "This is how you can perform enrichment calculations on lists of your choice. " ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 1 }