{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import os\n", "import pandas" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
problem_definition_idproblemindicationsname_lower
063467Hypertension242hypertension
163468Essential Hypertension204essential hypertension
263470Benign Essential Hypertension190benign essential hypertension
\n", "
" ], "text/plain": [ " problem_definition_id problem indications \\\n", "0 63467 Hypertension 242 \n", "1 63468 Essential Hypertension 204 \n", "2 63470 Benign Essential Hypertension 190 \n", "\n", " name_lower \n", "0 hypertension \n", "1 essential hypertension \n", "2 benign essential hypertension " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the problem list and derive a lowercase name to use as the join key\n", "path = os.path.join('data', 'problems.tsv')\n", "problem_df = pandas.read_table(path)\n", "# .str.lower() passes missing names through as NaN; map(str.lower, ...) raised TypeError on them\n", "problem_df['name_lower'] = problem_df['problem'].str.lower()\n", "problem_df.head(3)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1596" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(problem_df)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'exact-synonym', 'name', 'narrow-synonym', 'related-synonym'}" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load Disease Ontology term names and synonyms\n", "url = 'http://git.dhimmel.com/disease-ontology/data/term-names.tsv'\n", "# Drop unnamed terms: merge pairs null keys with each other, so a missing name must never reach the join\n", "doterm_df = pandas.read_table(url).dropna(subset=['name'])\n", "doterm_df['name_lower'] = doterm_df['name'].str.lower()\n", "# Show which term types are present before any filtering\n", "set(doterm_df['type'])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
doidnametypename_lower
0DOID:0050911appendix carcinoid tumornameappendix carcinoid tumor
1DOID:0050911appendix carcinoid endocrine tumourexact-synonymappendix carcinoid endocrine tumour
2DOID:0050703infancy electroclinical syndromenameinfancy electroclinical syndrome
\n", "
" ], "text/plain": [ " doid name type \\\n", "0 DOID:0050911 appendix carcinoid tumor name \n", "1 DOID:0050911 appendix carcinoid endocrine tumour exact-synonym \n", "2 DOID:0050703 infancy electroclinical syndrome name \n", "\n", " name_lower \n", "0 appendix carcinoid tumor \n", "1 appendix carcinoid endocrine tumour \n", "2 infancy electroclinical syndrome " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Keep only primary names and exact synonyms for matching.\n", "# The result gets its own name so doterm_df stays unfiltered and earlier cells re-run unchanged.\n", "doterm_exact_df = doterm_df.query(\"type in ['exact-synonym', 'name']\")\n", "doterm_exact_df.head(3)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Left join on the lowercase name: every problem row is kept, unmatched ones get NaN DO columns\n", "domap_df = problem_df.merge(doterm_exact_df, how='left', on='name_lower')\n", "path = os.path.join('data', 'problem-to-doid.tsv')\n", "domap_df.to_csv(path, index=False, sep='\\t')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "367" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Distinct (problem, DO term) pairs; a term hit through both its name and a synonym counts once\n", "pair_df = domap_df[['problem_definition_id', 'doid']].dropna().drop_duplicates()\n", "len(pair_df)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
problem_definition_idproblemindicationsname_lowerdoidnametype
063467Hypertension242hypertensionDOID:10763hypertensionname
163468Essential Hypertension204essential hypertensionDOID:10825essential hypertensionname
263470Benign Essential Hypertension190benign essential hypertensionDOID:10913benign essential hypertensionname
363470Benign Essential Hypertension190benign essential hypertensionDOID:10913benign Essential hypertensionexact-synonym
469380Diabetes Mellitus151diabetes mellitusDOID:9351diabetes mellitusname
569402Diabetes Mellitus Poorly Controlled146diabetes mellitus poorly controlledNaNNaNNaN
669381Type II Diabetes Mellitus112type ii diabetes mellitusDOID:9352type II diabetes mellitusexact-synonym
776351Depression105depressionNaNNaNNaN
862598Allergic Rhinitis103allergic rhinitisDOID:4481allergic rhinitisname
974988Seizure Disorder98seizure disorderNaNNaNNaN
1062975Coronary Artery Disease88coronary artery diseaseDOID:3393coronary artery diseasename
1170455Rheumatoid Arthritis86rheumatoid arthritisDOID:7148rheumatoid arthritisname
1277891Acute Upper Respiratory Infection77acute upper respiratory infectionNaNNaNNaN
1376388Bipolar Disorder NOS77bipolar disorder nosNaNNaNNaN
1460869Cough76coughNaNNaNNaN
1564181Asthma74asthmaDOID:2841asthmaname
1675002Partial Complex Psychomotor Seizure With Intra...72partial complex psychomotor seizure with intra...NaNNaNNaN
1775125Multiple Sclerosis70multiple sclerosisDOID:2377multiple sclerosisname
1867970Hyperlipidemia69hyperlipidemiaNaNNaNNaN
1976357Major Depression, Recurrent67major depression, recurrentNaNNaNNaN
2075080Parkinson's Disease64parkinson's diseaseDOID:14330Parkinson's diseasename
2170475Scleroderma63sclerodermaDOID:419sclerodermaname
2270475Scleroderma63sclerodermaDOID:418Sclerodermaexact-synonym
2363217Congestive Heart Failure61congestive heart failureDOID:6000congestive heart failurename
2464688Esophageal Reflux61esophageal refluxNaNNaNNaN
2591089Lower Back Pain60lower back painNaNNaNNaN
2676272Schizoaffective Disorder57schizoaffective disorderDOID:5418schizoaffective disordername
2760899Abdominal Pain54abdominal painNaNNaNNaN
2876835HIV Infection54hiv infectionDOID:526HIV infectionexact-synonym
2961118Anxiety (Symptom)53anxiety (symptom)NaNNaNNaN
3062588Chronic Ethmoidal Sinusitis52chronic ethmoidal sinusitisDOID:9312chronic ethmoidal sinusitisexact-synonym
3162588Chronic Ethmoidal Sinusitis52chronic ethmoidal sinusitisDOID:9312chronic ethmoidal sinusitisexact-synonym
3269258Hypothyroidism52hypothyroidismDOID:1459hypothyroidismname
33278350Eczema51eczemaDOID:2723eczemaexact-synonym
3464118Acute Bronchitis51acute bronchitisDOID:6132acute Bronchitisexact-synonym
3574533Migraine Headache51migraine headacheNaNNaNNaN
3662916Atrial Fibrillation50atrial fibrillationDOID:0060224atrial fibrillationname
3776536Generalized Anxiety Disorder50generalized anxiety disorderDOID:14320generalized anxiety disordername
38275486ADHD, Combined Type50adhd, combined typeNaNNaNNaN
3962578Acute Sinusitis50acute sinusitisNaNNaNNaN
4065762Urinary Tract Infection48urinary tract infectionDOID:13148urinary tract infectionexact-synonym
4161108Insomnia46insomniaNaNNaNNaN
42166777Constipation46constipationDOID:2089constipationname
4376306Episodic Mood Disorders45episodic mood disordersNaNNaNNaN
4464205Chronic Obstructive Pulmonary Disease45chronic obstructive pulmonary diseaseDOID:3083chronic obstructive pulmonary diseasename
4564205Chronic Obstructive Pulmonary Disease45chronic obstructive pulmonary diseaseDOID:3083CHRONIC OBSTRUCTIVE pulmonary DISEASEexact-synonym
46251528Acute Otitis Media44acute otitis mediaNaNNaNNaN
47297231Epilepsy44epilepsyDOID:1826epilepsyexact-synonym
4876307Bipolar Disorder44bipolar disorderDOID:3312bipolar disordername
49288921Chronic Pain43chronic painNaNNaNNaN
\n", "
" ], "text/plain": [ " problem_definition_id problem \\\n", "0 63467 Hypertension \n", "1 63468 Essential Hypertension \n", "2 63470 Benign Essential Hypertension \n", "3 63470 Benign Essential Hypertension \n", "4 69380 Diabetes Mellitus \n", "5 69402 Diabetes Mellitus Poorly Controlled \n", "6 69381 Type II Diabetes Mellitus \n", "7 76351 Depression \n", "8 62598 Allergic Rhinitis \n", "9 74988 Seizure Disorder \n", "10 62975 Coronary Artery Disease \n", "11 70455 Rheumatoid Arthritis \n", "12 77891 Acute Upper Respiratory Infection \n", "13 76388 Bipolar Disorder NOS \n", "14 60869 Cough \n", "15 64181 Asthma \n", "16 75002 Partial Complex Psychomotor Seizure With Intra... \n", "17 75125 Multiple Sclerosis \n", "18 67970 Hyperlipidemia \n", "19 76357 Major Depression, Recurrent \n", "20 75080 Parkinson's Disease \n", "21 70475 Scleroderma \n", "22 70475 Scleroderma \n", "23 63217 Congestive Heart Failure \n", "24 64688 Esophageal Reflux \n", "25 91089 Lower Back Pain \n", "26 76272 Schizoaffective Disorder \n", "27 60899 Abdominal Pain \n", "28 76835 HIV Infection \n", "29 61118 Anxiety (Symptom) \n", "30 62588 Chronic Ethmoidal Sinusitis \n", "31 62588 Chronic Ethmoidal Sinusitis \n", "32 69258 Hypothyroidism \n", "33 278350 Eczema \n", "34 64118 Acute Bronchitis \n", "35 74533 Migraine Headache \n", "36 62916 Atrial Fibrillation \n", "37 76536 Generalized Anxiety Disorder \n", "38 275486 ADHD, Combined Type \n", "39 62578 Acute Sinusitis \n", "40 65762 Urinary Tract Infection \n", "41 61108 Insomnia \n", "42 166777 Constipation \n", "43 76306 Episodic Mood Disorders \n", "44 64205 Chronic Obstructive Pulmonary Disease \n", "45 64205 Chronic Obstructive Pulmonary Disease \n", "46 251528 Acute Otitis Media \n", "47 297231 Epilepsy \n", "48 76307 Bipolar Disorder \n", "49 288921 Chronic Pain \n", "\n", " indications name_lower \\\n", "0 242 hypertension \n", "1 204 essential hypertension \n", "2 190 benign essential hypertension \n", "3 190 benign essential 
hypertension \n", "4 151 diabetes mellitus \n", "5 146 diabetes mellitus poorly controlled \n", "6 112 type ii diabetes mellitus \n", "7 105 depression \n", "8 103 allergic rhinitis \n", "9 98 seizure disorder \n", "10 88 coronary artery disease \n", "11 86 rheumatoid arthritis \n", "12 77 acute upper respiratory infection \n", "13 77 bipolar disorder nos \n", "14 76 cough \n", "15 74 asthma \n", "16 72 partial complex psychomotor seizure with intra... \n", "17 70 multiple sclerosis \n", "18 69 hyperlipidemia \n", "19 67 major depression, recurrent \n", "20 64 parkinson's disease \n", "21 63 scleroderma \n", "22 63 scleroderma \n", "23 61 congestive heart failure \n", "24 61 esophageal reflux \n", "25 60 lower back pain \n", "26 57 schizoaffective disorder \n", "27 54 abdominal pain \n", "28 54 hiv infection \n", "29 53 anxiety (symptom) \n", "30 52 chronic ethmoidal sinusitis \n", "31 52 chronic ethmoidal sinusitis \n", "32 52 hypothyroidism \n", "33 51 eczema \n", "34 51 acute bronchitis \n", "35 51 migraine headache \n", "36 50 atrial fibrillation \n", "37 50 generalized anxiety disorder \n", "38 50 adhd, combined type \n", "39 50 acute sinusitis \n", "40 48 urinary tract infection \n", "41 46 insomnia \n", "42 46 constipation \n", "43 45 episodic mood disorders \n", "44 45 chronic obstructive pulmonary disease \n", "45 45 chronic obstructive pulmonary disease \n", "46 44 acute otitis media \n", "47 44 epilepsy \n", "48 44 bipolar disorder \n", "49 43 chronic pain \n", "\n", " doid name type \n", "0 DOID:10763 hypertension name \n", "1 DOID:10825 essential hypertension name \n", "2 DOID:10913 benign essential hypertension name \n", "3 DOID:10913 benign Essential hypertension exact-synonym \n", "4 DOID:9351 diabetes mellitus name \n", "5 NaN NaN NaN \n", "6 DOID:9352 type II diabetes mellitus exact-synonym \n", "7 NaN NaN NaN \n", "8 DOID:4481 allergic rhinitis name \n", "9 NaN NaN NaN \n", "10 DOID:3393 coronary artery disease name \n", "11 DOID:7148 rheumatoid 
arthritis name \n", "12 NaN NaN NaN \n", "13 NaN NaN NaN \n", "14 NaN NaN NaN \n", "15 DOID:2841 asthma name \n", "16 NaN NaN NaN \n", "17 DOID:2377 multiple sclerosis name \n", "18 NaN NaN NaN \n", "19 NaN NaN NaN \n", "20 DOID:14330 Parkinson's disease name \n", "21 DOID:419 scleroderma name \n", "22 DOID:418 Scleroderma exact-synonym \n", "23 DOID:6000 congestive heart failure name \n", "24 NaN NaN NaN \n", "25 NaN NaN NaN \n", "26 DOID:5418 schizoaffective disorder name \n", "27 NaN NaN NaN \n", "28 DOID:526 HIV infection exact-synonym \n", "29 NaN NaN NaN \n", "30 DOID:9312 chronic ethmoidal sinusitis exact-synonym \n", "31 DOID:9312 chronic ethmoidal sinusitis exact-synonym \n", "32 DOID:1459 hypothyroidism name \n", "33 DOID:2723 eczema exact-synonym \n", "34 DOID:6132 acute Bronchitis exact-synonym \n", "35 NaN NaN NaN \n", "36 DOID:0060224 atrial fibrillation name \n", "37 DOID:14320 generalized anxiety disorder name \n", "38 NaN NaN NaN \n", "39 NaN NaN NaN \n", "40 DOID:13148 urinary tract infection exact-synonym \n", "41 NaN NaN NaN \n", "42 DOID:2089 constipation name \n", "43 NaN NaN NaN \n", "44 DOID:3083 chronic obstructive pulmonary disease name \n", "45 DOID:3083 CHRONIC OBSTRUCTIVE pulmonary DISEASE exact-synonym \n", "46 NaN NaN NaN \n", "47 DOID:1826 epilepsy exact-synonym \n", "48 DOID:3312 bipolar disorder name \n", "49 NaN NaN NaN " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Preview the first 50 rows of the problem-to-DO mapping\n", "domap_df.head(50)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# IDs of the problems that matched at least one DO term\n", "mapped_problems = set(pair_df['problem_definition_id'])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.2286967418546366" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ],
"source": [ "# Mapped problems as a fraction of the rows in the problem table\n", "len(mapped_problems) / len(problem_df)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "365" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Absolute number of mapped problems\n", "len(mapped_problems)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
problem_definition_idproblemindicationsname_lowerdoidnametype
2170475Scleroderma63sclerodermaDOID:419sclerodermaname
2270475Scleroderma63sclerodermaDOID:418Sclerodermaexact-synonym
932167200Premature Menopause2premature menopauseDOID:10787premature menopausename
933167200Premature Menopause2premature menopauseDOID:10787Premature menopauseexact-synonym
934167200Premature Menopause2premature menopauseDOID:5426premature menopauseexact-synonym
\n", "
" ], "text/plain": [ " problem_definition_id problem indications \\\n", "21 70475 Scleroderma 63 \n", "22 70475 Scleroderma 63 \n", "932 167200 Premature Menopause 2 \n", "933 167200 Premature Menopause 2 \n", "934 167200 Premature Menopause 2 \n", "\n", " name_lower doid name type \n", "21 scleroderma DOID:419 scleroderma name \n", "22 scleroderma DOID:418 Scleroderma exact-synonym \n", "932 premature menopause DOID:10787 premature menopause name \n", "933 premature menopause DOID:10787 Premature menopause exact-synonym \n", "934 premature menopause DOID:5426 premature menopause exact-synonym " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Find problems that mapped to multiple DO terms\n", "# pair_df holds unique (problem, doid) pairs, so a repeated problem ID means a second DO term\n", "is_repeat = pair_df.duplicated('problem_definition_id')\n", "duplicates = set(pair_df.loc[is_repeat, 'problem_definition_id'])\n", "domap_df[domap_df['problem_definition_id'].isin(duplicates)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "137" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the DO slim term list and report how many terms it has\n", "url = 'http://git.dhimmel.com/disease-ontology/data/slim-terms.tsv'\n", "doslim_df = pandas.read_table(url)\n", "len(doslim_df)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "50" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Count slim terms whose own DOID was matched by some problem\n", "doslim_df['doid'].isin(set(pair_df['doid'])).sum()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "55" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# with propagation\n", "# Separate name so that re-running the slim-term cells above still sees their own table\n", "url = 'http://git.dhimmel.com/disease-ontology/data/slim-terms-prop.tsv'\n", "doslim_prop_df = pandas.read_table(url)\n", "# A slim term counts when any term listed under it (subsumed_id) was matched by a problem\n", "is_mapped = doslim_prop_df['subsumed_id'].isin(set(pair_df['doid']))\n", "len(set(doslim_prop_df.loc[is_mapped, 'slim_id']))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.0" } }, "nbformat": 4, "nbformat_minor": 0 }