{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import collections\n", "import requests\n", "import re\n", "import os\n", "import csv" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# url of a search through MeSH which has empty terms\n", "MeSH_url = 'http://www.nlm.nih.gov/cgi/mesh/2015/MB_cgi?mode=&term='" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Function (MeSH_match) to take a single disease MeSH name, search the 2014\n", "# MeSH database, and extract the unique MeSH ID associated with that disease.\n", "# Returns a two membered list including the disease name, followed by the unique MeSH ID\n", "def MeSH_match(disease):\n", " disease_edit = disease.replace(' ', '+')\n", " url = MeSH_url + disease_edit\n", " returned = requests.get(url)\n", " returned = returned.text\n", " sub_returned = re.search('Unique ID(.+?)<', returned)\n", " sub_returned = sub_returned.group(1)\n", " returned_list = [disease, sub_returned]\n", " return(returned_list)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/leobrueggeman/GitHub/incomplete-interactome\n" ] } ], "source": [ "# opens a file that contains a list of diseases, strips the newlines, and creates a list (your_list) \n", "# variable containing these diseases\n", "print(os.getcwd())\n", "f = open('disease_input.txt', 'r')\n", "your_list = [x.rstrip() for x in f]\n", "f.close()\n", "\n", "# runs the MeSH_match function on your list of diseases (your_list) and returns\n", "# the output to a new list (output1)\n", "output1 = list(map(MeSH_match, your_list))\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# creates a csv file which contains MeSH disease names matched with their unique MeSH IDs\n", "with open('disease_output.tsv', 'w') as f:\n", " writer = csv.writer(f, delimiter = '\\t')\n", " writer.writerow(['name', 'mesh_id'])\n", " writer.writerows(output1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.3" } }, "nbformat": 4, "nbformat_minor": 0 }