{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2019-10-03T10:41:50.883723Z", "start_time": "2019-10-03T10:41:50.202209Z" }, "hide_input": false, "jupyter": { "source_hidden": true } }, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

Loading results of previous run on Mon Oct 7 20:16:48 2019 from ./persistent_storage/20191007.pkl" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 12954 entries, 0 to 12953\n", "Data columns (total 9 columns):\n", "WkD_disease 12954 non-null object\n", "WkD_diseaseLabel 12954 non-null object\n", "WP_en_article 4715 non-null object\n", "class 4525 non-null object\n", "importance 4515 non-null object\n", "taskForces 2600 non-null object\n", "cochrane_reviews_html 4715 non-null object\n", "cochrane_reviews 4715 non-null object\n", "talkPage_categories 4668 non-null object\n", "dtypes: object(9)\n", "memory usage: 910.9+ KB\n" ] } ], "source": [ "#------------------------------------------------------------------------------------------------------\n", "# imports, logger initiation, loading persistant data\n", "#------------------------------------------------------------------------------------------------------\n", "from IPython.core.display import display, HTML\n", "from IPython.display import clear_output, Markdown\n", "from IPython.display import FileLink, FileLinks\n", "\n", "import logging\n", "import dill\n", "# import qgrid\n", "# from wikidata.client import Client\n", "\n", "import pandas as pd\n", "from pandas.compat import StringIO\n", "\n", "\n", "import numpy as np\n", "import json\n", "from SPARQLWrapper import SPARQLWrapper, JSON\n", "import ipywidgets as widgets\n", "\n", "from urllib.parse import unquote\n", "from urllib.parse import quote\n", "\n", "import wikipedia\n", "from tqdm import tnrange, tqdm_notebook\n", "from tqdm import tqdm\n", "\n", "import ipywidgets as widgetsM\n", "from ipywidgets import HBox, VBox\n", "from ipywidgets import Button, Layout\n", "\n", "from operator import itemgetter\n", "from collections import OrderedDict\n", "\n", "import time\n", "from datetime import datetime\n", "import html2text\n", "import re\n", "import 
sys\n", "import os\n", "import io\n", "\n", "from Bio import Entrez\n", "from urllib.error import HTTPError\n", "\n", "# from fuzzywuzzy import fuzz\n", "# from fuzzywuzzy import process\n", "\n", "from itertools import chain\n", "from collections import Counter\n", "\n", "\n", "display(HTML(\"\"))\n", "\n", "logger = logging.getLogger(__name__)\n", "logger.setLevel(logging.DEBUG)\n", "\n", "# create file handler which logs even debug messages\n", "fh1 = logging.FileHandler('./logs/log.html',mode='w')\n", "fh1.setLevel(logging.DEBUG)\n", "\n", "# create console handler with a higher log level\n", "console_handler = logging.StreamHandler()\n", "console_handler.setLevel(logging.ERROR) #------------------change your log setting here DEBUG INFO WARNING ERROR CRITICAL\n", "\n", "formatter = logging.Formatter('

%(asctime)s - %(name)s - %(levelname)s - %(message)s

' , datefmt='%d-%b-%y %H:%M:%S')\n", "fh1.setFormatter(formatter)\n", "console_handler.setFormatter(formatter)\n", "\n", "# add the handlers to the logger\n", "logger.addHandler(fh1)\n", "logger.addHandler(console_handler)\n", "\n", "listOfFiles = os.listdir('./persistent_storage/')\n", "pkl_file = (sorted(listOfFiles, reverse=True)[0])\n", "\n", "display(HTML(f\"

Loading results of previous run on \\\n", " {time.ctime(os.path.getmtime(f'./persistent_storage/{pkl_file}'))} from ./persistent_storage/{pkl_file}\"))\n", "logger.info(f\"Loading df from ./persistent_storage/{pkl_file}\")\n", "#with open('./persistent_storage/df.dill', 'rb') as in_strm:\n", "# df = dill.load(in_strm)\n", "#dill.load_session('./persistent_storage/dill_session') \n", "df=pd.read_pickle(f\"./persistent_storage/{pkl_file}\")\n", "df.info()\n", "\n", "# def on_button_load_clicked(b):\n", "# display(HTML(f\"

Loading df from ./persistent_storage/...\"))\n", "# logger.info(f\"Loading df from ./persistent_storage...\")\n", "# #with open('./persistent_storage/df.dill', 'rb') as in_strm:\n", "# # df = dill.load(in_strm)\n", "# #dill.load_session('./persistent_storage/dill_session') \n", "# df=pd.read_pickle(\"./persistent_storage/df.pkl\")\n", "# df.info()\n", "# text_area.value=df.info()\n", "\n", " \n", "# button_load = widgets.Button(description=\"Load data from persistent storage\",layout=Layout(width='20%', height='80px'))\n", "# button_load.style.button_color = 'lightgreen'\n", "# button_load.on_click(on_button_load_clicked)\n", "# display(button_load)\n", "\n", "# text_area= widgets.Textarea(value=df.info(),placeholder='',description='Loaded data summary:',disabled=False)\n", "# display(text_area)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-10-03T10:42:00.073241Z", "start_time": "2019-10-03T10:41:50.899299Z" }, "jupyter": { "source_hidden": true } }, "outputs": [ { "data": { "text/markdown": [ "# Statistical Summaries:" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "### Total number of diseases identified in Wikidata: 12954" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "### Total number of diseases identified in Wikidata which have a corresponding article in Wikipedia: 4715" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "-----------------------------------------------------" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "### Wikipedia articles class:" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Article quality class Total count
Start1829
Stub1035
C991
B541
nan190
GA79
FA23
List16
Disambig11
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "### Wikipedia articles importance:" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Article quality category Total count
3-Mid2151
4-Low2007
2-High292
nan200
1-Top65
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "### Wikipedia articles taskForces:" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
taskForce Total count
NA2115
Ophthalmology237
Dermatology951
Toxicology11
Cardiology186
Neurology398
Pathology247
Medical genetics462
Pulmonology65
Reproductive medicine72
Hematology-oncology251
Psychiatry74
Gastroenterology52
Nephrology68
Livestock2
Radiology4
Applied Linguistics1
Emergency medicine and EMS6
Ethics1
Sustainability1
Military logistics and medicine1
World War I1
Balkan military history1
European military history1
German military history1
World War II1
Theoretical Linguistics1
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "-----------------------------------------------------" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "### Number of disease related Wikipedia articles which cite one or more Cochrane reviews: 1219" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "### Number of disease related Wikipedia articles which do not cite any Cochrane reviews: 3496" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "-----------------------------------------------------" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "### PMIDs statistical summary 1 (LATEST VERSIONS of Cochrane reviews found and/or cited):" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PMIDs of Cochrane reviews (old versions only) Title Times appeared in search results Times cited in Wikipedia Version (1=latest, 0=old) Times cited in a Top Importance WP articles Times cited in a High Importance WP articles Times cited in a Mid Importance WP articles Times cited in a Low Importance WP articles Times cited in an Unknown Importance WP articles Times cited in a FA quality WP articles Times cited in a A quality WP articles Times cited in a GA quality WP articles Times cited in a B quality WP articles Times cited in a C quality WP articles Times cited in a Start quality WP articles Times cited in a Stub quality WP articles Times cited in a Other quality WP articles Times cited in an Unknown quality WP articles
index
TotalN/AN/A15382114368944253912863011860288521169625111
154518254088Intravitreal steroids for macular edema in diabetes.42100200000101000
571827245310Speech and language therapy for aphasia following stroke.22110100000200000
463326241698Post-pyloric versus gastric tube feeding for preventing pneumonia and improving nutritional outcomes in critically ill adults.22110100001001000
501529664187Exercise interventions and patient beliefs for people with hip, knee or hip and knee osteoarthritis: a mixed methods review.22120000000200000
502227103611Topical NSAIDs for chronic musculoskeletal pain in adults.22120000000200000
471326824399Surgical versus non-surgical treatment for lumbar spinal stenosis.32100200000020000
114816856036Self-help and guided self-help for eating disorders.32100101000010001
266120927726Grommets (ventilation tubes) for hearing loss associated with otitis media with effusion in children.22101100000110000
625424170669Blood pressure targets for hypertension in people with diabetes mellitus.22120000001100000
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

See the complete table here (sort, filter, and search):

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "./results/PMIDs_latestVersions_only.html" ], "text/plain": [ "/home/arash/Desktop/WP2Cochrane/results/PMIDs_latestVersions_only.html" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "-----------------------------------------------------" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "### PMIDs statistical summary 2 (OLD VERSIONS of Cochrane reviews found and/or cited):" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PMIDs of Cochrane reviews (old versions only) Title Times appeared in search results Times cited in Wikipedia Version (1=latest, 0=old) Times cited in a Top Importance WP articles Times cited in a High Importance WP articles Times cited in a Mid Importance WP articles Times cited in a Low Importance WP articles Times cited in an Unknown Importance WP articles Times cited in a FA quality WP articles Times cited in a A quality WP articles Times cited in a GA quality WP articles Times cited in a B quality WP articles Times cited in a C quality WP articles Times cited in a Start quality WP articles Times cited in a Stub quality WP articles Times cited in a Other quality WP articles Times cited in an Unknown quality WP articles
index
TotalN/AN/A5103342770410154120611111201
11024142399Pharmacological treatment for pain in Guillain-Barré syndrome.141000100001000000
48025102015Antioxidant supplementation for lung disease in cystic fibrosis.11001000000100000
194326174592Assisted reproductive technology: an overview of Cochrane Reviews.11000010000010000
124227089005Non-absorbable disaccharides versus placebo/no intervention and lactulose versus lactitol for the prevention and treatment of hepatic encephalopathy in people with cirrhosis.51000100001000000
255422161393Progestogen for treating threatened miscarriage.11001000000100000
50227552284Inhaled corticosteroids for cystic fibrosis.11001000000100000
122927884041Follow-up strategies for patients treated for non-metastatic colorectal cancer.21010000000100000
19926932750Anti-vascular endothelial growth factor (VEGF) drugs for treatment of retinopathy of prematurity.51001000000010000
53025093421Pneumococcal vaccines for cystic fibrosis.11001000000100000
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

See the complete table here (sort, filter, and search):

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "./results/PMIDs_oldVersions_only.html" ], "text/plain": [ "/home/arash/Desktop/WP2Cochrane/results/PMIDs_oldVersions_only.html" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "-----------------------------------------------------" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#------------------------------------------------------------------------------------------------------\n", "# Statistical summary generator \n", "#------------------------------------------------------------------------------------------------------\n", "datatables_js_script=\"\"\"\n", "\n", "\n", "\n", "\n", "\"\"\"\n", "\n", "def make_clickable_PMID(val):\n", " # target _blank to open new window\n", " if not val: \n", " return None\n", " else:\n", " return f'{val}'\n", "\n", "def imp2int(WP_imp,targetImportance):\n", " val=0\n", " if not WP_imp:\n", " if targetImportance==\"Unknown\":\n", " val=1\n", " else:\n", " if targetImportance==WP_imp:\n", " val=1\n", " return val\n", "\n", "def class2int(WP_class,targetClass):\n", " val=0\n", " if not WP_class:\n", " if targetClass==\"Unknown\":\n", " val=1\n", " else:\n", " if targetClass==WP_class or (targetClass==\"Other\" and WP_class in ['List','Disambig','FL']):\n", " val=1\n", " return val \n", "\n", "display(Markdown(\"# Statistical Summaries:\"))\n", "display(Markdown(f\"### Total number of diseases identified in Wikidata: {df.shape[0]}\"))\n", "display(Markdown(f\"### Total number of diseases identified in Wikidata which have a corresponding article in Wikipedia:\\\n", " {df[df['WP_en_article'].notnull()].shape[0]}\"))\n", "display(Markdown(\"-----------------------------------------------------\")) \n", "\n", "display(Markdown(\"### Wikipedia articles class:\"))\n", 
"display(df.loc[df['WP_en_article'].notnull()]['class'].value_counts(dropna=False).to_frame().reset_index().rename_axis('index').\\\n", " rename(columns = {'class': 'Total count','index': 'Article quality class'}).\\\n", " sort_values(by = 'Total count', ascending = False).style.hide_index()) \n", "\n", "display(Markdown(\"### Wikipedia articles importance:\"))\n", "display(df.loc[df['WP_en_article'].notnull()]['importance'].value_counts(dropna=False).to_frame().reset_index().rename_axis('index').\\\n", " rename(columns = {'importance': 'Total count','index': 'Article quality category'}).\\\n", " sort_values(by = 'Total count', ascending = False).style.hide_index())\n", "\n", "\n", "display(Markdown('### Wikipedia articles taskForces:'))\n", "#display(pd.Series(Counter(chain.from_iterable( df.loc[df['taskForces'].notnull()]['taskForces'] ) )))\n", "taskForces_count = {'NA':0}\n", "for taskForces in df.loc[df['WP_en_article'].notnull()]['taskForces']:\n", " if taskForces:\n", " for taskForce in taskForces:\n", " if taskForce in taskForces_count:\n", " taskForces_count[taskForce] += 1\n", " else :\n", " taskForces_count[taskForce] = 1\n", " else:\n", " taskForces_count['NA'] += 1\n", "display(pd.DataFrame(taskForces_count.items(), columns = ['taskForce', 'Total count']).style.hide_index())\n", "display(Markdown(\"-----------------------------------------------------\")) \n", "\n", " \n", "display(Markdown(f\"### Number of disease related Wikipedia articles which cite one or more Cochrane reviews: \\\n", " {df[df['cochrane_reviews'].notnull()].shape[0] - df[(df['cochrane_reviews']=={})].shape[0] }\"))\n", "display(Markdown(f\"### Number of disease related Wikipedia articles which do not cite any Cochrane reviews: \\\n", " { df[(df['cochrane_reviews']=={})].shape[0] }\"))\n", "display(Markdown(\"-----------------------------------------------------\")) \n", "\n", 
"#------------------------------------------------------------------------------------------------------\n", "# PIMD stats - PMIDs of Cochrane reviews (LATEST versions only)\n", "#------------------------------------------------------------------------------------------------------ \n", "uniquePMIDs_count={}\n", "for index, row in df.iterrows(): \n", " WP_class = row['class']\n", " WP_imp = row['importance']\n", " PIMD_paper_dict=row['cochrane_reviews']\n", " if PIMD_paper_dict:\n", " for PMID, paper in PIMD_paper_dict.items():\n", " title = paper[0]\n", " cited = paper[1]\n", " latestVersion = paper[2]\n", " if latestVersion:\n", " if PMID in uniquePMIDs_count:\n", " uniquePMIDs_count[PMID]=[title, uniquePMIDs_count[PMID][1]+1, uniquePMIDs_count[PMID][2]+int(cited),int(latestVersion)\\\n", " , uniquePMIDs_count[PMID][4]+(imp2int(WP_imp,'1-Top')*int(cited))\n", " , uniquePMIDs_count[PMID][5]+(imp2int(WP_imp,'2-High')*int(cited))\n", " , uniquePMIDs_count[PMID][6]+(imp2int(WP_imp,'3-Mid')*int(cited))\n", " , uniquePMIDs_count[PMID][7]+(imp2int(WP_imp,'4-Low')*int(cited))\n", " , uniquePMIDs_count[PMID][8]+(imp2int(WP_imp,'Unknown')*int(cited)) \n", " , uniquePMIDs_count[PMID][9]+(class2int(WP_class,'FA')*int(cited))\n", " , uniquePMIDs_count[PMID][10]+(class2int(WP_class,'A')*int(cited))\n", " , uniquePMIDs_count[PMID][11]+(class2int(WP_class,'GA')*int(cited))\n", " , uniquePMIDs_count[PMID][12]+(class2int(WP_class,'B')*int(cited))\n", " , uniquePMIDs_count[PMID][13]+(class2int(WP_class,'C')*int(cited))\n", " , uniquePMIDs_count[PMID][14]+(class2int(WP_class,'Start')*int(cited))\n", " , uniquePMIDs_count[PMID][15]+(class2int(WP_class,'Stub')*int(cited))\n", " , uniquePMIDs_count[PMID][16]+(class2int(WP_class,'Other')*int(cited)) \n", " , uniquePMIDs_count[PMID][17]+(class2int(WP_class,'Unknown')*int(cited))]\n", " else:\n", " uniquePMIDs_count[PMID]=[title, 1, int(cited),int(latestVersion)\n", " , imp2int(WP_imp,'1-Top')*int(cited)\n", " , 
imp2int(WP_imp,'2-High')*int(cited)\n", " , imp2int(WP_imp,'3-Mid')*int(cited)\n", " , imp2int(WP_imp,'4-Low')*int(cited)\n", " , imp2int(WP_imp,'Unknown')*int(cited)\n", " , class2int(WP_class,'FA')*int(cited)\n", " , class2int(WP_class,'A')*int(cited)\n", " , class2int(WP_class,'GA')*int(cited)\n", " , class2int(WP_class,'B')*int(cited)\n", " , class2int(WP_class,'C')*int(cited)\n", " , class2int(WP_class,'Start')*int(cited)\n", " , class2int(WP_class,'Stub')*int(cited)\n", " , class2int(WP_class,'Other')*int(cited)\n", " , class2int(WP_class,'Unknown')*int(cited)]\n", "\n", "display(Markdown(f'### PMIDs statistical summary 1 (LATEST VERSIONS of Cochrane reviews found and/or cited):'))\n", "PMIDs_df= pd.DataFrame.from_dict(uniquePMIDs_count,orient='index').reset_index().rename_axis('index')\\\n", " .rename(columns = {'index': 'PMIDs of Cochrane reviews (old versions only)'\n", " ,0: 'Title'\n", " ,1: 'Times appeared in search results'\n", " ,2: 'Times cited in Wikipedia'\n", " ,3: 'Version (1=latest, 0=old)'\n", " ,4: 'Times cited in a Top Importance WP articles'\n", " ,5: 'Times cited in a High Importance WP articles'\n", " ,6: 'Times cited in a Mid Importance WP articles'\n", " ,7: 'Times cited in a Low Importance WP articles'\n", " ,8: 'Times cited in an Unknown Importance WP articles'\n", " ,9: 'Times cited in a FA quality WP articles'\n", " ,10: 'Times cited in a A quality WP articles'\n", " ,11: 'Times cited in a GA quality WP articles'\n", " ,12: 'Times cited in a B quality WP articles'\n", " ,13: 'Times cited in a C quality WP articles'\n", " ,14: 'Times cited in a Start quality WP articles'\n", " ,15: 'Times cited in a Stub quality WP articles'\n", " ,16: 'Times cited in a Other quality WP articles'\n", " ,17: 'Times cited in an Unknown quality WP articles'\n", " }).sort_values(by = 'Times cited in Wikipedia', ascending = False)\n", "PMIDs_df.loc['Total'] = ['N/A','N/A',PMIDs_df['Times appeared in search results'].sum()\\\n", " ,PMIDs_df['Times cited in 
Wikipedia'].sum()\n", " #,PMIDs_df['Latest version (1=Yes, 0=No)'].sum()\n", " ,PMIDs_df['Version (1=latest, 0=old)'].count()\n", " ,PMIDs_df['Times cited in a Top Importance WP articles'].sum()\n", " ,PMIDs_df['Times cited in a High Importance WP articles'].sum()\n", " ,PMIDs_df['Times cited in a Mid Importance WP articles'].sum()\n", " ,PMIDs_df['Times cited in a Low Importance WP articles'].sum()\n", " ,PMIDs_df['Times cited in an Unknown Importance WP articles'].sum()\n", " ,PMIDs_df['Times cited in a FA quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in a A quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in a GA quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in a B quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in a C quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in a Start quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in a Stub quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in a Other quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in an Unknown quality WP articles'].sum()\n", " ]\n", "display(PMIDs_df.sort_values(by = 'Times cited in Wikipedia', ascending = False).head(10)\n", " .style.set_table_attributes('class=\"results\"')\\\n", " .format({'PMIDs of Cochrane reviews (old versions only)': make_clickable_PMID}))\n", "display(HTML('

'))\n", "f=open(\"./results/PMIDs_latestVersions_only.html\",\"w\")\n", "f.write(datatables_js_script + PMIDs_df.sort_values(by = 'Times cited in Wikipedia', ascending = False)\n", " .style.set_table_attributes('class=\"results\"')\\\n", " .format({'PMIDs of Cochrane reviews (latest versions only)': make_clickable_PMID}).render())\n", "f.close()\n", "display(HTML('

See the complete table here (sort, filter, and search):

')\n", " ,FileLink('./results/PMIDs_latestVersions_only.html', result_html_prefix='', result_html_suffix='')) \n", "display(Markdown(\"-----------------------------------------------------\")) \n", "\n", "#------------------------------------------------------------------------------------------------------\n", "# PIMD stats - PMIDs of Cochrane reviews (old versions only)\n", "#------------------------------------------------------------------------------------------------------ \n", "uniquePMIDs_count={}\n", "for index, row in df.iterrows(): \n", " WP_class = row['class']\n", " WP_imp = row['importance']\n", " PIMD_paper_dict=row['cochrane_reviews']\n", " if PIMD_paper_dict:\n", " for PMID, paper in PIMD_paper_dict.items():\n", " title = paper[0]\n", " cited = paper[1]\n", " latestVersion = paper[2]\n", " if not latestVersion:\n", " if PMID in uniquePMIDs_count:\n", " uniquePMIDs_count[PMID]=[title, uniquePMIDs_count[PMID][1]+1, uniquePMIDs_count[PMID][2]+int(cited),int(latestVersion)\\\n", " , uniquePMIDs_count[PMID][4]+(imp2int(WP_imp,'1-Top')*int(cited))\n", " , uniquePMIDs_count[PMID][5]+(imp2int(WP_imp,'2-High')*int(cited))\n", " , uniquePMIDs_count[PMID][6]+(imp2int(WP_imp,'3-Mid')*int(cited))\n", " , uniquePMIDs_count[PMID][7]+(imp2int(WP_imp,'4-Low')*int(cited))\n", " , uniquePMIDs_count[PMID][8]+(imp2int(WP_imp,'Unknown')*int(cited)) \n", " , uniquePMIDs_count[PMID][9]+(class2int(WP_class,'FA')*int(cited))\n", " , uniquePMIDs_count[PMID][10]+(class2int(WP_class,'A')*int(cited))\n", " , uniquePMIDs_count[PMID][11]+(class2int(WP_class,'GA')*int(cited))\n", " , uniquePMIDs_count[PMID][12]+(class2int(WP_class,'B')*int(cited))\n", " , uniquePMIDs_count[PMID][13]+(class2int(WP_class,'C')*int(cited))\n", " , uniquePMIDs_count[PMID][14]+(class2int(WP_class,'Start')*int(cited))\n", " , uniquePMIDs_count[PMID][15]+(class2int(WP_class,'Stub')*int(cited))\n", " , uniquePMIDs_count[PMID][16]+(class2int(WP_class,'Other')*int(cited)) \n", " , 
uniquePMIDs_count[PMID][17]+(class2int(WP_class,'Unknown')*int(cited))]\n", " else:\n", " uniquePMIDs_count[PMID]=[title, 1, int(cited),int(latestVersion)\n", " , imp2int(WP_imp,'1-Top')*int(cited)\n", " , imp2int(WP_imp,'2-High')*int(cited)\n", " , imp2int(WP_imp,'3-Mid')*int(cited)\n", " , imp2int(WP_imp,'4-Low')*int(cited)\n", " , imp2int(WP_imp,'Unknown')*int(cited)\n", " , class2int(WP_class,'FA')*int(cited)\n", " , class2int(WP_class,'A')*int(cited)\n", " , class2int(WP_class,'GA')*int(cited)\n", " , class2int(WP_class,'B')*int(cited)\n", " , class2int(WP_class,'C')*int(cited)\n", " , class2int(WP_class,'Start')*int(cited)\n", " , class2int(WP_class,'Stub')*int(cited)\n", " , class2int(WP_class,'Other')*int(cited)\n", " , class2int(WP_class,'Unknown')*int(cited)]\n", "\n", "display(Markdown(f'### PMIDs statistical summary 2 (OLD VERSIONS of Cochrane reviews found and/or cited):'))\n", "PMIDs_df= pd.DataFrame.from_dict(uniquePMIDs_count,orient='index').reset_index().rename_axis('index')\\\n", " .rename(columns = {'index': 'PMIDs of Cochrane reviews (old versions only)'\n", " ,0: 'Title'\n", " ,1: 'Times appeared in search results'\n", " ,2: 'Times cited in Wikipedia'\n", " ,3: 'Version (1=latest, 0=old)'\n", " ,4: 'Times cited in a Top Importance WP articles'\n", " ,5: 'Times cited in a High Importance WP articles'\n", " ,6: 'Times cited in a Mid Importance WP articles'\n", " ,7: 'Times cited in a Low Importance WP articles'\n", " ,8: 'Times cited in an Unknown Importance WP articles'\n", " ,9: 'Times cited in a FA quality WP articles'\n", " ,10: 'Times cited in a A quality WP articles'\n", " ,11: 'Times cited in a GA quality WP articles'\n", " ,12: 'Times cited in a B quality WP articles'\n", " ,13: 'Times cited in a C quality WP articles'\n", " ,14: 'Times cited in a Start quality WP articles'\n", " ,15: 'Times cited in a Stub quality WP articles'\n", " ,16: 'Times cited in a Other quality WP articles'\n", " ,17: 'Times cited in an Unknown quality WP 
articles'\n", " }).sort_values(by = 'Times cited in Wikipedia', ascending = False)\n", "PMIDs_df.loc['Total'] = ['N/A','N/A',PMIDs_df['Times appeared in search results'].sum()\\\n", " ,PMIDs_df['Times cited in Wikipedia'].sum()\n", " #,PMIDs_df['Latest version (1=Yes, 0=No)'].sum()\n", " ,PMIDs_df['Version (1=latest, 0=old)'].count()\n", " ,PMIDs_df['Times cited in a Top Importance WP articles'].sum()\n", " ,PMIDs_df['Times cited in a High Importance WP articles'].sum()\n", " ,PMIDs_df['Times cited in a Mid Importance WP articles'].sum()\n", " ,PMIDs_df['Times cited in a Low Importance WP articles'].sum()\n", " ,PMIDs_df['Times cited in an Unknown Importance WP articles'].sum()\n", " ,PMIDs_df['Times cited in a FA quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in a A quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in a GA quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in a B quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in a C quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in a Start quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in a Stub quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in a Other quality WP articles'].sum()\n", " ,PMIDs_df['Times cited in an Unknown quality WP articles'].sum()\n", " ]\n", "display(PMIDs_df.sort_values(by = 'Times cited in Wikipedia', ascending = False).head(10)\n", " .style.set_table_attributes('class=\"results\"')\\\n", " .format({'PMIDs of Cochrane reviews (old versions only)': make_clickable_PMID}))\n", "display(HTML('

'))\n", "f=open(\"./results/PMIDs_oldVersions_only.html\",\"w\")\n", "f.write(datatables_js_script + PMIDs_df.sort_values(by = 'Times cited in Wikipedia', ascending = False)\n", " .style.set_table_attributes('class=\"results\"')\\\n", " .format({'PMIDs of Cochrane reviews (old versions only)': make_clickable_PMID}).render())\n", "f.close()\n", "display(HTML('

See the complete table here (sort, filter, and search):

')\n", " ,FileLink('./results/PMIDs_oldVersions_only.html', result_html_prefix='', result_html_suffix='')) \n", "display(Markdown(\"-----------------------------------------------------\"))\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2019-10-03T10:49:07.699796Z", "start_time": "2019-10-03T10:42:00.079550Z" }, "jupyter": { "source_hidden": true } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "12954it [07:10, 30.06it/s]\n" ] }, { "data": { "text/html": [ "

Complete Tabular Results in CSV or HTML Format:

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
  • ./results/
  • \n", "
  •   full_data.html
  • \n", "
  •   full_data.csv
  • \n", "
  •   full_data_SPSS-friendy.csv
  • \n", "
  •   PMIDs_oldVersions_only.html
  • \n", "
  •   PMIDs_latestVersions_only.html
  • " ], "text/plain": [ "./results/\n", " full_data.html\n", " full_data.csv\n", " full_data_SPSS-friendy.csv\n", " PMIDs_oldVersions_only.html\n", " PMIDs_latestVersions_only.html" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

    Specialized HTML results for each WikiProject medicine task force (see list of active task forces here):

    " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
  • ./results//HTML_results_per_task_force/
  • \n", "
  •   Pathology_taskForce.html
  • \n", "
  •   Nephrology_taskForce.html
  • \n", "
  •   Balkan military history_taskForce.html
  • \n", "
  •   European military history_taskForce.html
  • \n", "
  •   Applied Linguistics_taskForce.html
  • \n", "
  •   Radiology_taskForce.html
  • \n", "
  •   Reproductive medicine_taskForce.html
  • \n", "
  •   Hematology-oncology_taskForce.html
  • \n", "
  •   Gastroenterology_taskForce.html
  • \n", "
  •   Ethics_taskForce.html
  • \n", "
  •   Neurology_taskForce.html
  • \n", "
  •   Livestock_taskForce.html
  • \n", "
  •   Sustainability_taskForce.html
  • \n", "
  •   Emergency medicine and EMS_taskForce.html
  • \n", "
  •   Psychiatry_taskForce.html
  • \n", "
  •   Medical genetics_taskForce.html
  • \n", "
  •   Military logistics and medicine_taskForce.html
  • \n", "
  •   World War II_taskForce.html
  • \n", "
  •   Theoretical Linguistics_taskForce.html
  • \n", "
  •   Dermatology_taskForce.html
  • \n", "
  •   Cardiology_taskForce.html
  • \n", "
  •   Ophthalmology_taskForce.html
  • \n", "
  •   World War I_taskForce.html
  • \n", "
  •   German military history_taskForce.html
  • \n", "
  •   Pulmonology_taskForce.html
  • \n", "
  •   Toxicology_taskForce.html
  • " ], "text/plain": [ "./results//HTML_results_per_task_force/\n", " Pathology_taskForce.html\n", " Nephrology_taskForce.html\n", " Balkan military history_taskForce.html\n", " European military history_taskForce.html\n", " Applied Linguistics_taskForce.html\n", " Radiology_taskForce.html\n", " Reproductive medicine_taskForce.html\n", " Hematology-oncology_taskForce.html\n", " Gastroenterology_taskForce.html\n", " Ethics_taskForce.html\n", " Neurology_taskForce.html\n", " Livestock_taskForce.html\n", " Sustainability_taskForce.html\n", " Emergency medicine and EMS_taskForce.html\n", " Psychiatry_taskForce.html\n", " Medical genetics_taskForce.html\n", " Military logistics and medicine_taskForce.html\n", " World War II_taskForce.html\n", " Theoretical Linguistics_taskForce.html\n", " Dermatology_taskForce.html\n", " Cardiology_taskForce.html\n", " Ophthalmology_taskForce.html\n", " World War I_taskForce.html\n", " German military history_taskForce.html\n", " Pulmonology_taskForce.html\n", " Toxicology_taskForce.html" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#------------------------------------------------------------------------------------------------------\n", "# tabular data vizualization, and storage in CSV and HTML format (find these files in ./results folder)\n", "#------------------------------------------------------------------------------------------------------\n", "\n", "README=\"\"\"\n", "# WPM2Cochrane - a tool for linking WikiProject Medicine to the Cochrane Library\n", "\n", "## Launch in JupyterLab (recommended) [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ajoorabchi/WP2Cochrane/master?urlpath=lab/tree/index.ipynb)\n", "\n", "## Launch in Jupyter Notebook [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ajoorabchi/WP2Cochrane/master?filepath=index.ipynb)\n", "\n", "## Results of linking (full dataset)\n", "- [Complete Tabular Results in 
def get_df_info(content: pd.DataFrame):
    """Summarise a DataFrame as a short header text plus a per-column table.

    Originally adapted from https://stackoverflow.com/a/44087453/2339926, which
    parsed the text printed by ``DataFrame.info()``. The per-column table is now
    computed directly from the frame, so the result no longer depends on the
    exact text layout of ``info()`` or on column names being free of whitespace.

    Parameters
    ----------
    content : pd.DataFrame
        The frame to describe.

    Returns
    -------
    info : str
        Three lines taken from ``content.info()``: the class line, the index
        line and the memory-usage line.
    datatypes : pd.DataFrame
        Indexed by column name (index name ``"column"``) with the columns
        ``count`` (number of non-null values), ``null`` (the constant label
        ``"non-null"``) and ``dtype`` (dtype name as a string).
    """
    import io  # local import: avoids relying on ``pandas.compat.StringIO``

    buffer = io.StringIO()
    content.info(buf=buffer)
    lines = buffer.getvalue().split("\n")

    # The info text ends with a newline, so the last element is "" and the
    # memory-usage line is the second-to-last one.
    info = "\n".join(lines[0:2] + lines[-2:-1])

    datatypes = pd.DataFrame(
        {
            "count": content.count(),
            "null": "non-null",
            "dtype": content.dtypes.astype(str),
        }
    )
    datatypes.index.name = "column"

    return info, datatypes

    {taskForce}

    ' \n", " return html\n", "\n", "def make_clickable_WP_en_articles(val):\n", " # target _blank to open new window\n", " if not val: \n", " return None\n", " else:\n", " return '{}'.format(val, unquote(unquote(val[30:].replace(\"_\", \" \"))))\n", " \n", "def list2text(list_):\n", " txt=\"\"\n", " for item in list_:\n", " txt += item + \", \"\n", " return txt[:-2]\n", "\n", "#------------------------------------------------------------------------------------------------------\n", "# full-data.csv generator\n", "#------------------------------------------------------------------------------------------------------\n", "h = html2text.HTML2Text()\n", "h.ignore_links = False\n", "df_plainText =df.copy()\n", "df_plainText['cochrane_reviews_plainText'] = [h.handle(text) if text is not None else text for text in df['cochrane_reviews_html']]\n", "df_plainText['taskForces_plainText'] = [list2text(text) if text is not None else text for text in df['taskForces']]\n", "df_plainText.to_csv('./results/full_data.csv', index=False)\n", "\n", "#------------------------------------------------------------------------------------------------------\n", "# full_data_SPSS-friendy.csv generator (one review per row)\n", "#------------------------------------------------------------------------------------------------------\n", "df_spss_friendly=pd.DataFrame()\n", "for index, row in tqdm(df[['WkD_disease', 'WkD_diseaseLabel', 'WP_en_article','class'\n", " ,'importance','taskForces','cochrane_reviews','talkPage_categories']].iterrows()):\n", " #display(index,row)\n", " PIMD_paper_dict = row['cochrane_reviews']\n", " if PIMD_paper_dict:\n", " search_results_count = len(PIMD_paper_dict)\n", " cited_count=0\n", " outofdate_cited_count=0\n", " for PMID, paper in PIMD_paper_dict.items():\n", " title = paper[0]\n", " cited = paper[1]\n", " latestVersion = paper[2]\n", " if cited:\n", " cited_count +=1\n", " if cited and not latestVersion:\n", " outofdate_cited_count +=1\n", " else:\n", " 
search_results_count = None\n", " cited_count = None\n", " outofdate_cited_count = None\n", " \n", " PIMD_paper_dict = row['cochrane_reviews']\n", " if PIMD_paper_dict:\n", " for PMID, paper in PIMD_paper_dict.items():\n", " title = paper[0]\n", " cited = paper[1]\n", " latestVersion = paper[2]\n", " data = pd.DataFrame(row.items())\n", " data = data.transpose()\n", " data.columns = data.iloc[0]\n", " data = data.drop(data.index[[0]])\n", " data = data.drop(columns=['cochrane_reviews'])\n", " data['PMID']=PMID\n", " data['Title']=title\n", " data['Cited']=cited\n", " data['LatestVersion']=latestVersion\n", " data['Search results count'] = search_results_count\n", " data['Cited count'] = cited_count\n", " data['outofdate_cited_count'] = outofdate_cited_count\n", " df_spss_friendly = df_spss_friendly.append(data)\n", " else:\n", " data = pd.DataFrame(row.items())\n", " data = data.transpose()\n", " data.columns = data.iloc[0]\n", " data = data.drop(data.index[[0]])\n", " data = data.drop(columns=['cochrane_reviews'])\n", " data['PMID']=None\n", " data['Title']=None\n", " data['Cited']=None\n", " data['LatestVersion']=None\n", " data['Search results count'] = None\n", " data['Cited count'] = None\n", " data['outofdate_cited_count'] = None\n", " df_spss_friendly = df_spss_friendly.append(data)\n", " \n", "# if index==10:\n", "# break\n", " \n", "df_spss_friendly.to_csv('./results/full_data_SPSS-friendy.csv', index=False)\n", "\n", "#------------------------------------------------------------------------------------------------------\n", "# full-data.html generator\n", "#------------------------------------------------------------------------------------------------------\n", "th = dict(selector=\"th\", props=[('text-align', 'left'),('font','blod 14px arial, sans-serif'),('vertical-align','top')])\n", "rh = dict(selector=\".row_heading\", props=[(\"text-align\", \"left\"),('font','bold 14px arial, sans-serif'),('vertical-align','top')])\n", "\n", "col0 = 
dict(selector=\".col0\", props=[(\"text-align\", \"left\"),('font','bold 12px arial, sans-serif'),('max-width','100px'),('vertical-align','top')])\n", "col1 = dict(selector=\".col1\", props=[(\"text-align\", \"left\"),('font','bold 12px arial, sans-serif'),('max-width','130px'),('vertical-align','top')])\n", "col2 = dict(selector=\".col2\", props=[(\"text-align\", \"left\"),('font','bold 12px arial, sans-serif'),('max-width','110px'),('vertical-align','top'),('word-wrap','break-word')])\n", "col3 = dict(selector=\".col3\", props=[(\"text-align\", \"left\"),('font','bold 12px arial, sans-serif'),('max-width','60px'),('vertical-align','top'),('word-wrap','break-word')])\n", "col4 = dict(selector=\".col4\", props=[(\"text-align\", \"left\"),('font','bold 12px arial, sans-serif'),('max-width','90px'),('vertical-align','top'),('word-wrap','break-word')])\n", "col5 = dict(selector=\".col5\", props=[(\"text-align\", \"left\"),('font','bold 12px arial, sans-serif'),('max-width','90px'),('vertical-align','top'),('word-wrap','break-word')])\n", "\n", "col6 = dict(selector=\".col6\", props=[(\"text-align\", \"left\"),('font','12px arial, sans-serif'),('vertical-align','top')])\n", "\n", "fullSize = sys.maxsize\n", "testSize = 10\n", "df_vizTable = df[['WkD_disease', 'WkD_diseaseLabel', 'WP_en_article','class','importance','taskForces','cochrane_reviews_html']]\n", "df_vizTable_styled=df_vizTable\\\n", " .head(fullSize).rename_axis('index')\\\n", " .sort_values(by=['importance'],na_position='last')\\\n", " .style.set_table_styles([th,rh,col0,col1,col2,col3,col4,col5,col6]).set_table_attributes('class=\"results\"')\\\n", " .format({'WkD_disease': make_clickable_wkd_items,'WP_en_article': make_clickable_WP_en_articles,\n", " 'taskForces': make_clickable_taskForces})\n", "\n", "tableGuide='

    Table Guide:

    \\\n", "

    Grouping:

    \\\n", "

    In cases where there are multiple versions of a Cochrane review, they are grouped together (showing the same background color)\\\n", " ,and are listed chronologically, latest version first.

    \\\n", "

    Color Codes:

    \\\n", "
      \\\n", "
    1. Green: up-to-date and CITED

    2. \\\n", "
    3. Red: up-to-date and NOT CITED

    4. \\\n", "
    5. Orange: out-of-date and CITED

    6. \\\n", "
    7. Grey: out-of-date and NOT CITED

    8. \\\n", "

    '\n", "\n", "f=open(\"./results/full_data.html\",\"w\")\n", "info,datatypes = get_df_info(df_vizTable)\n", "#display(datatypes,datatypes.at['WkD_disease','count'])\n", "table_size_message=f\"

    Results Table contains {datatypes.at['WkD_disease','count']} rows


    \"\n", "table_size_warning=f\"

    This is a large table, so it could take up to 30 seconds to fully load and render in your browser


    \"\n", "\n", "if datatypes.at['WkD_disease','count']>1000:\n", " table_size_message += table_size_warning\n", "f.write(datatables_js_script + info + datatypes.to_html() + tableGuide + table_size_message\n", " + df_vizTable_styled.render())\n", "f.close()\n", "\n", "#------------------------------------------------------------------------------------------------------\n", "# Creates a dedicated HTML rsults file for each WikiProject medicine task force group in Wikipeida\n", "#------------------------------------------------------------------------------------------------------\n", "for taskForce, count in taskForces_count.items():\n", " if taskForce!=\"NA\":\n", "\n", "# display(HTML(f\"Creating a HTML results file for task Force '{taskForce}':\\\n", "# ./results/HTML_results_per_task_force/{taskForce}_taskForce.html\"))\n", "\n", " #vizTable_per_taskForce = df_vizTable.loc[df_vizTable['taskForces'].apply(str)==\"[]\"]\n", " vizTable_per_taskForce = df_vizTable.loc[df_vizTable['taskForces'].notnull()]\n", " mask = vizTable_per_taskForce.taskForces.apply(lambda x: taskForce in x)\n", " vizTable_per_taskForce = vizTable_per_taskForce[mask]\n", "\n", " \n", " vizTable_per_taskForce_styled = vizTable_per_taskForce.rename_axis('index')\\\n", " .sort_values(by=['importance'],na_position='last')\\\n", " .style.set_table_styles([th,rh,col0,col1,col2,col3,col4,col5,col6]).set_table_attributes('class=\"results\"')\\\n", " .format({'WkD_disease': make_clickable_wkd_items,'WP_en_article': make_clickable_WP_en_articles,\n", " 'taskForces': make_clickable_taskForces})\n", "\n", " f=open(f\"./results/HTML_results_per_task_force/{taskForce}_taskForce.html\",\"w\")\n", " info,datatypes = get_df_info(vizTable_per_taskForce)\n", " #display(datatypes,datatypes.at['WkD_disease','count'])\n", " table_size_message=f\"

    Results Table contains {datatypes.at['WkD_disease','count']} rows


    \"\n", " table_size_warning=f\"

    This is a large table so it needs ~10s to fully load and render in your browser


    \"\n", "\n", " if datatypes.at['WkD_disease','count']>1000:\n", " table_size_message += table_size_warning\n", " f.write(datatables_js_script + info + datatypes.to_html() + tableGuide + table_size_message\n", " + vizTable_per_taskForce_styled.render()) \n", " \n", " f.close()\n", " #update README.md\n", " README += f\"\\n- [{taskForce}](https://ajoorabchi.github.io/WP2Cochrane/results/HTML_results_per_task_force/{quote(taskForce)}_taskForce.html)\"\n", "\n", "#------------------------------------------------------------------------------------------------------\n", "# display CSV & HTML view/download options\n", "#------------------------------------------------------------------------------------------------------\n", "display(HTML('

    Complete Tabular Results in CSV or HTML Format:

    '),FileLinks('./results/'\\\n", " , result_html_prefix='
  • ', result_html_suffix='
  • ',recursive=False))\n", "\n", "display(HTML('

    Specialized HTML results for each \\\n", " WikiProject medicine task force \\\n", " (see list of active task forces \\\n", " here):

    '),FileLinks('./results//HTML_results_per_task_force/'\\\n", " , result_html_prefix='
  • ', result_html_suffix='
  • ',recursive=False))\n", "\n", "\n", "#display(HTML(datatables_js_script + info + datatypes.to_html() + tableGuide + df_vizTable_styled.render()))\n", "\n", "#------------------------------------------------------------------------------------------------------\n", "# write README.md\n", "#------------------------------------------------------------------------------------------------------\n", "f=open(\"./README.md\",\"w\")\n", "f.write(README)\n", "f.close()\n", "\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2019-04-10T16:10:10.237634Z", "start_time": "2019-04-10T16:10:04.521317Z" }, "jupyter": { "source_hidden": true } }, "outputs": [], "source": [ "###### ------------------------------------------------------------------------------------------------------\n", "# WikiData search: searches WkD for a list of dieeas\n", "#------------------------------------------------------------------------------------------------------\n", "\"\"\"\n", "This is a modified version of code from: \n", "1. https://lawlesst.github.io/notebook/sparql-dataframe.html\n", "2. 
def get_sparql_dataframe(service, query):
    """Run ``query`` against the SPARQL endpoint ``service`` and tabulate the result.

    The JSON response is read from the raw HTTP response. The variables listed
    in its ``head.vars`` become the DataFrame columns, and each entry of
    ``results.bindings`` becomes one row. A variable that is unbound in a row
    is stored as None.
    """
    client = SPARQLWrapper(service)
    client.setQuery(query)
    client.setReturnFormat(JSON)
    payload = json.load(client.query().response)

    columns = payload['head']['vars']
    records = [
        [binding.get(column, {}).get('value') for column in columns]
        for binding in payload['results']['bindings']
    ]

    return pd.DataFrame(records, columns=columns)
#------------------------------------------------------------------------------------------------------
# PubMed search
#------------------------------------------------------------------------------------------------------
# This is a modified version of code from:
#   https://gist.github.com/bonzanini/5a4c39e4c02502a8451d
# Full discussion:
#   https://marcobonzanini.wordpress.com/2015/01/12/searching-pubmed-with-python/


def search(index, query):
    """Run one PubMed ``esearch`` for ``query`` and return the parsed result.

    ``index`` is only used to prefix the log line. At most 200 ids are
    requested, sorted by relevance.
    """
    # NOTE(review): placeholder address; NCBI asks for a real contact e-mail.
    Entrez.email = 'your.email@example.com'

    logger.debug (f"{index:>5}. PubMed search query: {query}")
    handle = Entrez.esearch(db='pubmed', sort='relevance', retmax='200', retmode='xml', term=query)
    try:
        return Entrez.read(handle)
    finally:
        handle.close()  # do not leave the HTTP response open


def fetch_details(index, id_list):
    """Fetch the PubMed records for ``id_list``, retrying on failure.

    Parameters
    ----------
    index : int
        Row number, used only to prefix log lines.
    id_list : iterable of str
        PMIDs to fetch; they are sent as one comma-separated request.

    Returns
    -------
    The parsed ``efetch`` result of the first successful attempt.

    Raises
    ------
    Exception
        The error of the last attempt, once every retry has failed.
    """
    import time  # local import: the function does not depend on a bare ``sleep`` name

    ids = ','.join(id_list)
    # NOTE(review): placeholder address; NCBI asks for a real contact e-mail.
    Entrez.email = 'your.email@example.com'

    sleep_time = 10
    max_sleep_time = 600  # cap the backoff; uncapped x1.5 growth reaches hours after ~20 failures
    num_retries = 1000
    # Kept outside the except clause: Python unbinds the ``as`` name when the
    # clause ends, so the caught error must be copied to survive the loop.
    last_error = None
    for _ in range(num_retries):
        try:
            logger.debug (f"{index:>5}. Fetching article details for PMIDs: {ids}")
            handle = Entrez.efetch(db='pubmed', retmode='xml', id=ids)
            try:
                return Entrez.read(handle)
            finally:
                handle.close()
        except Exception as error:
            last_error = error
            logger.error (f"{index:>5}. I got a HTTPError - reason {error}- while trying to fetch the articles details from pubmed")
            logger.error (f"{index:>5}. Sleeping for {sleep_time} seconds before trying again...")
            time.sleep(sleep_time)  # wait before trying to fetch the data again
            sleep_time = min(sleep_time * 1.5, max_sleep_time)  # exponential backoff

    logger.critical (f"{index:>5}. Retried fetching article's details {num_retries} times with no success!")
    raise last_error


def pubmed_search(index, disease, searchTitle, searchAbstract):
    """Return the PMIDs of non-withdrawn Cochrane reviews that mention ``disease``.

    The term is searched in the title only, or in title and abstract when
    ``searchAbstract`` is true. The search is always restricted to the journal
    "The Cochrane database of systematic reviews".

    Raises
    ------
    ValueError
        If ``searchTitle`` is false; no query is defined for that case.
    """
    if not searchTitle:
        raise ValueError("pubmed_search requires searchTitle=True; "
                         "searching without the title field is not supported")

    logger.debug (f"{index:>5}. Searching PubMed for {disease} in searchTitle:{searchTitle} , searchAbstract:{searchAbstract}")
    field = 'Title/Abstract' if searchAbstract else 'Title'
    query = ("(" + disease + '[' + field + '] NOT "withdrawn"[Title]) '
             'AND "The Cochrane database of systematic reviews"[Journal]')
    results = search(index, query)

    return results['IdList']


#------------------------------------------------------------------------------------------------------
# string comparator
#------------------------------------------------------------------------------------------------------
def searh_wp_refs_4PMID(PMID, en_article_HTML):
    """Look up a PMID in the HTML of a Wikipedia page.

    Returns True when ``PMID`` occurs in ``en_article_HTML`` as a complete
    number, i.e. not directly preceded or followed by another digit, so that a
    short PMID is not reported as cited merely because it is part of a longer
    number on the page.
    """
    import re  # local import keeps the helper self-contained

    pattern = r'(?<!\d)' + re.escape(str(PMID)) + r'(?!\d)'
    return re.search(pattern, en_article_HTML) is not None
responses\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 377\u001b[0;31m \u001b[0mhttplib_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuffering\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 378\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Python 3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mTypeError\u001b[0m: getresponse() got an unexpected keyword argument 'buffering'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{index:>5}. 
Getting the WikiPedia content for: {WP_en_article_Title}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0mWP_en_article_obj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwikipedia\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mWikipediaPage\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mWP_en_article_Title\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 47\u001b[0;31m \u001b[0mWP_en_article_HTML\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mWP_en_article_obj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhtml\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 48\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[0;31m#checking for redirects\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/wikipedia/wikipedia.py\u001b[0m in \u001b[0;36mhtml\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 452\u001b[0m }\n\u001b[1;32m 453\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 454\u001b[0;31m \u001b[0mrequest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_wiki_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquery_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 455\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_html\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mrequest\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'query'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'pages'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpageid\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'revisions'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'*'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/wikipedia/wikipedia.py\u001b[0m in \u001b[0;36m_wiki_request\u001b[0;34m(params)\u001b[0m\n\u001b[1;32m 735\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwait_time\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtotal_seconds\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 736\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 737\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mAPI_URL\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 738\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 739\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mRATE_LIMIT\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/requests/api.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(url, params, 
**kwargs)\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msetdefault\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'allow_redirects'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 75\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'get'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 76\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/requests/api.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0;31m# cases, and look like a memory leak in others.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0msessions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 60\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 61\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 531\u001b[0m }\n\u001b[1;32m 532\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 533\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 534\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 535\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 666\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 667\u001b[0m \u001b[0;31m# Resolve redirects if allowed.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 668\u001b[0;31m \u001b[0mhistory\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mresp\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mresp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mgen\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mallow_redirects\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 669\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 670\u001b[0m \u001b[0;31m# Shuffle things around if there's history.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 666\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 667\u001b[0m \u001b[0;31m# Resolve redirects if allowed.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 668\u001b[0;31m \u001b[0mhistory\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mresp\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mresp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mgen\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mallow_redirects\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 669\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 670\u001b[0m \u001b[0;31m# Shuffle things around if there's history.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mresolve_redirects\u001b[0;34m(self, resp, req, stream, timeout, verify, cert, proxies, yield_requests, **adapter_kwargs)\u001b[0m\n\u001b[1;32m 245\u001b[0m \u001b[0mproxies\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mproxies\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 246\u001b[0m \u001b[0mallow_redirects\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 247\u001b[0;31m \u001b[0;34m**\u001b[0m\u001b[0madapter_kwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 248\u001b[0m )\n\u001b[1;32m 249\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 644\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 645\u001b[0m \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 646\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 647\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 648\u001b[0m \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[0mdecode_content\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 448\u001b[0m \u001b[0mretries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 449\u001b[0;31m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 450\u001b[0m )\n\u001b[1;32m 451\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, 
timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 598\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout_obj\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 599\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 600\u001b[0;31m chunked=chunked)\n\u001b[0m\u001b[1;32m 601\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 602\u001b[0m \u001b[0;31m# If we're going to release the connection in ``finally:``, then\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 378\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# Python 3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 379\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 380\u001b[0;31m \u001b[0mhttplib_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 381\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 382\u001b[0m \u001b[0;31m# Remove the TypeError from the exception chain in Python 3;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 
"\u001b[0;32m~/anaconda3/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mgetresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1329\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1330\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1331\u001b[0;31m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbegin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1332\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1333\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36mbegin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 295\u001b[0m \u001b[0;31m# read until we get a non-100 response\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 296\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 297\u001b[0;31m \u001b[0mversion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreason\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 298\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mCONTINUE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 299\u001b[0m 
\u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/http/client.py\u001b[0m in \u001b[0;36m_read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 258\u001b[0;31m \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_MAXLINE\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"iso-8859-1\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 259\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0m_MAXLINE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 260\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mLineTooLong\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"status line\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 584\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 585\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 586\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 587\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py\u001b[0m in \u001b[0;36mrecv_into\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 294\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 295\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mOpenSSL\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSSL\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSysCallError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 296\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msuppress_ragged_eofs\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Unexpected EOF'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/OpenSSL/SSL.py\u001b[0m in \u001b[0;36mrecv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1819\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_lib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSSL_peek\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ssl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1820\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1821\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_lib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSSL_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ssl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1822\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_raise_ssl_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ssl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1823\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ 
"""------------------------------------------------------------------------------------------------------
Main: this is the main part of the code which encompasses the following steps:

    1. content and metadata of WP articles which correspond to a disease in WikiData
       are retrieved (title, class, importance, taskforce, content)

    2. For each WP article a search query is submitted to PubMed to retrieve a list of relevant Cochrane reviews

    3. the retrieved list of reviews is cross-checked against the WP article's reference section to identify
       cited, missing and out-of-date reviews.

    4. gathered information is converted to a color-coded tabular format.

Relies on names created by earlier cells: df, logger, console_handler, pubmed_search,
fetch_details and searh_wp_refs_4PMID.

NOTE(review): the inline HTML of the status / report strings was not recoverable from the
copy of the notebook this cell was reviewed from. The <p>/<ol>/<li> markup below is a minimal
reconstruction that keeps the visible text and uses the computed colours -- confirm it against
the rendered report before relying on the exact styling.
------------------------------------------------------------------------------------------------------"""

# Every article URL starts with this prefix; what follows is the (percent-encoded) page title.
WP_ARTICLE_URL_PREFIX = "https://en.wikipedia.org/wiki/"

# A result list of exactly this size is treated as "too many matches".
# NOTE(review): presumably the result cap applied inside pubmed_search -- confirm.
TOO_MANY_MATCHES = 200

# Prefix of a "<X>-importance medicine articles" talk-page category -> sortable label.
IMPORTANCE_LABELS = {
    'NA': None,
    '???': None,
    'Unknown': None,
    'Low': '4-Low',
    'Mid': '3-Mid',
    'High': '2-High',
    'Top': '1-Top',
}

RESULT_COLUMNS = ['class', 'importance', 'taskForces',
                  'cochrane_reviews_html', 'cochrane_reviews', 'talkPage_categories']

h2t = html2text.HTML2Text()
h2t.ignore_links = True

# (Re)create the result columns as object columns so that lists / dicts can be stored per cell.
for column in RESULT_COLUMNS:
    df[column] = None

disease_search_log = widgets.HTML()
citation_search_log = widgets.HTML()

display(disease_search_log)
display(citation_search_log)

# Only rows that have an English Wikipedia article are processed. Iterating exactly those rows
# keeps the progress bar's total in step with the number of iterations.
rows_with_article = df[df['WP_en_article'].notna()]

for index, row in tqdm_notebook(rows_with_article.iterrows(), desc='Progress',
                                total=len(rows_with_article), unit="WkD_disease"):
    label = row['WkD_diseaseLabel']
    article_url = row['WP_en_article']

    if console_handler.level <= logging.INFO:
        display(HTML(f"START of row {index:>5}"))
    else:
        logger.info(f"<{index:>5}------------------------------------------------")

    disease_search_log.value = (
        f'<p>processing: disease #{index:>5}'
        f'   {label}   {row["WkD_disease"]}   {article_url}</p>'
    )
    WP_en_article_Title = unquote(unquote(article_url[len(WP_ARTICLE_URL_PREFIX):]))

    # Per-row state. Resetting it here guarantees that a failed lookup can never fall back on
    # the page, HTML or categories fetched for the previous row.
    WP_en_article_HTML = None
    talk_categories = None
    page_title = WP_en_article_Title   # best-known title, used in the error messages below

    # WikipediaPage raises DisambiguationError for a disambiguation page and PageError when the
    # page does not exist. The inner try handles disambiguation; the outer one also covers
    # PageError / KeyError raised while following the disambiguation fallback.
    try:
        try:
            logger.info(f"{index:>5}. Getting the WikiPedia content for: {WP_en_article_Title}")
            WP_en_article_obj = wikipedia.WikipediaPage(WP_en_article_Title)
            page_title = WP_en_article_obj.title
            WP_en_article_HTML = WP_en_article_obj.html()

            # checking for redirects
            if WP_en_article_Title.casefold() != WP_en_article_obj.title.casefold().replace(" ", "_"):
                logger.info(f"{index:>5}. Redirected to: {WP_en_article_obj.title}")

            logger.info(f"{index:>5}. Getting the WikiPedia talkPage_categories for: {WP_en_article_obj.title}")
            WP_en_article_talk_obj = wikipedia.WikipediaPage("Talk:" + WP_en_article_obj.title)
            talk_categories = WP_en_article_talk_obj.categories

        except wikipedia.exceptions.DisambiguationError as e:
            # e.title is the title that turned out to be ambiguous; no page object exists yet.
            logger.warning(f"{index:>5}. '{e.title}' is an Ambiguous title: {e.options}")
            logger.warning(f"{index:>5}. Picking the first sense in the list by default: {e.options[0]}")
            WP_en_article_obj = wikipedia.WikipediaPage(e.options[0])
            page_title = WP_en_article_obj.title
            WP_en_article_HTML = WP_en_article_obj.html()

            # checking for redirects (both sides normalised the same way)
            if e.options[0].casefold().replace(" ", "_") != WP_en_article_obj.title.casefold().replace(" ", "_"):
                logger.warning(f"{index:>5}. Redirected to: {WP_en_article_obj.title}")

            logger.warning(f"{index:>5}. Getting the talkPage_categories for: {WP_en_article_obj.title}")
            WP_en_article_talk_obj = wikipedia.WikipediaPage("Talk:" + WP_en_article_obj.title)
            talk_categories = WP_en_article_talk_obj.categories

    except wikipedia.exceptions.PageError as e:
        logger.error(f"{index:>5}. {page_title} - I got a PageError - reason: {e} - Article has no talk page yet")
    except KeyError as e:
        logger.error(f"{index:>5}. {page_title} - I got a KeyError - reason: {e} - Article's Talk page has no Category")
    finally:
        if console_handler.level == logging.DEBUG:
            display(talk_categories)
        else:
            logger.debug(talk_categories)

    # ---- class / importance / task forces from the talk-page categories --------------------
    logger.debug(f"{index:>5}. Extracting task forces, class, and importance")
    article_class = None
    importance = None
    taskForces = []
    for cat in talk_categories or []:
        cat_folded = cat.casefold()
        if cat_folded.endswith("-class medicine articles"):
            article_class = cat[0:-24]          # strip the 24-character suffix
        if cat_folded.endswith('-importance medicine articles'):
            # .get(): an unexpected prefix yields None instead of aborting the whole run
            importance = IMPORTANCE_LABELS.get(cat[0:-29])
        if cat_folded.endswith(' task force articles') and "wikiproject" not in cat_folded:
            taskForces.append(cat[0:-20])
    task_forces = taskForces or None

    row_summary = f"{index:>5}. class: {article_class}, importance: {importance}, task forces: {task_forces}"
    if console_handler.level <= logging.INFO:
        display(HTML(row_summary))
    else:
        logger.info(row_summary)

    # ---- PubMed search, with progressively adjusted queries --------------------------------
    matches = 0
    PIMD_paper_dict = {}
    article_search_term = WP_en_article_Title.replace("_", " ")

    logger.info(f"{index:>5}. searching Pubmed for WkD_diseaseLabel: {label}")
    id_list = pubmed_search(index, label, True, True)
    if len(id_list) == TOO_MANY_MATCHES:
        logger.warning(f"{index:>5}. (a) {label} - Too many matches found (>200)")
        logger.warning(f"{index:>5}. (b) restrcting search to Titles only (excluding Abstracts)")
        time.sleep(1)
        id_list = pubmed_search(index, label, True, False)
        if len(id_list) == 0:
            logger.warning(f"{index:>5}. (a) {label} - Restrcting search to Titles only returned no results")
            logger.warning(f"{index:>5}. (b) reverting back to title/abstract")
            time.sleep(1)
            id_list = pubmed_search(index, label, True, True)

    # Fall back to the article title (minus any parenthesised qualifier) when the WikiData
    # label found nothing and the two actually differ.
    if not id_list and label.lower() != re.sub(r" ?\([^)]+\)", "", article_search_term.lower()):
        logger.warning(f"{index:>5}. (a) searching for '{label}' returned {len(id_list)} results")
        logger.warning(f"{index:>5}. (b) searching for '{article_search_term}' instead")
        time.sleep(1)
        id_list = pubmed_search(index, article_search_term, True, True)
        if len(id_list) == TOO_MANY_MATCHES:
            logger.warning(f"{index:>5}. (a) {article_search_term} - Retruned too many matches (>200)")
            logger.warning(f"{index:>5}. (b) restrcting search to Titles only (excluding Abstracts)")
            time.sleep(1)
            id_list = pubmed_search(index, article_search_term, True, False)
            if len(id_list) == 0:
                logger.warning(f"{index:>5}. (a) {article_search_term} - Restrcting search to Titles only returned no results")
                logger.warning(f"{index:>5}. (b) reverting back to title/abstract")
                time.sleep(1)
                id_list = pubmed_search(index, article_search_term, True, True)

    logger.info(f"{index:>5}. {len(id_list)} matching PMIDs found.")

    # ---- cross-check every review against the article's HTML -------------------------------
    if id_list:
        papers = fetch_details(index, id_list)
        for paper in papers['PubmedArticle']:
            articleTitle = paper['MedlineCitation']['Article']['ArticleTitle']
            PMID = int(paper['MedlineCitation']['PMID'])
            # Without a successfully loaded article there is nothing to search in, so the
            # review is recorded as not cited rather than checked against another page.
            cited = searh_wp_refs_4PMID(str(PMID), WP_en_article_HTML) if WP_en_article_HTML else False
            if cited:
                matches += 1
            # layout of each entry: [title, cited, latestVersion]
            PIMD_paper_dict[PMID] = [articleTitle, cited, None]

    # Case-insensitive ordering by title, so that versions of the same review end up adjacent.
    PIMD_paper_dict = OrderedDict(sorted(PIMD_paper_dict.items(), key=lambda item: str(item[1][0]).lower()))

    # ---- render the colour-coded review list -----------------------------------------------
    if PIMD_paper_dict:
        # Highest PMID seen per (case-insensitive) title: a review is the latest version
        # unless another entry with the same title has a higher PMID.
        newest_pmid_by_title = {}
        for PMID, paper in PIMD_paper_dict.items():
            title_key = str(paper[0]).lower()
            newest_pmid_by_title[title_key] = max(PMID, newest_pmid_by_title.get(title_key, PMID))

        list_items = []
        # Deliberately not equal to "white": the first latest-version entry flips it to "white".
        bgc = "White"
        for PMID, paper in PIMD_paper_dict.items():
            title, cited = paper[0], paper[1]
            if cited:
                color = "green"
                cited_message = " [CITED] "
            else:
                color = "red"
                cited_message = " [NOT CITED] "

            latestVersion = PMID >= newest_pmid_by_title[str(title).lower()]
            paper[2] = latestVersion

            if latestVersion:
                version_message = " [LATEST Version] "
                # the background alternates once per distinct review, not per version
                bgc = "#E0F5FE" if bgc == "white" else "white"
            else:
                version_message = " [OLD Version] "
                if cited:
                    color = "orange"
                    version_message += " [UPDATE NEEDED] "
                else:
                    color = "grey"

            list_items.append(
                f'<li style="color:{color};background-color:{bgc}">'
                f'{title} PMID: {PMID}{cited_message}{version_message}</li>'
            )
        reviews_html = '<ol>' + ''.join(list_items) + '</ol>'
    else:
        reviews_html = "No matching publication found!"

    if console_handler.level <= logging.DEBUG:
        display(HTML(reviews_html))
    else:
        logger.debug(reviews_html)

    # ---- store the results -----------------------------------------------------------------
    # Rows yielded by iterrows() may be copies, so assigning into `row` is not guaranteed to
    # reach the frame; write each value into `df` explicitly.
    row_results = {
        'class': article_class,
        'importance': importance,
        'taskForces': task_forces,
        'cochrane_reviews_html': reviews_html,
        'cochrane_reviews': PIMD_paper_dict,
        'talkPage_categories': talk_categories,
    }
    for column, value in row_results.items():
        df.at[index, column] = value

    citation_search_log.value = (
        f'<p>processed: disease #{index:>5}   '
        f'{label}   {row["WkD_disease"]}   {article_url}</p>'
        f'<p>{matches} of {len(id_list)} '
        f'Cochrane reviews found (via PubMed) are cited in the Wikipedia article: {WP_en_article_Title}</p>'
    )

    if console_handler.level <= logging.INFO:
        display(HTML(f"END of row {index:>5}"))
    else:
        logger.info(f"------------------------------------------------{index:>5}>")


#------------------------------------------------------------------------------------------------------
# Once all the tasks (above cells) are completed successfully, the dataframe
# is stored in persistent storage for future use (e.g., Binder)
#------------------------------------------------------------------------------------------------------
# Today's date as an integer of the form YYYYMMDD; it names the pickle file.
current_datetime = datetime.now()
current_datetime_int = int(current_datetime.strftime('%Y%m%d'))

display(HTML(f"<p>Saving df in ./persistent_storage/{current_datetime_int}.pkl</p>"))
logger.info(f"Saving df in ./persistent_storage/{current_datetime_int}.pkl")
df.to_pickle(f"./persistent_storage/{current_datetime_int}.pkl")
"instance", "_Feature" ], "window_display": false }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": { "02fcf423e13c405d90881f77fd72a2f1": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.4.0", "model_name": "ProgressStyleModel", "state": { "description_width": "initial" } }, "234bc1990c3248f1acdaa5e1c2e01a8c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.4.0", "model_name": "HTMLModel", "state": { "layout": "IPY_MODEL_7a8dc09eaf464dd597c1dd4141a8b9a3", "style": "IPY_MODEL_25a2f984db404154b59d9a6ff1e8c508", "value": "| 11621/? [3:34:57<00:00, 1.11s/WkD_disease]" } }, "25a2f984db404154b59d9a6ff1e8c508": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.4.0", "model_name": "DescriptionStyleModel", "state": { "description_width": "" } }, "3e036a8f5b654e3ebd45c34a9bfcf9d4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.4.0", "model_name": "HTMLModel", "state": { "layout": "IPY_MODEL_6f716b54558f4deb9983f274991c97d5", "style": "IPY_MODEL_4b5fada894fd4fe48f1f247b461db333", "value": "

    WkDdiseases_with_WParticles_count: 4381,   total_CochraneReviews_found: 19962,   total_CochraneReviews_cited: 1049

    " } }, "4b5fada894fd4fe48f1f247b461db333": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.4.0", "model_name": "DescriptionStyleModel", "state": { "description_width": "" } }, "4cd5947e74ca455c9eeb24470e34ff62": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.1.0", "model_name": "LayoutModel", "state": {} }, "6c837377612a4cf5b13dbb635862403f": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.1.0", "model_name": "LayoutModel", "state": {} }, "6f716b54558f4deb9983f274991c97d5": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.1.0", "model_name": "LayoutModel", "state": {} }, "79f2b78f122846988434c41a2ad4a2d8": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.1.0", "model_name": "LayoutModel", "state": {} }, "7a8dc09eaf464dd597c1dd4141a8b9a3": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.1.0", "model_name": "LayoutModel", "state": {} }, "83d507f804354506a1f32bc53d56d18a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.4.0", "model_name": "HBoxModel", "state": { "children": [ "IPY_MODEL_a93d0d8d295e4169941c52d9363e93d5", "IPY_MODEL_234bc1990c3248f1acdaa5e1c2e01a8c" ], "layout": "IPY_MODEL_6c837377612a4cf5b13dbb635862403f" } }, "8613affbed9a47ea885f3a9f79988f4a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.4.0", "model_name": "DescriptionStyleModel", "state": { "description_width": "" } }, "a93d0d8d295e4169941c52d9363e93d5": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.4.0", "model_name": "IntProgressModel", "state": { "bar_style": "success", "description": "Progress", "layout": "IPY_MODEL_ce9f975bbb6847379526446159cd96c1", "max": 4381, "style": "IPY_MODEL_02fcf423e13c405d90881f77fd72a2f1", "value": 4381 } }, "ce9f975bbb6847379526446159cd96c1": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.1.0", "model_name": "LayoutModel", 
"state": {} }, "d3d4db35ac0f4b1e9b3af80c0a9e63a2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.4.0", "model_name": "HTMLModel", "state": { "layout": "IPY_MODEL_4cd5947e74ca455c9eeb24470e34ff62", "style": "IPY_MODEL_8613affbed9a47ea885f3a9f79988f4a", "value": "

    processing: disease #11580   Ebola virus disease   http://www.wikidata.org/entity/Q51993   https://en.wikipedia.org/wiki/Ebola_virus_disease

    " } }, "e01f923030e149898ee50e28e4084a44": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.4.0", "model_name": "DescriptionStyleModel", "state": { "description_width": "" } }, "e84032ee57734e71aee666a933508946": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.4.0", "model_name": "HTMLModel", "state": { "layout": "IPY_MODEL_79f2b78f122846988434c41a2ad4a2d8", "style": "IPY_MODEL_e01f923030e149898ee50e28e4084a44", "value": "

    processed: disease #11580   Ebola virus disease   http://www.wikidata.org/entity/Q51993   https://en.wikipedia.org/wiki/Ebola_virus_disease

    0 of 2 Cochrane reviews found (via PubMed) are cited in the Wikipedia article: Ebola_virus_disease

    " } } }, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 2 }