{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "KoiuqPnaQL9Z" }, "source": [ "# COVID-19 Literature Publication Times extracted from PubMed" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": {}, "colab_type": "code", "id": "W4-_oZ4KQpTr" }, "outputs": [], "source": [
 "import lzma\n",
 "import pathlib\n",
 "import urllib.request\n",
 "\n",
 "import pandas\n",
 "import tqdm.notebook\n",
 "from pubmedpy.eutilities import download_pubmed_ids"
 ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "KYSST0tvP6Q_" }, "source": [ "Download articles that are part of [LitCovid](https://www.ncbi.nlm.nih.gov/research/coronavirus/):\n", "\n", "> **Keep up with the latest coronavirus research** \n", "Qingyu Chen, Alexis Allot, Zhiyong Lu \n", "*Nature* (2020-03-10) \n", "DOI: [10.1038/d41586-020-00694-1](https://doi.org/10.1038/d41586-020-00694-1) · PMID: [32157233](https://www.ncbi.nlm.nih.gov/pubmed/32157233)\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'attachment; filename=05272020.litcovid.export.tsv'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Download the current LitCovid TSV export to data/litcovid.tsv.\n",
 "url = \"https://www.ncbi.nlm.nih.gov/research/coronavirus-api/export/tsv\"\n",
 "# urlretrieve does not create parent directories and raises FileNotFoundError\n",
 "# when data/ is missing (e.g. on a fresh runtime), so create it up front.\n",
 "pathlib.Path(\"data\").mkdir(parents=True, exist_ok=True)\n",
 "_filename, headers = urllib.request.urlretrieve(url, filename=\"data/litcovid.tsv\")\n",
 "# show dated filename\n",
 "headers.get(\"Content-Disposition\")"
 ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "colab_type": "code", "id": "-BzMAdh8Pk_s", "outputId": "83145bbc-4a24-4bfe-acc9-7ea43016af21" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pmidtitlejournal
032450607Gastrointestinal: Bowel ischemia in a suspecte...J Gastroenterol Hepatol
132450565Novel Coronavirus-Induced Right Ventricular Fa...Cardiology
232450560COVID-19, Low-Molecular-Weight Heparin, and He...Kidney Blood Press Res
332450492Mental health and COVID-19 in Nepal: A case of...Asian J Psychiatr
432450477Hardware versus heartware: The need to address...J Clin Anesth
\n", "
" ], "text/plain": [ " pmid title \\\n", "0 32450607 Gastrointestinal: Bowel ischemia in a suspecte... \n", "1 32450565 Novel Coronavirus-Induced Right Ventricular Fa... \n", "2 32450560 COVID-19, Low-Molecular-Weight Heparin, and He... \n", "3 32450492 Mental health and COVID-19 in Nepal: A case of... \n", "4 32450477 Hardware versus heartware: The need to address... \n", "\n", " journal \n", "0 J Gastroenterol Hepatol \n", "1 Cardiology \n", "2 Kidney Blood Press Res \n", "3 Asian J Psychiatr \n", "4 J Clin Anesth " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# The export begins with '#'-prefixed header lines. Count and skip exactly those\n",
 "# rather than passing comment='#': pandas applies `comment` anywhere in a line, so\n",
 "# a '#' inside a title (e.g. a hashtag) would truncate that row and blank its journal.\n",
 "# NOTE(review): assumes '#' lines occur only at the top of the file - confirm.\n",
 "with open(\"data/litcovid.tsv\", encoding=\"utf-8\") as read_file:\n",
 "    n_comment_lines = 0\n",
 "    for line in read_file:\n",
 "        if not line.startswith(\"#\"):\n",
 "            break\n",
 "        n_comment_lines += 1\n",
 "\n",
 "litcovid_df = pandas.read_table(\"data/litcovid.tsv\", skiprows=n_comment_lines)\n",
 "litcovid_df.head()"
 ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "id": "ld1rcf9XQEFQ", "outputId": "800209fc-a8a5-4d19-8f8a-7e3ce283acd4" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "16,405 articles in litcovid.tsv\n" ] } ], "source": [ "print(f'{len(litcovid_df):,} articles in litcovid.tsv')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": {}, "colab_type": "code", "id": "2kQ9lrGYa4Li" }, "outputs": [], "source": [
 "# Output file for the PubMed eSummary XML; the .xz archive is written via lzma in a later cell.\n",
 "path = \"data/litcovid-esummaries.xml.xz\""
 ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 83, "referenced_widgets": [ "22e49cf0280d40bb8a0061686cb7e107", "30ce2c66e12b4bfba400058e5ccb75b7", "26dc39c2a5bb4a9b9fc4fbd39deb2dda", "960e519c1af74b49b48979d8ffd22135", "51ee38b2a1724256aa50a38b64497d51", "3c2086081e9d49be81e2c0825440b87e", "cc361014924b4767bf8729b08ecfe2d8", "2587e775e07f4829903d669be5998611" ] }, "colab_type": "code", "id": "cXFIYDrRRiQD", "outputId": "4c7d8d72-c148-442d-fee0-fbdf38837b4a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "16,405\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": {
"model_id": "36ba6bcca9f94eaabcabd84987b8173c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=16405.0), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "WARNING:root:1 successive error: 200 IDs[32355056 … 32358406] threw HTTPSConnectionPool(host='eutils.ncbi.nlm.nih.gov', port=443): Max retries exceeded with url: /entrez/eutils/esummary.fcgi?db=pubmed&id=32355056%2C32355099%2C32355107%2C32355114%2C32355115%2C32355116%2C32355117%2C32355118%2C32355119%2C32355131%2C32355132%2C32355222%2C32355243%2C32355260%2C32355296%2C32355299%2C32355328%2C32355329%2C32355330%2C32355392%2C32355394%2C32355415%2C32355424%2C32355435%2C32355447%2C32355450%2C32355509%2C32355510%2C32355546%2C32355547%2C32355555%2C32355556%2C32355564%2C32355606%2C32355607%2C32355634%2C32355638%2C32355651%2C32355653%2C32355658%2C32355659%2C32355694%2C32355837%2C32355863%2C32355869%2C32355889%2C32355892%2C32355904%2C32355949%2C32355961%2C32355962%2C32355982%2C32355985%2C32356025%2C32356031%2C32356032%2C32356040%2C32356047%2C32356096%2C32356161%2C32356164%2C32356225%2C32356251%2C32356252%2C32356294%2C32356298%2C32356301%2C32356302%2C32356307%2C32356322%2C32356356%2C32356382%2C32356422%2C32356423%2C32356460%2C32356508%2C32356510%2C32356516%2C32356569%2C32356573%2C32356577%2C32356578%2C32356580%2C32356583%2C32356590%2C32356601%2C32356603%2C32356625%2C32356626%2C32356627%2C32356628%2C32356639%2C32356640%2C32356641%2C32356642%2C32356654%2C32356672%2C32356698%2C32356760%2C32356761%2C32356777%2C32356849%2C32356857%2C32356858%2C32356863%2C32356866%2C32356867%2C32356869%2C32356871%2C32356896%2C32356900%2C32356908%2C32356910%2C32356926%2C32356927%2C32356944%2C32356945%2C32356955%2C32357070%2C32357072%2C32357074%2C32357084%2C32357086%2C32357206%2C32357209%2C32357210%2C32357257%2C32357273%2C32357275%2C32357277%2C32357281%2C32357288%2C32357307%2C32357377%2C32357378%2C32357379%2C32357380%2C
32357381%2C32357424%2C32357457%2C32357471%2C32357503%2C32357545%2C32357553%2C32357575%2C32357582%2C32357655%2C32357675%2C32357808%2C32357883%2C32357949%2C32357950%2C32357952%2C32357954%2C32357957%2C32357959%2C32357975%2C32357976%2C32357977%2C32357978%2C32357994%2C32357995%2C32357996%2C32357997%2C32357998%2C32358044%2C32358045%2C32358057%2C32358098%2C32358099%2C32358105%2C32358107%2C32358120%2C32358121%2C32358130%2C32358131%2C32358134%2C32358142%2C32358176%2C32358180%2C32358202%2C32358203%2C32358216%2C32358217%2C32358218%2C32358227%2C32358228%2C32358229%2C32358230%2C32358231%2C32358232%2C32358233%2C32358234%2C32358250%2C32358303%2C32358311%2C32358312%2C32358325%2C32358326%2C32358406&rettype=xml (Caused by NewConnectionError(': Failed to establish a new connection: [Errno -2] Name or service not known'))\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [
 "# Fetch PubMed eSummary records for every LitCovid PMID, in ascending PMID order.\n",
 "pubmed_ids = sorted(map(int, litcovid_df.pmid))\n",
 "print(f'{len(pubmed_ids):,}')\n",
 "\n",
 "# Pin the archive to UTF-8: without encoding=, lzma's text mode falls back to the\n",
 "# locale's preferred encoding, which is not UTF-8 on every platform.\n",
 "# NOTE(review): retmax/retmin presumably bound the IDs-per-request batch size\n",
 "# (the logged failing batch held 200 IDs) - confirm against the pubmedpy docs.\n",
 "with lzma.open(path, 'wt', encoding='utf-8') as write_file:\n",
 "    download_pubmed_ids(\n",
 "        pubmed_ids, write_file, endpoint='esummary',\n",
 "        retmax=200, retmin=50, sleep=0, error_sleep=1,\n",
 "        tqdm=tqdm.notebook.tqdm,\n",
 "    )"
 ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "covid-publication-times.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.2" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "22e49cf0280d40bb8a0061686cb7e107": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module":
"@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_26dc39c2a5bb4a9b9fc4fbd39deb2dda", "IPY_MODEL_960e519c1af74b49b48979d8ffd22135" ], "layout": "IPY_MODEL_30ce2c66e12b4bfba400058e5ccb75b7" } }, "2587e775e07f4829903d669be5998611": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "26dc39c2a5bb4a9b9fc4fbd39deb2dda": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "100%", "description_tooltip": null, "layout": "IPY_MODEL_3c2086081e9d49be81e2c0825440b87e", "max": 10, "min": 0, "orientation": "horizontal", "style": 
"IPY_MODEL_51ee38b2a1724256aa50a38b64497d51", "value": 10 } }, "30ce2c66e12b4bfba400058e5ccb75b7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3c2086081e9d49be81e2c0825440b87e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, 
"max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "51ee38b2a1724256aa50a38b64497d51": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "initial" } }, "960e519c1af74b49b48979d8ffd22135": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2587e775e07f4829903d669be5998611", "placeholder": "​", "style": "IPY_MODEL_cc361014924b4767bf8729b08ecfe2d8", "value": " 10/10 [03:24<00:00, 20.48s/articles]" } }, "cc361014924b4767bf8729b08ecfe2d8": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "nbformat": 4, "nbformat_minor": 1 }