# COVID-19 Literature Publication Times extracted from PubMed

In [1]:
import lzma
import urllib.request

import pandas
import tqdm.notebook
from pubmedpy.eutilities import download_pubmed_ids

Download the list of articles (PubMed ID, title, and journal) that are part of [LitCovid](https://www.ncbi.nlm.nih.gov/research/coronavirus/), which is described in:

> **Keep up with the latest coronavirus research**  
Qingyu Chen, Alexis Allot, Zhiyong Lu  
*Nature* (2020-03-10) <https://doi.org/ggq9fd>  
DOI: [10.1038/d41586-020-00694-1](https://doi.org/10.1038/d41586-020-00694-1) · PMID: [32157233](https://www.ncbi.nlm.nih.gov/pubmed/32157233)


In [2]:
# Export endpoint serving the LitCovid collection as a single TSV file.
url = "https://www.ncbi.nlm.nih.gov/research/coronavirus-api/export/tsv"
# urlretrieve writes the response body to disk and returns
# a (local filename, response headers) pair.
_filename, headers = urllib.request.urlretrieve(
    url=url,
    filename="data/litcovid.tsv",
)
# The server-side filename in Content-Disposition is dated, which records
# when this snapshot of LitCovid was exported.
headers["Content-Disposition"]

'attachment; filename=05272020.litcovid.export.tsv'

In [3]:
# The export is expected to start with a preamble of "#"-prefixed lines ahead
# of the column header row. Count that preamble and skip it explicitly instead
# of passing comment="#": pandas treats the comment character as starting a
# comment anywhere in a line, so a "#" inside an article title would silently
# truncate that row and leave its remaining columns missing.
litcovid_path = "data/litcovid.tsv"
with open(litcovid_path, encoding="utf-8") as read_file:
    n_preamble_lines = 0
    for line in read_file:
        if line.strip() and not line.startswith("#"):
            break  # first non-blank, non-comment line: the column header row
        n_preamble_lines += 1
litcovid_df = pandas.read_table(litcovid_path, skiprows=n_preamble_lines)
litcovid_df.head()

Unnamed: 0,pmid,title,journal
0,32450607,Gastrointestinal: Bowel ischemia in a suspecte...,J Gastroenterol Hepatol
1,32450565,Novel Coronavirus-Induced Right Ventricular Fa...,Cardiology
2,32450560,"COVID-19, Low-Molecular-Weight Heparin, and He...",Kidney Blood Press Res
3,32450492,Mental health and COVID-19 in Nepal: A case of...,Asian J Psychiatr
4,32450477,Hardware versus heartware: The need to address...,J Clin Anesth


In [4]:
# Report how many rows (one per article) were read from the LitCovid export.
n_articles = len(litcovid_df)
print(f'{n_articles:,} articles in litcovid.tsv')

16,405 articles in litcovid.tsv


In [5]:
# Output file for the eSummary download; the download cell opens this path
# with lzma, so the contents are written xz-compressed.
path = "data/litcovid-esummaries.xml.xz"

In [6]:
# Convert the pmid column to ints and put the IDs in ascending order.
pubmed_ids = sorted(int(pmid) for pmid in litcovid_df.pmid)
print(f'{len(pubmed_ids):,}')

# Stream the eSummary records for every ID into an xz-compressed text file.
# NOTE(review): retmax/retmin presumably bound the per-request batch size and
# sleep/error_sleep the pauses between requests; confirm against pubmedpy.
with lzma.open(path, mode='wt') as write_file:
    download_pubmed_ids(
        pubmed_ids,
        write_file,
        endpoint='esummary',
        retmax=200,
        retmin=50,
        sleep=0,
        error_sleep=1,
        tqdm=tqdm.notebook.tqdm,
    )

16,405


HBox(children=(FloatProgress(value=0.0, max=16405.0), HTML(value='')))




