In [1]:
from Bio import Entrez, Medline, SeqIO

### Do not forget to tell NCBI your email address (change it in the cell below)

In [2]:
# Contact address sent along with Entrez requests; replace the placeholder
# with your own address before running the notebook.
Entrez.email = "put@your_email.here"

In [3]:
# Ask NCBI for the list of databases available through Entrez.
handle = Entrez.einfo()
try:
    rec = Entrez.read(handle)
finally:
    # Release the connection even if parsing the response fails.
    handle.close()
print(rec)

{u'DbList': ['pubmed', 'protein', 'nuccore', 'nucleotide', 'nucgss', 'nucest', 'structure', 'genome', 'assembly', 'genomeprj', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'clone', 'gap', 'gapplus', 'grasp', 'dbvar', 'epigenomics', 'gene', 'gds', 'geoprofiles', 'homologene', 'medgen', 'journals', 'mesh', 'ncbisearch', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'popset', 'probe', 'proteinclusters', 'pcassay', 'biosystems', 'pccompound', 'pcsubstance', 'pubmedhealth', 'seqannot', 'snp', 'sra', 'taxonomy', 'toolkit', 'toolkitall', 'toolkitbook', 'unigene', 'gencoll', 'gtr']}


In [4]:
# Search the nucleotide database for CRT gene records of Plasmodium falciparum.
# The query is kept in one place so the initial search and the full re-query
# cannot drift apart.
search_term = 'CRT[Gene Name] AND "Plasmodium falciparum"[Organism]'

handle = Entrez.esearch(db="nucleotide", term=search_term)
try:
    rec_list = Entrez.read(handle)
finally:
    handle.close()

# The parsed RetMax/Count values are string-like, so comparing them directly
# is lexicographic (e.g. '20' < '100' is False). Convert to int so that the
# re-query for the full result set actually happens when results were truncated.
if int(rec_list['RetMax']) < int(rec_list['Count']):
    handle = Entrez.esearch(db="nucleotide", term=search_term,
                            retmax=rec_list['Count'])
    try:
        rec_list = Entrez.read(handle)
    finally:
        handle.close()

In [5]:
# Download every matching record in GenBank flat-file format.
id_list = rec_list['IdList']
# retmode is stated explicitly (as the PubMed fetch in this notebook does)
# instead of relying on the server-side default for rettype='gb'.
hdl = Entrez.efetch(db='nucleotide', id=id_list, rettype='gb', retmode='text')

In [6]:
# Parse the whole GenBank response into a list, then release the connection.
# list() consumes the parser completely, so closing the handle afterwards is safe.
try:
    recs = list(SeqIO.parse(hdl, 'gb'))
finally:
    hdl.close()

In [7]:
# Pick the record of interest out of the fetched list by its name.
target_name = 'KM288867'
for rec in recs:
    if rec.name == target_name:
        break
else:
    # Without this, a missing record would silently leave `rec` bound to the
    # last fetched record (or undefined when `recs` is empty), and the cells
    # below would analyse the wrong sequence.
    raise LookupError('No record named %s among the %d fetched records'
                      % (target_name, len(recs)))
print(rec.name)
print(rec.description)

KM288867
Plasmodium falciparum clone PF3D7_0709000 chloroquine resistance transporter (CRT) gene, complete cds.


In [8]:
# Walk the record's features: report exon coordinates and gene names,
# and dump every other feature type verbatim.
for feature in rec.features:
    if feature.type == 'exon':
        loc = feature.location
        print('Exon', loc.start, loc.end, loc.strand)
        continue
    if feature.type == 'gene':
        print(feature.qualifiers['gene'])
        continue
    # Anything that is neither a gene nor an exon is shown as-is.
    print('not processed:\n%s' % feature)

not processed:
type: source
location: [0:10000](+)
qualifiers:
    Key: clone, Value: ['PF3D7_0709000']
    Key: db_xref, Value: ['taxon:5833']
    Key: mol_type, Value: ['genomic DNA']
    Key: organism, Value: ['Plasmodium falciparum']

['CRT']
not processed:
type: mRNA
location: join{[2751:3543](+), [3720:3989](+), [4168:4341](+), [4513:4646](+), [4799:4871](+), [4994:5070](+), [5166:5249](+), [5376:5427](+), [5564:5621](+), [5769:5862](+), [6055:6100](+), [6247:6302](+), [6471:7598](+)}
qualifiers:
    Key: gene, Value: ['CRT']
    Key: product, Value: ['chloroquine resistance transporter']
Sub-Features
type: mRNA
location: [2751:3543](+)
qualifiers:

type: mRNA
location: [3720:3989](+)
qualifiers:

type: mRNA
location: [4168:4341](+)
qualifiers:

type: mRNA
location: [4513:4646](+)
qualifiers:

type: mRNA
location: [4799:4871](+)
qualifiers:

type: mRNA
location: [4994:5070](+)
qualifiers:

type: mRNA
location: [5166:5249](+)
qualifiers:

type: mRNA
location: [5376:5427](+)
qualif

In [9]:
# Print every annotation attached to the record as a name=value line.
for name in rec.annotations:
    value = rec.annotations[name]
    print('%s=%s' % (name, value))

sequence_version=1
source=Plasmodium falciparum (malaria parasite P. falciparum)
taxonomy=['Eukaryota', 'Alveolata', 'Apicomplexa', 'Aconoidasida', 'Haemosporida', 'Plasmodium', 'Plasmodium (Laverania)']
keywords=['']
references=[Reference(title='Versatile control of Plasmodium falciparum gene expression with an inducible protein-RNA interaction', ...), Reference(title='Direct Submission', ...)]
accessions=['KM288867']
data_file_division=INV
date=12-NOV-2014
organism=Plasmodium falciparum
gi=706072608


In [10]:
# Length of the selected record's sequence.
print(len(rec.seq))

10000


In [11]:
# For each literature reference on the record that has a PubMed id, fetch its
# MEDLINE entry and print every field.
refs = rec.annotations['references']
for ref in refs:
    # References with an empty PubMed id have nothing to look up.
    if ref.pubmed_id == '':
        continue
    print(ref.pubmed_id)
    handle = Entrez.efetch(db="pubmed", id=[ref.pubmed_id],
                           rettype="medline", retmode="text")
    try:
        # Iterate inside the try block so the handle is still open while the
        # parser reads from it, and is always closed afterwards.
        records = Medline.parse(handle)
        for med_rec in records:
            for k, v in med_rec.items():
                print('%s: %s' % (k, v))
    finally:
        handle.close()

25370483
LID: 10.1038/ncomms6329 [doi]
STAT: In-Process
DEP: 20141105
MID: ['NIHMS630149']
DA: 20141105
AID: ['ncomms6329 [pii]', '10.1038/ncomms6329 [doi]']
CRDT: ['2014/11/06 06:00']
DP: 2014
GR: ['1DP2OD007124/OD/NIH HHS/United States', '5-T32-ES007020/ES/NIEHS NIH HHS/United States', '5-T32-GM08334/GM/NIGMS NIH HHS/United States', 'DP2 OD007124/OD/NIH HHS/United States', 'P30 ES002109/ES/NIEHS NIH HHS/United States']
OWN: NLM
PT: ['Journal Article', 'Research Support, N.I.H., Extramural', "Research Support, Non-U.S. Gov't"]
LA: ['eng']
FAU: ['Goldfless, Stephen J', 'Wagner, Jeffrey C', 'Niles, Jacquin C']
JT: Nature communications
LR: 20150117
PG: 5329
TI: Versatile control of Plasmodium falciparum gene expression with an inducible protein-RNA interaction.
PMCR: ['2015/05/05 00:00']
PL: England
TA: Nat Commun
JID: 101528555
AB: The available tools for conditional gene expression in Plasmodium falciparum are limited. Here, to enable reliable control of target gene expression, we bui