{
"cells": [
{
"cell_type": "code",
"execution_count": 165,
"metadata": {},
"outputs": [],
"source": [
"from Bio import Entrez\n",
"from Bio import SeqIO\n",
"from Bio.Seq import Seq\n",
"from Bio.SeqRecord import SeqRecord\n",
"import re\n",
"import random\n",
"import pandas as pd\n",
"\n",
"Entrez.email = 'sasha.grrshnova98@gmail.com'"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"#get B.burgdorferi genome\n",
"\n",
"burgdorferi_genome = id_search('NC_001318.1')"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"bavariensis_genome=id_search('NC_006156.1')\n",
"afzelii_genome=id_search('NC_018887.1')\n",
"garenii_genome=id_search('NC_018747.1')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"borrelia=['burgdorferi', 'bavariensis', 'afzelii', 'garenii']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def PAM_search(seq):\n",
" \n",
" genome = str(seq)\n",
"\n",
" PAM_positions = {}\n",
"\n",
" PAM_pos = []\n",
" \n",
" for m in re.finditer(r\"[ACGT]GG\", genome):\n",
" PAM_pos.append(m.start())\n",
" PAM_positions['SpCas9'] = PAM_pos\n",
" \n",
" PAM_pos = []\n",
" for m in re.finditer(r\"[ACGT]G[AG][AG]T\", genome):\n",
" PAM_pos.append(m.start())\n",
" PAM_positions['SaCas9_1'] = PAM_pos\n",
"\n",
" PAM_pos = []\n",
" for m in re.finditer(r\"G[ACGT]G[AG][AG][ACTG]\", genome):\n",
" PAM_pos.append(m.start())\n",
" PAM_positions['SaCas9_2'] = PAM_pos\n",
" \n",
" PAM_pos = []\n",
" for m in re.finditer(r\"[ACGT][ACGT][ACGT][ACGT][AG][CT]AC\", genome):\n",
" PAM_pos.append(m.start())\n",
" PAM_positions['CjCas9'] = PAM_pos\n",
"\n",
" \n",
" return PAM_positions"
]
},
{
"cell_type": "code",
"execution_count": 430,
"metadata": {},
"outputs": [],
"source": [
"#to search start inverted pam in complement DNA\n",
"def PAM_rev_search(seq):\n",
" \n",
" genome = str(seq.complement())\n",
"\n",
" PAM_positions = {}\n",
"\n",
" PAM_pos = []\n",
" \n",
" for m in re.finditer(r\"GG[ACGT]\", genome):\n",
" PAM_pos.append(m.start())\n",
" PAM_positions['SpCas9'] = PAM_pos\n",
" \n",
" PAM_pos = []\n",
" for m in re.finditer(r\"T[AG][AG]G[ACGT]\", genome):\n",
" PAM_pos.append(m.start())\n",
" PAM_positions['SaCas9_1'] = PAM_pos\n",
"\n",
" PAM_pos = []\n",
" for m in re.finditer(r\"[ACGT][AG][AG]G[ACGT]\", genome):\n",
" PAM_pos.append(m.start())\n",
" PAM_positions['SaCas9_2'] = PAM_pos\n",
" \n",
" PAM_pos = []\n",
" for m in re.finditer(r\"CA[CT][AG][ACGT][ACGT][ACGT][ACGT]\", genome):\n",
" PAM_pos.append(m.start())\n",
" PAM_positions['CjCas9'] = PAM_pos\n",
"\n",
" \n",
" return PAM_positions"
]
},
{
"cell_type": "code",
"execution_count": 433,
"metadata": {},
"outputs": [],
"source": [
"burgdorferi_PAM_rev_pos = PAM_rev_search(burgdorferi_genome)\n",
"burgdorferi_PAM_pos = PAM_search(burgdorferi_genome)\n",
"\n",
"bavariensis_PAM_rev_pos = PAM_rev_search(bavariensis_genome)\n",
"bavariensis_PAM_pos = PAM_search(bavariensis_genome)\n",
"\n",
"afzelii_PAM_rev_pos = PAM_rev_search(afzelii_genome)\n",
"afzelii_PAM_pos = PAM_search(afzelii_genome)\n",
"\n",
"garenii_PAM_rev_pos = PAM_rev_search(garenii_genome)\n",
"garenii_PAM_pos = PAM_search(garenii_genome)"
]
},
{
"cell_type": "code",
"execution_count": 436,
"metadata": {},
"outputs": [],
"source": [
"#to chose cas proteins, len first pam and len between 2 pams (here 24 for SpCas9)\n",
"def pam_pairs(PAM_pos, PAM_rev_pos, pos_cas, rev_cas): \n",
" PAM_pos_y = []\n",
"\n",
" for el in PAM_pos[pos_cas]:\n",
" PAM_pos_y.append(el + 24)\n",
" \n",
" PAM_pairs = {}\n",
" for el in PAM_pos_y:\n",
" if el in set(PAM_rev_pos[rev_cas]):\n",
" PAM_pairs[el-24] = el\n",
" \n",
" return PAM_pairs"
]
},
{
"cell_type": "code",
"execution_count": 437,
"metadata": {},
"outputs": [],
"source": [
"burgdorferi_pam_pairs=pam_pairs(burgdorferi_PAM_pos, burgdorferi_PAM_rev_pos, 'SpCas9', 'SpCas9')"
]
},
{
"cell_type": "code",
"execution_count": 438,
"metadata": {},
"outputs": [],
"source": [
"bavariensis_pam_pairs=pam_pairs(bavariensis_PAM_pos, bavariensis_PAM_rev_pos, 'SpCas9', 'SpCas9')\n",
"afzelii_pam_pairs=pam_pairs(afzelii_PAM_pos, afzelii_PAM_rev_pos, 'SpCas9', 'SpCas9')\n",
"garenii_pam_pairs=pam_pairs(garenii_PAM_pos, garenii_PAM_rev_pos, 'SpCas9', 'SpCas9')"
]
},
{
"cell_type": "code",
"execution_count": 276,
"metadata": {},
"outputs": [],
"source": [
"# 1 target + pam\n",
"burgdorferi_pam_targets_pairs_seq={}\n",
"for i in burgdorferi_pam_pairs.keys():\n",
" burgdorferi_pam_targets_pairs_seq.update({i:str(burgdorferi_genome[i-20:i+3])})\n",
" \n",
"bavariensis_pam_targets_pairs_seq={}\n",
"for i in bavariensis_pam_pairs.keys():\n",
" bavariensis_pam_targets_pairs_seq.update({i:str(bavariensis_genome[i-20:i+3])})\n",
" \n",
"afzelii_pam_targets_pairs_seq={}\n",
"for i in afzelii_pam_pairs.keys():\n",
" afzelii_pam_targets_pairs_seq.update({i:str(afzelii_genome[i-20:i+3])})\n",
" \n",
"garenii_pam_targets_pairs_seq={}\n",
"for i in garenii_pam_pairs.keys():\n",
" garenii_pam_targets_pairs_seq.update({i:str(garenii_genome[i-20:i+3])})"
]
},
{
"cell_type": "code",
"execution_count": 305,
"metadata": {},
"outputs": [],
"source": [
"# second target + pam\n",
"burgdorferi_pam_targets_pairs_seq_rev={}\n",
"for i in burgdorferi_pam_pairs.values():\n",
" burgdorferi_pam_targets_pairs_seq_rev.update({i: str(burgdorferi_genome[i:i+23])})\n",
" \n",
"bavariensis_pam_targets_pairs_seq_rev={}\n",
"for i in bavariensis_pam_pairs.values():\n",
" bavariensis_pam_targets_pairs_seq_rev.update({i:str(bavariensis_genome[i:i+23])})\n",
" \n",
"afzelii_pam_targets_pairs_seq_rev={}\n",
"for i in afzelii_pam_pairs.values():\n",
" afzelii_pam_targets_pairs_seq_rev.update({i:str(afzelii_genome[i:i+23])})\n",
" \n",
"garenii_pam_targets_pairs_seq_rev={}\n",
"for i in garenii_pam_pairs.values():\n",
" garenii_pam_targets_pairs_seq_rev.update({i: str(garenii_genome[i:i+23])})\n"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"def sequence_compare(seq_a, seq_b):\n",
" len1= len(seq_a)\n",
" len2= len(seq_b)\n",
" matches = 0\n",
" for pos in range (0,min(len1,len2)) :\n",
" if seq_a[pos] != seq_b[pos]:\n",
" matches+=0\n",
" else:\n",
" matches+=1\n",
" return matches"
]
},
{
"cell_type": "code",
"execution_count": 408,
"metadata": {},
"outputs": [],
"source": [
"# to search condervative sequnces (target1-pam1) among 4 borrelias, \n",
"# threshold is a number of same nucleotides in same positions\n",
"\n",
"thres=22\n",
"\n",
"common_seq_dict={}\n",
"common_seq=[]\n",
"n=0\n",
"for i in set(burgdorferi_pam_targets_pairs_seq.values()):\n",
" for k in set(bavariensis_pam_targets_pairs_seq.values()):\n",
" if sequence_compare(i,k) > thres:\n",
" for l in set(afzelii_pam_targets_pairs_seq.values()):\n",
" if sequence_compare(i,l) > thres:\n",
" for m in set(garenii_pam_targets_pairs_seq.values()):\n",
" if sequence_compare(i,m) > thres:\n",
" a= [key for (key, value) in burgdorferi_pam_targets_pairs_seq.items() if value == i]\n",
" b= [key for (key, value) in bavariensis_pam_targets_pairs_seq.items() if value == k]\n",
" c= [key for (key, value) in afzelii_pam_targets_pairs_seq.items() if value == l]\n",
" d= [key for (key, value) in garenii_pam_targets_pairs_seq.items() if value == m]\n",
" common_seq_dict.update({n: [a , b, c, d]})\n",
" n=n+1\n"
]
},
{
"cell_type": "code",
"execution_count": 409,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
" 10 | \n",
" 11 | \n",
" 12 | \n",
" 13 | \n",
" 14 | \n",
" 15 | \n",
" 16 | \n",
" 17 | \n",
" 18 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" [134376] | \n",
" [378587] | \n",
" [484274] | \n",
" [444913] | \n",
" [628385] | \n",
" [443638] | \n",
" [441401, 438156] | \n",
" [438906, 435661] | \n",
" [535898] | \n",
" [445778] | \n",
" [417622] | \n",
" [345887] | \n",
" [19925] | \n",
" [481105] | \n",
" [789106] | \n",
" [436784, 440029] | \n",
" [331095] | \n",
" [871669] | \n",
" [444366] | \n",
"
\n",
" \n",
" 1 | \n",
" [134197] | \n",
" [377346] | \n",
" [486625] | \n",
" [447105] | \n",
" [630325] | \n",
" [444930] | \n",
" [438801, 442045] | \n",
" [436306, 439550] | \n",
" [538085] | \n",
" [447970] | \n",
" [418254] | \n",
" [344628] | \n",
" [19773] | \n",
" [483428] | \n",
" [791106] | \n",
" [437429, 440673] | \n",
" [329902] | \n",
" [873604] | \n",
" [446555] | \n",
"
\n",
" \n",
" 2 | \n",
" [134240] | \n",
" [377159] | \n",
" [486357] | \n",
" [446825] | \n",
" [630240] | \n",
" [444150] | \n",
" [438413, 441649] | \n",
" [435918, 439154] | \n",
" [537885] | \n",
" [447690] | \n",
" [417870] | \n",
" [344443] | \n",
" [19926] | \n",
" [483120] | \n",
" [790823] | \n",
" [440277, 437041] | \n",
" [329753] | \n",
" [873497] | \n",
" [446273] | \n",
"
\n",
" \n",
" 3 | \n",
" [134244] | \n",
" [377626] | \n",
" [483767] | \n",
" [444253] | \n",
" [627939] | \n",
" [441997] | \n",
" [439103] | \n",
" [436608] | \n",
" [535613] | \n",
" [445118] | \n",
" [418542] | \n",
" [344906] | \n",
" [19810] | \n",
" [480567] | \n",
" [788736] | \n",
" [437731] | \n",
" [330186] | \n",
" [870992] | \n",
" [443703] | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 \\\n",
"0 [134376] [378587] [484274] [444913] [628385] [443638] \n",
"1 [134197] [377346] [486625] [447105] [630325] [444930] \n",
"2 [134240] [377159] [486357] [446825] [630240] [444150] \n",
"3 [134244] [377626] [483767] [444253] [627939] [441997] \n",
"\n",
" 6 7 8 9 10 11 \\\n",
"0 [441401, 438156] [438906, 435661] [535898] [445778] [417622] [345887] \n",
"1 [438801, 442045] [436306, 439550] [538085] [447970] [418254] [344628] \n",
"2 [438413, 441649] [435918, 439154] [537885] [447690] [417870] [344443] \n",
"3 [439103] [436608] [535613] [445118] [418542] [344906] \n",
"\n",
" 12 13 14 15 16 17 18 \n",
"0 [19925] [481105] [789106] [436784, 440029] [331095] [871669] [444366] \n",
"1 [19773] [483428] [791106] [437429, 440673] [329902] [873604] [446555] \n",
"2 [19926] [483120] [790823] [440277, 437041] [329753] [873497] [446273] \n",
"3 [19810] [480567] [788736] [437731] [330186] [870992] [443703] "
]
},
"execution_count": 409,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"common_seq_df=pd.DataFrame.from_dict(common_seq_dict)\n",
"common_seq_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- I chose only the first start of same positions in one borrelia"
]
},
{
"cell_type": "code",
"execution_count": 410,
"metadata": {},
"outputs": [],
"source": [
"for index, column in common_seq_df.iteritems():\n",
" common_seq_df[index][0]=common_seq_df[index][0][0]\n",
" common_seq_df[index][1]=common_seq_df[index][1][0]\n",
" common_seq_df[index][2]=common_seq_df[index][2][0]\n",
" common_seq_df[index][3]=common_seq_df[index][3][0]"
]
},
{
"cell_type": "code",
"execution_count": 411,
"metadata": {},
"outputs": [],
"source": [
"common_seq_rev_dict={}\n",
"#24 is len of pam1 and len between 2 pams\n",
"\n",
"for index, column in common_seq_df.iteritems():\n",
" bur=burgdorferi_pam_targets_pairs_seq_rev[common_seq_df[index][0]+24]\n",
" bav=bavariensis_pam_targets_pairs_seq_rev[common_seq_df[index][1]+24]\n",
" afz=afzelii_pam_targets_pairs_seq_rev[common_seq_df[index][2]+24]\n",
" gar=garenii_pam_targets_pairs_seq_rev[common_seq_df[index][3]+24]\n",
" \n",
" thres=22\n",
" if sequence_compare(bur,bav) > thres:\n",
" if sequence_compare(bur,afz) > thres:\n",
" if sequence_compare(bur,gar) > thres:\n",
" common_seq_rev_dict.update({index: [common_seq_df[index][0]+24, common_seq_df[index][1]+24,\\\n",
" common_seq_df[index][2]+24, common_seq_df[index][3]+24]})\n",
"\n",
"\n",
"common_seq_rev_df=pd.DataFrame.from_dict(common_seq_rev_dict)\n"
]
},
{
"cell_type": "code",
"execution_count": 412,
"metadata": {},
"outputs": [],
"source": [
"common_seq_rev_df=common_seq_rev_df.T\n",
"common_seq_df=common_seq_df.T"
]
},
{
"cell_type": "code",
"execution_count": 413,
"metadata": {},
"outputs": [],
"source": [
"common_seq_df.columns=['bur_1', 'bav_1', 'afz_1', 'gar_1']\n",
"common_seq_rev_df.columns=['bur_2', 'bav_2', 'afz_2', 'gar_2']"
]
},
{
"cell_type": "code",
"execution_count": 414,
"metadata": {},
"outputs": [],
"source": [
"common_seq_all=pd.merge(common_seq_df, common_seq_rev_df, how='outer', left_index=True, right_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 420,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" bur_1 | \n",
" bav_1 | \n",
" afz_1 | \n",
" gar_1 | \n",
" bur_2 | \n",
" bav_2 | \n",
" afz_2 | \n",
" gar_2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 3 | \n",
" 444913 | \n",
" 447105 | \n",
" 446825 | \n",
" 444253 | \n",
" 444937.0 | \n",
" 447129.0 | \n",
" 446849.0 | \n",
" 444277.0 | \n",
"
\n",
" \n",
" 5 | \n",
" 443638 | \n",
" 444930 | \n",
" 444150 | \n",
" 441997 | \n",
" 443662.0 | \n",
" 444954.0 | \n",
" 444174.0 | \n",
" 442021.0 | \n",
"
\n",
" \n",
" 6 | \n",
" 441401 | \n",
" 438801 | \n",
" 438413 | \n",
" 439103 | \n",
" 441425.0 | \n",
" 438825.0 | \n",
" 438437.0 | \n",
" 439127.0 | \n",
"
\n",
" \n",
" 7 | \n",
" 438906 | \n",
" 436306 | \n",
" 435918 | \n",
" 436608 | \n",
" 438930.0 | \n",
" 436330.0 | \n",
" 435942.0 | \n",
" 436632.0 | \n",
"
\n",
" \n",
" 9 | \n",
" 445778 | \n",
" 447970 | \n",
" 447690 | \n",
" 445118 | \n",
" 445802.0 | \n",
" 447994.0 | \n",
" 447714.0 | \n",
" 445142.0 | \n",
"
\n",
" \n",
" 10 | \n",
" 417622 | \n",
" 418254 | \n",
" 417870 | \n",
" 418542 | \n",
" 417646.0 | \n",
" 418278.0 | \n",
" 417894.0 | \n",
" 418566.0 | \n",
"
\n",
" \n",
" 14 | \n",
" 789106 | \n",
" 791106 | \n",
" 790823 | \n",
" 788736 | \n",
" 789130.0 | \n",
" 791130.0 | \n",
" 790847.0 | \n",
" 788760.0 | \n",
"
\n",
" \n",
" 15 | \n",
" 436784 | \n",
" 437429 | \n",
" 440277 | \n",
" 437731 | \n",
" 436808.0 | \n",
" 437453.0 | \n",
" 440301.0 | \n",
" 437755.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" bur_1 bav_1 afz_1 gar_1 bur_2 bav_2 afz_2 gar_2\n",
"3 444913 447105 446825 444253 444937.0 447129.0 446849.0 444277.0\n",
"5 443638 444930 444150 441997 443662.0 444954.0 444174.0 442021.0\n",
"6 441401 438801 438413 439103 441425.0 438825.0 438437.0 439127.0\n",
"7 438906 436306 435918 436608 438930.0 436330.0 435942.0 436632.0\n",
"9 445778 447970 447690 445118 445802.0 447994.0 447714.0 445142.0\n",
"10 417622 418254 417870 418542 417646.0 418278.0 417894.0 418566.0\n",
"14 789106 791106 790823 788736 789130.0 791130.0 790847.0 788760.0\n",
"15 436784 437429 440277 437731 436808.0 437453.0 440301.0 437755.0"
]
},
"execution_count": 420,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"common_seq_all.loc[~common_seq_all['bur_2'].isna()]"
]
},
{
"cell_type": "code",
"execution_count": 421,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Seq('CGTGTGTAGCCCAGGACATAAGG', SingleLetterAlphabet())"
]
},
"execution_count": 421,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"burgdorferi_genome[444913-20:444913+3]"
]
},
{
"cell_type": "code",
"execution_count": 422,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Seq('CGTGTGTAGCCCAGGACATAAGG', SingleLetterAlphabet())"
]
},
"execution_count": 422,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bavariensis_genome[447105-20:447105+3]"
]
},
{
"cell_type": "code",
"execution_count": 424,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Seq('CCTCACCTTCCTCCGACTTATCA', SingleLetterAlphabet())"
]
},
"execution_count": 424,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"burgdorferi_genome[444937:444937+23]"
]
},
{
"cell_type": "code",
"execution_count": 425,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Seq('CCTCACCTTCCTCCGACTTATCA', SingleLetterAlphabet())"
]
},
"execution_count": 425,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bavariensis_genome[447129:447129+23]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# MISC"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"#create df_PAM; 1 - Watson, 0 - Crick\n",
"\n",
"import pandas as pd\n",
"df_PAM = pd.DataFrame({\"PAM_pos\" : PAM_SpCas9_pos, \"Strand\" : 0})"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"#create df_PAM_rev; 1 - Watson, 0 - Crick\n",
"\n",
"df_PAM_rev = pd.DataFrame({\"PAM_pos\" : PAM_SpCas9_rev_pos, \"Strand\" : 1})"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"df_all_PAMs = pd.concat([df_PAM, df_PAM_rev])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"handle = Entrez.esearch(db=\"probe\", term=\"borrelia burgdorferi\", retmode=\"text\", retmax = 220)\n",
"\n",
"record = Entrez.read(handle)\n",
"#print(record[\"IdList\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"records = record['IdList']\n",
"#print(records)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\packages\\urllib3\\connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[1;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[0;32m 378\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# Python 2.7, use buffering of HTTP responses\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 379\u001b[1;33m \u001b[0mhttplib_response\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbuffering\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 380\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# Python 2.6 and older, Python 3\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mTypeError\u001b[0m: getresponse() got an unexpected keyword argument 'buffering'",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mid_dbprobe\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mlink\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'https://www.ncbi.nlm.nih.gov/probe/'\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mid_dbprobe\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlink\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 8\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[0mres\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mre\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfindall\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mr'class=\"breakTxt\">[ACTG]+'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\api.py\u001b[0m in \u001b[0;36mget\u001b[1;34m(url, params, **kwargs)\u001b[0m\n\u001b[0;32m 70\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 71\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msetdefault\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'allow_redirects'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 72\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mrequest\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'get'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mparams\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 73\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\api.py\u001b[0m in \u001b[0;36mrequest\u001b[1;34m(method, url, **kwargs)\u001b[0m\n\u001b[0;32m 56\u001b[0m \u001b[1;31m# cases, and look like a memory leak in others.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 57\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0msessions\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 58\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 59\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 60\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[1;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[0;32m 516\u001b[0m }\n\u001b[0;32m 517\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 518\u001b[1;33m \u001b[0mresp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 519\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 520\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\sessions.py\u001b[0m in \u001b[0;36msend\u001b[1;34m(self, request, **kwargs)\u001b[0m\n\u001b[0;32m 637\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 638\u001b[0m \u001b[1;31m# Send the request\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 639\u001b[1;33m \u001b[0mr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 640\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 641\u001b[0m \u001b[1;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\adapters.py\u001b[0m in \u001b[0;36msend\u001b[1;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[0;32m 436\u001b[0m \u001b[0mdecode_content\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 437\u001b[0m \u001b[0mretries\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 438\u001b[1;33m \u001b[0mtimeout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 439\u001b[0m )\n\u001b[0;32m 440\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\packages\\urllib3\\connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[1;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[0;32m 598\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtimeout_obj\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 599\u001b[0m \u001b[0mbody\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 600\u001b[1;33m chunked=chunked)\n\u001b[0m\u001b[0;32m 601\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 602\u001b[0m \u001b[1;31m# If we're going to release the connection in ``finally:``, then\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\packages\\urllib3\\connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[1;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[0;32m 380\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# Python 2.6 and older, Python 3\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 381\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 382\u001b[1;33m \u001b[0mhttplib_response\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 383\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 384\u001b[0m \u001b[1;31m# Remove the TypeError from the exception chain in Python 3;\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\lib\\http\\client.py\u001b[0m in \u001b[0;36mgetresponse\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1196\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1197\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1198\u001b[1;33m \u001b[0mresponse\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbegin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1199\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1200\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\lib\\http\\client.py\u001b[0m in \u001b[0;36mbegin\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 295\u001b[0m \u001b[1;31m# read until we get a non-100 response\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 296\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 297\u001b[1;33m \u001b[0mversion\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreason\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_read_status\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 298\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[0mCONTINUE\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 299\u001b[0m \u001b[1;32mbreak\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\lib\\http\\client.py\u001b[0m in \u001b[0;36m_read_status\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 256\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 257\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_read_status\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 258\u001b[1;33m \u001b[0mline\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_MAXLINE\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"iso-8859-1\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 259\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mline\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[0m_MAXLINE\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 260\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mLineTooLong\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"status line\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\lib\\socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[1;34m(self, b)\u001b[0m\n\u001b[0;32m 574\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 575\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 576\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 577\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 578\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\packages\\urllib3\\contrib\\pyopenssl.py\u001b[0m in \u001b[0;36mrecv_into\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 275\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mrecv_into\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 276\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 277\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconnection\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 278\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mOpenSSL\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSSL\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSysCallError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 279\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msuppress_ragged_eofs\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margs\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'Unexpected EOF'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\OpenSSL\\SSL.py\u001b[0m in \u001b[0;36mrecv_into\u001b[1;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[0;32m 1332\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_lib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSSL_peek\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_ssl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbuf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1333\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1334\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_lib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSSL_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_ssl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbuf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1335\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_raise_ssl_error\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_ssl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1336\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"import requests\n",
"\n",
"probes = []\n",
"for i in records:\n",
" id_dbprobe = i\n",
" link = 'https://www.ncbi.nlm.nih.gov/probe/' + id_dbprobe\n",
" f = requests.get(link)\n",
" result = f.text\n",
" res = re.findall(r'class=\"breakTxt\">[ACTG]+', result)\n",
" for el in res:\n",
" probes.append(el[17:])\n",
"\n",
"print(len(probes))\n",
"#print(probes) "
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Append every probe sequence to BB_markers.txt, one per line.\n",
"# Mode 'a' keeps what is already in the file, so re-running this cell adds the lines again.\n",
"# The with-block closes the file even if a write fails.\n",
"with open(\"BB_markers.txt\", \"a\") as f:\n",
"    for k in probes:\n",
"        f.write(str(k) + '\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# How many PAM sites were found for each Cas9 protein in the B.burgdorferi genome\n",
"Bburgdorferi_PAM_cnt = {\n",
"    cas_name: len(Bburgdorferi_PAM.get(cas_name))\n",
"    for cas_name in Bburgdorferi_PAM.keys()\n",
"}\n",
"\n",
"print(Bburgdorferi_PAM_cnt)\n",
"\n",
"\n",
"# Bar chart of the same counts, one bar per Cas9 protein\n",
"import matplotlib.pyplot as plt\n",
"\n",
"cas_names = list(Bburgdorferi_PAM_cnt.keys())\n",
"pam_counts = Bburgdorferi_PAM_cnt.values()\n",
"plt.bar(cas_names, pam_counts)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Reduce every per-genome PAM result to its \"SpCas9\" entry, keeping the same variable names.\n",
"# NOTE(review): the inputs are presumably the dicts returned by PAM_search / PAM_rev_search - confirm.\n",
"def _spcas9_only(pam_positions):\n",
"    \"\"\"Return pam_positions[\"SpCas9\"] for a dict; return any other value unchanged.\n",
"\n",
"    The pass-through makes this cell safe to run more than once: after the first\n",
"    run the names below no longer hold dicts, and indexing them again by key\n",
"    would raise a TypeError.\n",
"    \"\"\"\n",
"    if isinstance(pam_positions, dict):\n",
"        return pam_positions[\"SpCas9\"]\n",
"    return pam_positions\n",
"\n",
"burgdorferi_PAM_rev_pos = _spcas9_only(burgdorferi_PAM_rev_pos)\n",
"burgdorferi_PAM_pos = _spcas9_only(burgdorferi_PAM_pos)\n",
"\n",
"bavariensis_PAM_rev_pos = _spcas9_only(bavariensis_PAM_rev_pos)\n",
"bavariensis_PAM_pos = _spcas9_only(bavariensis_PAM_pos)\n",
"\n",
"afzelii_PAM_rev_pos = _spcas9_only(afzelii_PAM_rev_pos)\n",
"afzelii_PAM_pos = _spcas9_only(afzelii_PAM_pos)\n",
"\n",
"garenii_PAM_rev_pos = _spcas9_only(garenii_PAM_rev_pos)\n",
"garenii_PAM_pos = _spcas9_only(garenii_PAM_pos)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"thres=22\n",
"\n",
"def _matching_any(candidates, references, min_score):\n",
"    \"\"\"Return each candidate for which sequence_compare(candidate, reference) > min_score\n",
"    holds for at least one reference.\n",
"\n",
"    A candidate is kept once no matter how many references it matches, and\n",
"    checking stops at its first match. sequence_compare is defined elsewhere\n",
"    in this notebook.\n",
"    \"\"\"\n",
"    references = list(references)  # materialise once so it can be rescanned per candidate\n",
"    return [\n",
"        cand for cand in candidates\n",
"        if any(sequence_compare(cand, ref) > min_score for ref in references)\n",
"    ]\n",
"\n",
"# Narrow the burgdorferi targets genome by genome: bavariensis, then afzelii, then garenii.\n",
"common_seq = _matching_any(\n",
"    set(burgdorferi_pam_targets_pairs_seq.values()),\n",
"    set(bavariensis_pam_targets_pairs_seq.values()),\n",
"    thres,\n",
")\n",
"\n",
"common_seq_2 = _matching_any(\n",
"    common_seq,\n",
"    set(afzelii_pam_targets_pairs_seq.values()),\n",
"    thres,\n",
")\n",
"\n",
"# common_seq ends up holding the targets that passed the check against all three other genomes.\n",
"common_seq = _matching_any(\n",
"    common_seq_2,\n",
"    set(garenii_pam_targets_pairs_seq.values()),\n",
"    thres,\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}