{ "cells": [ { "cell_type": "code", "execution_count": 165, "metadata": {}, "outputs": [], "source": [ "from Bio import Entrez\n", "from Bio import SeqIO\n", "from Bio.Seq import Seq\n", "from Bio.SeqRecord import SeqRecord\n", "import re\n", "import random\n", "import pandas as pd\n", "\n", "Entrez.email = 'sasha.grrshnova98@gmail.com'" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "#get B.burgdorferi genome\n", "\n", "burgdorferi_genome = id_search('NC_001318.1')" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "bavariensis_genome=id_search('NC_006156.1')\n", "afzelii_genome=id_search('NC_018887.1')\n", "garenii_genome=id_search('NC_018747.1')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "borrelia=['burgdorferi', 'bavariensis', 'afzelii', 'garenii']" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def PAM_search(seq):\n", " \n", " genome = str(seq)\n", "\n", " PAM_positions = {}\n", "\n", " PAM_pos = []\n", " \n", " for m in re.finditer(r\"[ACGT]GG\", genome):\n", " PAM_pos.append(m.start())\n", " PAM_positions['SpCas9'] = PAM_pos\n", " \n", " PAM_pos = []\n", " for m in re.finditer(r\"[ACGT]G[AG][AG]T\", genome):\n", " PAM_pos.append(m.start())\n", " PAM_positions['SaCas9_1'] = PAM_pos\n", "\n", " PAM_pos = []\n", " for m in re.finditer(r\"G[ACGT]G[AG][AG][ACTG]\", genome):\n", " PAM_pos.append(m.start())\n", " PAM_positions['SaCas9_2'] = PAM_pos\n", " \n", " PAM_pos = []\n", " for m in re.finditer(r\"[ACGT][ACGT][ACGT][ACGT][AG][CT]AC\", genome):\n", " PAM_pos.append(m.start())\n", " PAM_positions['CjCas9'] = PAM_pos\n", "\n", " \n", " return PAM_positions" ] }, { "cell_type": "code", "execution_count": 430, "metadata": {}, "outputs": [], "source": [ "#to search start inverted pam in complement DNA\n", "def PAM_rev_search(seq):\n", " \n", " genome = str(seq.complement())\n", "\n", " PAM_positions = {}\n", "\n", " PAM_pos = []\n", " \n", " for m in re.finditer(r\"GG[ACGT]\", genome):\n", " PAM_pos.append(m.start())\n", " PAM_positions['SpCas9'] = PAM_pos\n", " \n", " PAM_pos = []\n", " for m in re.finditer(r\"T[AG][AG]G[ACGT]\", genome):\n", " PAM_pos.append(m.start())\n", " PAM_positions['SaCas9_1'] = PAM_pos\n", "\n", " PAM_pos = []\n", " for m in re.finditer(r\"[ACGT][AG][AG]G[ACGT]\", genome):\n", " PAM_pos.append(m.start())\n", " PAM_positions['SaCas9_2'] = PAM_pos\n", " \n", " PAM_pos = []\n", " for m in re.finditer(r\"CA[CT][AG][ACGT][ACGT][ACGT][ACGT]\", genome):\n", " PAM_pos.append(m.start())\n", " PAM_positions['CjCas9'] = PAM_pos\n", "\n", " \n", " return PAM_positions" ] }, { "cell_type": "code", "execution_count": 433, "metadata": {}, "outputs": [], "source": [ "burgdorferi_PAM_rev_pos = PAM_rev_search(burgdorferi_genome)\n", "burgdorferi_PAM_pos = PAM_search(burgdorferi_genome)\n", "\n", "bavariensis_PAM_rev_pos = PAM_rev_search(bavariensis_genome)\n", "bavariensis_PAM_pos = PAM_search(bavariensis_genome)\n", "\n", "afzelii_PAM_rev_pos = PAM_rev_search(afzelii_genome)\n", "afzelii_PAM_pos = PAM_search(afzelii_genome)\n", "\n", "garenii_PAM_rev_pos = PAM_rev_search(garenii_genome)\n", "garenii_PAM_pos = PAM_search(garenii_genome)" ] }, { "cell_type": "code", "execution_count": 436, "metadata": {}, "outputs": [], "source": [ "#to chose cas proteins, len first pam and len between 2 pams (here 24 for SpCas9)\n", "def pam_pairs(PAM_pos, PAM_rev_pos, pos_cas, rev_cas): \n", " PAM_pos_y = []\n", "\n", " for el in PAM_pos[pos_cas]:\n", " PAM_pos_y.append(el + 24)\n", " \n", " PAM_pairs = {}\n", " for el in PAM_pos_y:\n", " if el in set(PAM_rev_pos[rev_cas]):\n", " PAM_pairs[el-24] = el\n", " \n", " return PAM_pairs" ] }, { "cell_type": "code", "execution_count": 437, "metadata": {}, "outputs": [], "source": [ "burgdorferi_pam_pairs=pam_pairs(burgdorferi_PAM_pos, burgdorferi_PAM_rev_pos, 'SpCas9', 'SpCas9')" ] }, { "cell_type": "code", "execution_count": 438, "metadata": {}, "outputs": [], "source": [ "bavariensis_pam_pairs=pam_pairs(bavariensis_PAM_pos, bavariensis_PAM_rev_pos, 'SpCas9', 'SpCas9')\n", "afzelii_pam_pairs=pam_pairs(afzelii_PAM_pos, afzelii_PAM_rev_pos, 'SpCas9', 'SpCas9')\n", "garenii_pam_pairs=pam_pairs(garenii_PAM_pos, garenii_PAM_rev_pos, 'SpCas9', 'SpCas9')" ] }, { "cell_type": "code", "execution_count": 276, "metadata": {}, "outputs": [], "source": [ "# 1 target + pam\n", "burgdorferi_pam_targets_pairs_seq={}\n", "for i in burgdorferi_pam_pairs.keys():\n", " burgdorferi_pam_targets_pairs_seq.update({i:str(burgdorferi_genome[i-20:i+3])})\n", " \n", "bavariensis_pam_targets_pairs_seq={}\n", "for i in bavariensis_pam_pairs.keys():\n", " bavariensis_pam_targets_pairs_seq.update({i:str(bavariensis_genome[i-20:i+3])})\n", " \n", "afzelii_pam_targets_pairs_seq={}\n", "for i in afzelii_pam_pairs.keys():\n", " afzelii_pam_targets_pairs_seq.update({i:str(afzelii_genome[i-20:i+3])})\n", " \n", "garenii_pam_targets_pairs_seq={}\n", "for i in garenii_pam_pairs.keys():\n", " garenii_pam_targets_pairs_seq.update({i:str(garenii_genome[i-20:i+3])})" ] }, { "cell_type": "code", "execution_count": 305, "metadata": {}, "outputs": [], "source": [ "# second target + pam\n", "burgdorferi_pam_targets_pairs_seq_rev={}\n", "for i in burgdorferi_pam_pairs.values():\n", " burgdorferi_pam_targets_pairs_seq_rev.update({i: str(burgdorferi_genome[i:i+23])})\n", " \n", "bavariensis_pam_targets_pairs_seq_rev={}\n", "for i in bavariensis_pam_pairs.values():\n", " bavariensis_pam_targets_pairs_seq_rev.update({i:str(bavariensis_genome[i:i+23])})\n", " \n", "afzelii_pam_targets_pairs_seq_rev={}\n", "for i in afzelii_pam_pairs.values():\n", " afzelii_pam_targets_pairs_seq_rev.update({i:str(afzelii_genome[i:i+23])})\n", " \n", "garenii_pam_targets_pairs_seq_rev={}\n", "for i in garenii_pam_pairs.values():\n", " garenii_pam_targets_pairs_seq_rev.update({i: str(garenii_genome[i:i+23])})\n" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "def sequence_compare(seq_a, seq_b):\n", " len1= len(seq_a)\n", " len2= len(seq_b)\n", " matches = 0\n", " for pos in range (0,min(len1,len2)) :\n", " if seq_a[pos] != seq_b[pos]:\n", " matches+=0\n", " else:\n", " matches+=1\n", " return matches" ] }, { "cell_type": "code", "execution_count": 408, "metadata": {}, "outputs": [], "source": [ "# to search condervative sequnces (target1-pam1) among 4 borrelias, \n", "# threshold is a number of same nucleotides in same positions\n", "\n", "thres=22\n", "\n", "common_seq_dict={}\n", "common_seq=[]\n", "n=0\n", "for i in set(burgdorferi_pam_targets_pairs_seq.values()):\n", " for k in set(bavariensis_pam_targets_pairs_seq.values()):\n", " if sequence_compare(i,k) > thres:\n", " for l in set(afzelii_pam_targets_pairs_seq.values()):\n", " if sequence_compare(i,l) > thres:\n", " for m in set(garenii_pam_targets_pairs_seq.values()):\n", " if sequence_compare(i,m) > thres:\n", " a= [key for (key, value) in burgdorferi_pam_targets_pairs_seq.items() if value == i]\n", " b= [key for (key, value) in bavariensis_pam_targets_pairs_seq.items() if value == k]\n", " c= [key for (key, value) in afzelii_pam_targets_pairs_seq.items() if value == l]\n", " d= [key for (key, value) in garenii_pam_targets_pairs_seq.items() if value == m]\n", " common_seq_dict.update({n: [a , b, c, d]})\n", " n=n+1\n" ] }, { "cell_type": "code", "execution_count": 409, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789101112131415161718
0[134376][378587][484274][444913][628385][443638][441401, 438156][438906, 435661][535898][445778][417622][345887][19925][481105][789106][436784, 440029][331095][871669][444366]
1[134197][377346][486625][447105][630325][444930][438801, 442045][436306, 439550][538085][447970][418254][344628][19773][483428][791106][437429, 440673][329902][873604][446555]
2[134240][377159][486357][446825][630240][444150][438413, 441649][435918, 439154][537885][447690][417870][344443][19926][483120][790823][440277, 437041][329753][873497][446273]
3[134244][377626][483767][444253][627939][441997][439103][436608][535613][445118][418542][344906][19810][480567][788736][437731][330186][870992][443703]
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 \\\n", "0 [134376] [378587] [484274] [444913] [628385] [443638] \n", "1 [134197] [377346] [486625] [447105] [630325] [444930] \n", "2 [134240] [377159] [486357] [446825] [630240] [444150] \n", "3 [134244] [377626] [483767] [444253] [627939] [441997] \n", "\n", " 6 7 8 9 10 11 \\\n", "0 [441401, 438156] [438906, 435661] [535898] [445778] [417622] [345887] \n", "1 [438801, 442045] [436306, 439550] [538085] [447970] [418254] [344628] \n", "2 [438413, 441649] [435918, 439154] [537885] [447690] [417870] [344443] \n", "3 [439103] [436608] [535613] [445118] [418542] [344906] \n", "\n", " 12 13 14 15 16 17 18 \n", "0 [19925] [481105] [789106] [436784, 440029] [331095] [871669] [444366] \n", "1 [19773] [483428] [791106] [437429, 440673] [329902] [873604] [446555] \n", "2 [19926] [483120] [790823] [440277, 437041] [329753] [873497] [446273] \n", "3 [19810] [480567] [788736] [437731] [330186] [870992] [443703] " ] }, "execution_count": 409, "metadata": {}, "output_type": "execute_result" } ], "source": [ "common_seq_df=pd.DataFrame.from_dict(common_seq_dict)\n", "common_seq_df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- I chose only the first start of same positions in one borrelia" ] }, { "cell_type": "code", "execution_count": 410, "metadata": {}, "outputs": [], "source": [ "for index, column in common_seq_df.iteritems():\n", " common_seq_df[index][0]=common_seq_df[index][0][0]\n", " common_seq_df[index][1]=common_seq_df[index][1][0]\n", " common_seq_df[index][2]=common_seq_df[index][2][0]\n", " common_seq_df[index][3]=common_seq_df[index][3][0]" ] }, { "cell_type": "code", "execution_count": 411, "metadata": {}, "outputs": [], "source": [ "common_seq_rev_dict={}\n", "#24 is len of pam1 and len between 2 pams\n", "\n", "for index, column in common_seq_df.iteritems():\n", " bur=burgdorferi_pam_targets_pairs_seq_rev[common_seq_df[index][0]+24]\n", " bav=bavariensis_pam_targets_pairs_seq_rev[common_seq_df[index][1]+24]\n", " afz=afzelii_pam_targets_pairs_seq_rev[common_seq_df[index][2]+24]\n", " gar=garenii_pam_targets_pairs_seq_rev[common_seq_df[index][3]+24]\n", " \n", " thres=22\n", " if sequence_compare(bur,bav) > thres:\n", " if sequence_compare(bur,afz) > thres:\n", " if sequence_compare(bur,gar) > thres:\n", " common_seq_rev_dict.update({index: [common_seq_df[index][0]+24, common_seq_df[index][1]+24,\\\n", " common_seq_df[index][2]+24, common_seq_df[index][3]+24]})\n", "\n", "\n", "common_seq_rev_df=pd.DataFrame.from_dict(common_seq_rev_dict)\n" ] }, { "cell_type": "code", "execution_count": 412, "metadata": {}, "outputs": [], "source": [ "common_seq_rev_df=common_seq_rev_df.T\n", "common_seq_df=common_seq_df.T" ] }, { "cell_type": "code", "execution_count": 413, "metadata": {}, "outputs": [], "source": [ "common_seq_df.columns=['bur_1', 'bav_1', 'afz_1', 'gar_1']\n", "common_seq_rev_df.columns=['bur_2', 'bav_2', 'afz_2', 'gar_2']" ] }, { "cell_type": "code", "execution_count": 414, "metadata": {}, "outputs": [], "source": [ "common_seq_all=pd.merge(common_seq_df, common_seq_rev_df, how='outer', left_index=True, right_index=True)" ] }, { "cell_type": "code", "execution_count": 420, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bur_1bav_1afz_1gar_1bur_2bav_2afz_2gar_2
3444913447105446825444253444937.0447129.0446849.0444277.0
5443638444930444150441997443662.0444954.0444174.0442021.0
6441401438801438413439103441425.0438825.0438437.0439127.0
7438906436306435918436608438930.0436330.0435942.0436632.0
9445778447970447690445118445802.0447994.0447714.0445142.0
10417622418254417870418542417646.0418278.0417894.0418566.0
14789106791106790823788736789130.0791130.0790847.0788760.0
15436784437429440277437731436808.0437453.0440301.0437755.0
\n", "
" ], "text/plain": [ " bur_1 bav_1 afz_1 gar_1 bur_2 bav_2 afz_2 gar_2\n", "3 444913 447105 446825 444253 444937.0 447129.0 446849.0 444277.0\n", "5 443638 444930 444150 441997 443662.0 444954.0 444174.0 442021.0\n", "6 441401 438801 438413 439103 441425.0 438825.0 438437.0 439127.0\n", "7 438906 436306 435918 436608 438930.0 436330.0 435942.0 436632.0\n", "9 445778 447970 447690 445118 445802.0 447994.0 447714.0 445142.0\n", "10 417622 418254 417870 418542 417646.0 418278.0 417894.0 418566.0\n", "14 789106 791106 790823 788736 789130.0 791130.0 790847.0 788760.0\n", "15 436784 437429 440277 437731 436808.0 437453.0 440301.0 437755.0" ] }, "execution_count": 420, "metadata": {}, "output_type": "execute_result" } ], "source": [ "common_seq_all.loc[~common_seq_all['bur_2'].isna()]" ] }, { "cell_type": "code", "execution_count": 421, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Seq('CGTGTGTAGCCCAGGACATAAGG', SingleLetterAlphabet())" ] }, "execution_count": 421, "metadata": {}, "output_type": "execute_result" } ], "source": [ "burgdorferi_genome[444913-20:444913+3]" ] }, { "cell_type": "code", "execution_count": 422, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Seq('CGTGTGTAGCCCAGGACATAAGG', SingleLetterAlphabet())" ] }, "execution_count": 422, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bavariensis_genome[447105-20:447105+3]" ] }, { "cell_type": "code", "execution_count": 424, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Seq('CCTCACCTTCCTCCGACTTATCA', SingleLetterAlphabet())" ] }, "execution_count": 424, "metadata": {}, "output_type": "execute_result" } ], "source": [ "burgdorferi_genome[444937:444937+23]" ] }, { "cell_type": "code", "execution_count": 425, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Seq('CCTCACCTTCCTCCGACTTATCA', SingleLetterAlphabet())" ] }, "execution_count": 425, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bavariensis_genome[447129:447129+23]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# MISC" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "#create df_PAM; 1 - Watson, 0 - Crick\n", "\n", "import pandas as pd\n", "df_PAM = pd.DataFrame({\"PAM_pos\" : PAM_SpCas9_pos, \"Strand\" : 0})" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "#create df_PAM_rev; 1 - Watson, 0 - Crick\n", "\n", "df_PAM_rev = pd.DataFrame({\"PAM_pos\" : PAM_SpCas9_rev_pos, \"Strand\" : 1})" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "df_all_PAMs = pd.concat([df_PAM, df_PAM_rev])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "handle = Entrez.esearch(db=\"probe\", term=\"borrelia burgdorferi\", retmode=\"text\", retmax = 220)\n", "\n", "record = Entrez.read(handle)\n", "#print(record[\"IdList\"])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "records = record['IdList']\n", "#print(records)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\packages\\urllib3\\connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[1;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[0;32m 378\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# Python 2.7, use buffering of HTTP responses\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 379\u001b[1;33m \u001b[0mhttplib_response\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbuffering\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 380\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# Python 2.6 and older, Python 3\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mTypeError\u001b[0m: getresponse() got an unexpected keyword argument 'buffering'", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mid_dbprobe\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mlink\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'https://www.ncbi.nlm.nih.gov/probe/'\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mid_dbprobe\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0mf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlink\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 8\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[0mres\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mre\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfindall\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mr'class=\"breakTxt\">[ACTG]+'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\api.py\u001b[0m in \u001b[0;36mget\u001b[1;34m(url, params, **kwargs)\u001b[0m\n\u001b[0;32m 70\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 71\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msetdefault\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'allow_redirects'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 72\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mrequest\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'get'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mparams\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 73\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\api.py\u001b[0m in \u001b[0;36mrequest\u001b[1;34m(method, url, **kwargs)\u001b[0m\n\u001b[0;32m 56\u001b[0m \u001b[1;31m# cases, and look like a memory leak in others.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 57\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0msessions\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 58\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0murl\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0murl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 59\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 60\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[1;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[0;32m 516\u001b[0m }\n\u001b[0;32m 517\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 518\u001b[1;33m \u001b[0mresp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 519\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 520\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\sessions.py\u001b[0m in \u001b[0;36msend\u001b[1;34m(self, request, **kwargs)\u001b[0m\n\u001b[0;32m 637\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 638\u001b[0m \u001b[1;31m# Send the request\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 639\u001b[1;33m \u001b[0mr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 640\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 641\u001b[0m \u001b[1;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\adapters.py\u001b[0m in \u001b[0;36msend\u001b[1;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[0;32m 436\u001b[0m \u001b[0mdecode_content\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 437\u001b[0m \u001b[0mretries\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 438\u001b[1;33m \u001b[0mtimeout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 439\u001b[0m )\n\u001b[0;32m 440\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\packages\\urllib3\\connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[1;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[0;32m 598\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtimeout_obj\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 599\u001b[0m \u001b[0mbody\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 600\u001b[1;33m chunked=chunked)\n\u001b[0m\u001b[0;32m 601\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 602\u001b[0m \u001b[1;31m# If we're going to release the connection in ``finally:``, then\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\packages\\urllib3\\connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[1;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[0;32m 380\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m:\u001b[0m \u001b[1;31m# Python 2.6 and older, Python 3\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 381\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 382\u001b[1;33m \u001b[0mhttplib_response\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 383\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 384\u001b[0m \u001b[1;31m# Remove the TypeError from the exception chain in Python 3;\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Miniconda3\\lib\\http\\client.py\u001b[0m in \u001b[0;36mgetresponse\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1196\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1197\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1198\u001b[1;33m \u001b[0mresponse\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbegin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1199\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1200\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Miniconda3\\lib\\http\\client.py\u001b[0m in \u001b[0;36mbegin\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 295\u001b[0m \u001b[1;31m# read until we get a non-100 response\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 296\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 297\u001b[1;33m \u001b[0mversion\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreason\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_read_status\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 298\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[0mCONTINUE\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 299\u001b[0m \u001b[1;32mbreak\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Miniconda3\\lib\\http\\client.py\u001b[0m in \u001b[0;36m_read_status\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 256\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 257\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_read_status\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 258\u001b[1;33m \u001b[0mline\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_MAXLINE\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"iso-8859-1\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 259\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mline\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[0m_MAXLINE\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 260\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mLineTooLong\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"status line\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Miniconda3\\lib\\socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[1;34m(self, b)\u001b[0m\n\u001b[0;32m 574\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 575\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 576\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 577\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 578\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\requests\\packages\\urllib3\\contrib\\pyopenssl.py\u001b[0m in \u001b[0;36mrecv_into\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 275\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mrecv_into\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 276\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 277\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconnection\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 278\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mOpenSSL\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSSL\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSysCallError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 279\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msuppress_ragged_eofs\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margs\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'Unexpected EOF'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m~\\Miniconda3\\lib\\site-packages\\OpenSSL\\SSL.py\u001b[0m in \u001b[0;36mrecv_into\u001b[1;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[0;32m 1332\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_lib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSSL_peek\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_ssl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbuf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1333\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1334\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_lib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSSL_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_ssl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbuf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1335\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_raise_ssl_error\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_ssl\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1336\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "import requests\n", "\n", "probes = []\n", "for i in records:\n", " id_dbprobe = i\n", " link = 'https://www.ncbi.nlm.nih.gov/probe/' + id_dbprobe\n", " f = requests.get(link)\n", " result = f.text\n", " res = re.findall(r'class=\"breakTxt\">[ACTG]+', result)\n", " for el in res:\n", " probes.append(el[17:])\n", "\n", "print(len(probes))\n", "#print(probes) " ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": true }, "outputs": [], "source": [ "f = open(\"BB_markers.txt\", \"a\")\n", "for k in probes:\n", " a = str(k)+'\\n'\n", " f.write(a)\n", "f.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#number of PAM for different Cas9 proteins in B.burgdorferi genome\n", "\n", "Bburgdorferi_PAM_cnt = {}\n", "for k in Bburgdorferi_PAM.keys():\n", " Bburgdorferi_PAM_cnt[k] = len(Bburgdorferi_PAM.get(k))\n", " \n", "print(Bburgdorferi_PAM_cnt)\n", "\n", "\n", "#histogram - number of PAM for different Cas9 proteins in B.burgdorferi genome\n", "\n", "import matplotlib.pyplot as plt\n", "\n", "plt.bar(list(Bburgdorferi_PAM_cnt.keys()), Bburgdorferi_PAM_cnt.values())\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "burgdorferi_PAM_rev_pos = burgdorferi_PAM_rev_pos[\"SpCas9\"]\n", "burgdorferi_PAM_pos = burgdorferi_PAM_pos[\"SpCas9\"]\n", "\n", "bavariensis_PAM_rev_pos = bavariensis_PAM_rev_pos[\"SpCas9\"]\n", "bavariensis_PAM_pos = bavariensis_PAM_pos[\"SpCas9\"]\n", "\n", "afzelii_PAM_rev_pos = afzelii_PAM_rev_pos[\"SpCas9\"]\n", "afzelii_PAM_pos = afzelii_PAM_pos[\"SpCas9\"]\n", "\n", "garenii_PAM_rev_pos = garenii_PAM_rev_pos[\"SpCas9\"]\n", "garenii_PAM_pos = garenii_PAM_pos[\"SpCas9\"]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "thres=22\n", "\n", "common_seq=[]\n", "for i in set(burgdorferi_pam_targets_pairs_seq.values()):\n", " for k in set(bavariensis_pam_targets_pairs_seq.values()):\n", " if sequence_compare(i,k) > thres:\n", " common_seq.append(i)\n", "\n", "common_seq_2=[]\n", "for i in common_seq:\n", " for k in set(afzelii_pam_targets_pairs_seq.values()):\n", " if sequence_compare(i,k) > thres:\n", " common_seq_2.append(i)\n", " \n", "common_seq=[]\n", "for i in common_seq_2:\n", " for k in set(garenii_pam_targets_pairs_seq.values()):\n", " if sequence_compare(i,k) > thres:\n", " common_seq.append(i)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.6" } }, "nbformat": 4, "nbformat_minor": 2 }