{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# This notebook shows rich Jupyter representations of Dseqrecord and derived classes\n", "\n", "* Dseqrecord --> base class\n", "* GenbankRecord(Dseqrecord) --> read from a GenBank link\n", "* GenbankFile(Dseqrecord) --> read from a local file\n", "* Amplicon(Dseqrecord) --> PCR product\n", "* Contig(Dseqrecord) --> produced through Assembly" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "try:\n", " from pydna.readers import read\n", " from pydna.parsers import parse_primers\n", " from pydna.dseqrecord import Dseqrecord\n", " from pydna.genbank import Genbank\n", " from pydna.amplify import pcr\n", " from pydna.assembly import Assembly\n", "except ImportError:\n", " import sys, os\n", "\n", " sys.path.append(os.pardir)\n", " from pydna.readers import read\n", " from pydna.parsers import parse_primers\n", " from pydna.dseqrecord import Dseqrecord\n", " from pydna.genbank import Genbank\n", " from pydna.amplify import pcr\n", " from pydna.assembly import Assembly" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Dseqrecord" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "ldsr = Dseqrecord(\"aaa\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydna.dseqrecord.Dseqrecord" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(ldsr)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dseqrecord(-3)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ldsr" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Dseqrecord(-3), Dseqrecord(-3)]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[ldsr, 
ldsr]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "cdsr = Dseqrecord(\"aaa\", circular=True)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydna.dseqrecord.Dseqrecord" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(cdsr)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dseqrecord(o3)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cdsr" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Dseqrecord(o3), Dseqrecord(o3)]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[cdsr, cdsr]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dseqrecord(o3)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cdsr.reverse_complement()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Dseqrecord(o3), Dseqrecord(o3)]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[cdsr.reverse_complement(), cdsr.reverse_complement()]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "fromstring = read(\">string\\naaaa\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydna.dseqrecord.Dseqrecord" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(fromstring)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dseqrecord(-4)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"fromstring" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Dseqrecord(-4), Dseqrecord(-4)]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[fromstring, fromstring]" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Dseqrecord(-4), Dseqrecord(-4)]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[fromstring.reverse_complement(), fromstring.reverse_complement()]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### GenbankRecord" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "from pydna.genbankrecord import GenbankRecord" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# The GenbankRecord is not meant to be used directly\n", "gbr = GenbankRecord(\"aaa\", item=\"AccessionNumber\", start=1, stop=3, circular=True)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydna.genbankrecord.GenbankRecord" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(gbr)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "AccessionNumber 1-3" ], "text/plain": [ "Gbnk(o3 AccessionNumber 1-3)" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# This link is dead as expected\n", "gbr" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AccessionNumber 1-3" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbr.hyperlink" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "AccessionNumber 1-3" ], "text/plain": [ 
"Gbnk(o3 AccessionNumber 1-3)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbr.reverse_complement()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AccessionNumber 1-3" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbr.reverse_complement().hyperlink" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AccessionNumber 1-3" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbr.reverse_complement().reverse_complement().hyperlink" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "gb = Genbank(\"bjornjobb@gmail.com\")" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "gbr2 = gb.nucleotide(\"E05006\")" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydna.genbankrecord.GenbankRecord" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(gbr2)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "E05006" ], "text/plain": [ "Gbnk(-25 E05006)" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbr2" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "E05006" ], "text/plain": [ "Gbnk(-25 E05006)" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbr2.reverse_complement()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "gbr3 = gb.nucleotide(\"E05006 REGION: 5..15\")" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"pydna.genbankrecord.GenbankRecord" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(gbr3)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "gbr4 = gb.nucleotide(\"E05006 REGION: complement(5..15)\")" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydna.genbankrecord.GenbankRecord" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(gbr4)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "E05006 5-15" ], "text/plain": [ "Gbnk(-11 E05006 5-15)" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbr4" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "E05006 5-15" ], "text/plain": [ "Gbnk(-11 E05006 5-15)" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbr4.reverse_complement()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### GenbankFile" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "from pydna.genbankfile import GenbankFile" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "gbf = GenbankFile(\"aaa\")" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydna.genbankfile.GenbankFile" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(gbf)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "None
" ], "text/plain": [ "File(id)(-3)" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The GenbankFile is not supposed to be used directly\n", "# The link below is dead as expected\n", "gbf" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "gbf1 = read(\"sequence.gb\")" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydna.genbankfile.GenbankFile" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(gbf1)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "sequence.gb
" ], "text/plain": [ "File(E05006.1)(-25)" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbf1" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/html": [ "sequence.gb
" ], "text/plain": [ "File(E05006.1_rc)(-25)" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbf1.reverse_complement()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "gbf2 = read(\"subfolder/sequence.gb\")" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydna.genbankfile.GenbankFile" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(gbf2)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/html": [ "subfolder/sequence.gb
" ], "text/plain": [ "File(E05006.1)(-25)" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbf2" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/html": [ "subfolder/sequence.gb
" ], "text/plain": [ "File(E05006.1_rc)(-25)" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbf2.reverse_complement()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Amplicon" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "from pydna.amplicon import Amplicon" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "# The Amplicon class is not meant to be used directly, but it is possible\n", "amp = Amplicon(\"aaa\")" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydna.amplicon.Amplicon" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(amp)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/html": [ "Amplicon(3)" ], "text/plain": [ "Amplicon(3)" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "amp" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "primers = parse_primers(\n", " '''\n", ">ForwardPrimer\n", "gctactacacacgtactgactg\n", ">ReversePrimer\n", "tgtggttactgactctatcttg '''\n", ")" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "temp = Dseqrecord(\"gctactacacacgtactgactg\" + \"gatc\" * 239 + \"caagatagagtcagtaaccaca\")" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "prd = pcr(primers, temp)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydna.amplicon.Amplicon" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(prd)" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"5gctactacacacgtactgactg...caagatagagtcagtaaccaca3\n", " ||||||||||||||||||||||\n", " 3gttctatctcagtcattggtgt5\n", "5gctactacacacgtactgactg3\n", " ||||||||||||||||||||||\n", "3cgatgatgtgtgcatgactgac...gttctatctcagtcattggtgt5" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "prd.figure()" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "|95°C|95°C | |tmf:62.4\n", "|____|_____ 72°C|72°C|tmr:59.1\n", "|3min|30s \\ 58.6°C _____|____|45s/kb\n", "| | \\______/ 0:45|5min|GC 49%\n", "| | 30s | |1000bp" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "prd.program()" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(Amplicon(1000), Amplicon(1000))" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(prd, prd)" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/html": [ "Amplicon(1000)" ], "text/plain": [ "Amplicon(1000)" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "prd.reverse_complement()" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(Amplicon(1000), Amplicon(1000))" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(prd.reverse_complement(), prd.reverse_complement())" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "5tgtggttactgactctatcttg...cagtcagtacgtgtgtagtagc3\n", " ||||||||||||||||||||||\n", " 3gtcagtcatgcacacatcatcg5\n", "5tgtggttactgactctatcttg3\n", " ||||||||||||||||||||||\n", "3acaccaatgactgagatagaac...gtcagtcatgcacacatcatcg5" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "prd.reverse_complement().figure()" ] 
}, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "|95°C|95°C | |tmf:59.1\n", "|____|_____ 72°C|72°C|tmr:62.4\n", "|3min|30s \\ 58.6°C _____|____|45s/kb\n", "| | \\______/ 0:45|5min|GC 49%\n", "| | 30s | |1000bp" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "prd.reverse_complement().program()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Contig" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [], "source": [ "from pydna.contig import Contig" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [], "source": [ "# Contig is not meant to be used directly, but it is possible\n", "cnt = Contig(\"aaa\")" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydna.contig.Contig" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(cnt)" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "a = Dseqrecord(\"acgatgctatactaagCCCCtgtgctgtgctct\", name=\"SequenceA\")\n", "b = Dseqrecord(\"tgtgctgtgctctTTTTTTTtattctggctgtat\", name=\"SequenceB\")\n", "c = Dseqrecord(\"tattctggctgtatGGGGGtacgatgctatactaa\", name=\"SequenceC\")\n", "x = Assembly((a, b, c), limit=13)" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Assembly\n", "fragments..: 33bp 34bp 35bp\n", "limit(bp)..: 13\n", "G.nodes....: 6\n", "algorithm..: common_sub_strings" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydna.assembly.Assembly" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(x)" ] }, { "cell_type": "code", 
"execution_count": 69, "metadata": {}, "outputs": [], "source": [ "cnt = x.assemble_circular()[0]" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pydna.contig.Contig" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(cnt)" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
 -|SequenceA|13\n",
       "|            \\/\n",
       "|            /\\\n",
       "|            13|SequenceB|14\n",
       "|                         \\/\n",
       "|                         /\\\n",
       "|                         14|SequenceC|15\n",
       "|                                      \\/\n",
       "|                                      /\\\n",
       "|                                      15-\n",
       "|                                         |\n",
       " -----------------------------------------
" ], "text/plain": [ "Contig(o60)" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "|||||||||||||||\n", "acgatgctatactaagCCCCtgtgctgtgctct\n", " TGTGCTGTGCTCT\n", " tgtgctgtgctctTTTTTTTtattctggctgtat\n", " TATTCTGGCTGTAT\n", " tattctggctgtatGGGGGtacgatgctatactaa\n", " ACGATGCTATACTAA\n" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt.detailed_figure()" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
 -|SequenceC_rc|14\n",
       "|               \\/\n",
       "|               /\\\n",
       "|               14|SequenceB_rc|13\n",
       "|                               \\/\n",
       "|                               /\\\n",
       "|                               13|SequenceA_rc|15\n",
       "|                                               \\/\n",
       "|                                               /\\\n",
       "|                                               15-\n",
       "|                                                  |\n",
       " --------------------------------------------------
" ], "text/plain": [ "Contig(o60)" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt.reverse_complement()" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "|||||||||||||||\n", "ttagtatagcatcgtaCCCCCatacagccagaata\n", " ATACAGCCAGAATA\n", " atacagccagaataAAAAAAAagagcacagcaca\n", " AGAGCACAGCACA\n", " agagcacagcacaGGGGcttagtatagcatcgt\n", " TTAGTATAGCATCGT\n" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cnt.reverse_complement().detailed_figure()" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [conda env:bjorn311]", "language": "python", "name": "conda-env-bjorn311-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 4 }