{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from __future__ import print_function\n", "import os\n", "\n", "import dendropy" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Genome alignment" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mafft --localpair --maxiterate 1000 --ep 0.123 --reorder sample.fasta\n" ] } ], "source": [ "from Bio.Align.Applications import MafftCommandline\n", "mafft_cline = MafftCommandline(input='sample.fasta', ep=0.123, reorder=True, maxiterate=1000, localpair=True)\n", "print(mafft_cline)\n", "stdout, stderr = mafft_cline()\n", "with open('align.fasta', 'w') as w:\n", " w.write(stdout)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "os.system('trimal -automated1 -in align.fasta -out trim.fasta -fasta')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Protein alignment" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "muscle -in NP_P.fasta\n", "muscle -in L_P.fasta\n", "muscle -in VP35_P.fasta\n", "muscle -in VP40_P.fasta\n" ] } ], "source": [ "from Bio.Align.Applications import MuscleCommandline\n", "\n", "my_genes = ['NP', 'L', 'VP35', 'VP40']\n", "\n", "for gene in my_genes:\n", " muscle_cline = MuscleCommandline(input='%s_P.fasta' % gene)\n", " print(muscle_cline)\n", " stdout, stderr = muscle_cline()\n", " with open('%s_P_align.fasta' % gene, 'w') as w:\n", " w.write(stdout)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from Bio import SeqIO\n", "from Bio.Seq import Seq\n", "from Bio.SeqRecord import SeqRecord\n", "from Bio.Alphabet import generic_protein\n", "\n", "for gene in my_genes:\n", " gene_seqs = {}\n", " unal_gene = SeqIO.parse('%s.fasta' % gene, 'fasta')\n", " for rec in unal_gene:\n", " gene_seqs[rec.id] = rec.seq\n", "\n", " al_prot = SeqIO.parse('%s_P_align.fasta' % gene, 'fasta')\n", " al_genes = []\n", " for protein in al_prot:\n", " my_id = protein.id\n", " seq = ''\n", " pos = 0\n", " for c in protein.seq:\n", " if c == '-':\n", " seq += '---'\n", " else:\n", " seq += str(gene_seqs[my_id][pos:pos + 3])\n", " pos += 3\n", " al_genes.append(SeqRecord(Seq(seq), id=my_id))\n", "\n", "\n", " SeqIO.write(al_genes, '%s_align.fasta' % gene, 'fasta')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }