{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import dendropy" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ebola_raxml = dendropy.Tree.get_from_path('my_ebola.nex', 'nexus')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BDBV_KC545395 18891 bp: 1 1\n", "SUDV_EU338380 18891 bp: 9 9\n", "SUDV_KC242783 18891 bp: 10 10\n", "SUDV_FJ968794 18891 bp: 10 10\n", "SUDV_KC589025 18891 bp: 9 9\n", "SUDV_AY729654 18891 bp: 10 10\n", "SUDV_JN638998 18891 bp: 10 10\n", "RESTV_FJ621584 18891 bp: 8 8\n", "RESTV_JX477165 18891 bp: 10 10\n", "RESTV_FJ621583 18891 bp: 10 10\n", "RESTV_JX477166 18891 bp: 11 11\n", "RESTV_AB050936 18891 bp: 11 11\n", "RESTV_FJ621585 18891 bp: 10 10\n", "EBOV_2014_KM034561 18891 bp: 10 10\n", "EBOV_2014_KM034562 18891 bp: 10 10\n", "EBOV_2014_KM034557 18891 bp: 12 12\n", "EBOV_2014_KM034558 18891 bp: 14 14\n", "EBOV_2014_KM034556 18891 bp: 14 14\n", "EBOV_2014_KM034560 18891 bp: 13 13\n", "EBOV_2014_KM233113 18891 bp: 13 13\n", "EBOV_2014_KM233114 18891 bp: 13 13\n", "EBOV_2014_KM233116 18891 bp: 13 13\n", "EBOV_2014_KM233115 18891 bp: 14 14\n", "EBOV_2014_KM233117 18891 bp: 15 15\n", "EBOV_2014_KM233118 18891 bp: 15 15\n", "EBOV_2014_KM034559 18891 bp: 10 10\n", "EBOV_2014_KM034563 18891 bp: 8 8\n", "EBOV_1976_KC242801 18891 bp: 10 10\n", "EBOV_1976_AF272001 18891 bp: 10 10\n", "EBOV_1995_KC242796 18891 bp: 10 10\n", "EBOV_1995_KC242799 18891 bp: 10 10\n", "EBOV_2007_KC242788 18891 bp: 10 10\n", "EBOV_2007_KC242787 18891 bp: 10 10\n", "EBOV_2007_KC242784 18891 bp: 10 10\n", "EBOV_2007_KC242785 18891 bp: 12 12\n", "EBOV_2007_KC242790 18891 bp: 12 12\n", "EBOV_2007_KC242786 18891 bp: 12 12\n", "EBOV_2007_KC242789 18891 bp: 12 12\n", "TAFV_FJ217162 18891 bp: 5 5\n", "BDBV_FJ217161 18891 bp: 4 4\n", "BDBV_KC545396 18891 bp: 3 3\n", "BDBV_KC545394 18891 bp: 2 2\n", "BDBV_KC545393 18891 bp: 1 1\n" ] } ], "source": [ "def compute_level(node, level=0):\n", " for child in node.child_nodes():\n", " compute_level(child, level + 1)\n", " if node.taxon is not None:\n", " print(\"%s: %d %d\" % (node.taxon, node.level(), level))\n", "\n", "compute_level(ebola_raxml.seed_node)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BDBV_KC545395 18891 bp: 0 1\n", "SUDV_EU338380 18891 bp: 0 9\n", "SUDV_KC242783 18891 bp: 0 10\n", "SUDV_FJ968794 18891 bp: 0 10\n", "Internal: 1 9\n", "Internal: 2 8\n", "SUDV_KC589025 18891 bp: 0 9\n", "SUDV_AY729654 18891 bp: 0 10\n", "SUDV_JN638998 18891 bp: 0 10\n", "Internal: 1 9\n", "Internal: 2 8\n", "Internal: 3 7\n", "RESTV_FJ621584 18891 bp: 0 8\n", "RESTV_JX477165 18891 bp: 0 10\n", "RESTV_FJ621583 18891 bp: 0 10\n", "Internal: 1 9\n", "RESTV_JX477166 18891 bp: 0 11\n", "RESTV_AB050936 18891 bp: 0 11\n", "Internal: 1 10\n", "RESTV_FJ621585 18891 bp: 0 10\n", "Internal: 2 9\n", "Internal: 3 8\n", "Internal: 4 7\n", "Internal: 5 6\n", "EBOV_2014_KM034561 18891 bp: 0 10\n", "EBOV_2014_KM034562 18891 bp: 0 10\n", "Internal: 1 9\n", "EBOV_2014_KM034557 18891 bp: 0 12\n", "EBOV_2014_KM034558 18891 bp: 0 14\n", "EBOV_2014_KM034556 18891 bp: 0 14\n", "Internal: 1 13\n", "EBOV_2014_KM034560 18891 bp: 0 13\n", "Internal: 2 12\n", "Internal: 3 11\n", "EBOV_2014_KM233113 18891 bp: 0 13\n", "EBOV_2014_KM233114 18891 bp: 0 13\n", "Internal: 1 12\n", "EBOV_2014_KM233116 18891 bp: 0 13\n", "EBOV_2014_KM233115 18891 bp: 0 14\n", "EBOV_2014_KM233117 18891 bp: 0 15\n", "EBOV_2014_KM233118 18891 bp: 0 15\n", "Internal: 1 14\n", "Internal: 2 13\n", "Internal: 3 12\n", "Internal: 4 11\n", "Internal: 5 10\n", "EBOV_2014_KM034559 18891 bp: 0 10\n", "Internal: 6 9\n", "Internal: 7 8\n", "EBOV_2014_KM034563 18891 bp: 0 8\n", "Internal: 8 7\n", "EBOV_1976_KC242801 18891 bp: 0 10\n", "EBOV_1976_AF272001 18891 bp: 0 10\n", "Internal: 1 9\n", "EBOV_1995_KC242796 18891 bp: 0 10\n", "EBOV_1995_KC242799 18891 bp: 0 10\n", "Internal: 1 9\n", "Internal: 2 8\n", "EBOV_2007_KC242788 18891 bp: 0 10\n", "EBOV_2007_KC242787 18891 bp: 0 10\n", "Internal: 1 9\n", "EBOV_2007_KC242784 18891 bp: 0 10\n", "EBOV_2007_KC242785 18891 bp: 0 12\n", "EBOV_2007_KC242790 18891 bp: 0 12\n", "Internal: 1 11\n", "EBOV_2007_KC242786 18891 bp: 0 12\n", "EBOV_2007_KC242789 18891 bp: 0 12\n", "Internal: 1 11\n", "Internal: 2 10\n", "Internal: 3 9\n", "Internal: 4 8\n", "Internal: 5 7\n", "Internal: 9 6\n", "Internal: 10 5\n", "TAFV_FJ217162 18891 bp: 0 5\n", "Internal: 11 4\n", "BDBV_FJ217161 18891 bp: 0 4\n", "Internal: 12 3\n", "BDBV_KC545396 18891 bp: 0 3\n", "Internal: 13 2\n", "BDBV_KC545394 18891 bp: 0 2\n", "Internal: 14 1\n", "BDBV_KC545393 18891 bp: 0 1\n", "Internal: 15 0\n" ] }, { "data": { "text/plain": [ "15" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def compute_height(node):\n", " children = node.child_nodes()\n", " if len(children) == 0:\n", " height = 0\n", " else:\n", " height = 1 + max(map(lambda x: compute_height(x), children))\n", " desc = node.taxon or 'Internal'\n", " print(\"%s: %d %d\" % (desc, height, node.level()))\n", " return height\n", "\n", "compute_height(ebola_raxml.seed_node)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BDBV_KC545395 18891 bp: 0 1\n", "SUDV_EU338380 18891 bp: 0 9\n", "SUDV_KC242783 18891 bp: 0 10\n", "SUDV_FJ968794 18891 bp: 0 10\n", "Internal: 2 9\n", "Internal: 2 8\n", "SUDV_KC589025 18891 bp: 0 9\n", "SUDV_AY729654 18891 bp: 0 10\n", "SUDV_JN638998 18891 bp: 0 10\n", "Internal: 2 9\n", "Internal: 2 8\n", "Internal: 2 7\n", "RESTV_FJ621584 18891 bp: 0 8\n", "RESTV_JX477165 18891 bp: 0 10\n", "RESTV_FJ621583 18891 bp: 0 10\n", "Internal: 2 9\n", "RESTV_JX477166 18891 bp: 0 11\n", "RESTV_AB050936 18891 bp: 0 11\n", "Internal: 2 10\n", "RESTV_FJ621585 18891 bp: 0 10\n", "Internal: 2 9\n", "Internal: 2 8\n", "Internal: 2 7\n", "Internal: 2 6\n", "EBOV_2014_KM034561 18891 bp: 0 10\n", "EBOV_2014_KM034562 18891 bp: 0 10\n", "Internal: 2 9\n", "EBOV_2014_KM034557 18891 bp: 0 12\n", "EBOV_2014_KM034558 18891 bp: 0 14\n", "EBOV_2014_KM034556 18891 bp: 0 14\n", "Internal: 2 13\n", "EBOV_2014_KM034560 18891 bp: 0 13\n", "Internal: 2 12\n", "Internal: 2 11\n", "EBOV_2014_KM233113 18891 bp: 0 13\n", "EBOV_2014_KM233114 18891 bp: 0 13\n", "Internal: 2 12\n", "EBOV_2014_KM233116 18891 bp: 0 13\n", "EBOV_2014_KM233115 18891 bp: 0 14\n", "EBOV_2014_KM233117 18891 bp: 0 15\n", "EBOV_2014_KM233118 18891 bp: 0 15\n", "Internal: 2 14\n", "Internal: 2 13\n", "Internal: 2 12\n", "Internal: 2 11\n", "Internal: 2 10\n", "EBOV_2014_KM034559 18891 bp: 0 10\n", "Internal: 2 9\n", "Internal: 2 8\n", "EBOV_2014_KM034563 18891 bp: 0 8\n", "Internal: 2 7\n", "EBOV_1976_KC242801 18891 bp: 0 10\n", "EBOV_1976_AF272001 18891 bp: 0 10\n", "Internal: 2 9\n", "EBOV_1995_KC242796 18891 bp: 0 10\n", "EBOV_1995_KC242799 18891 bp: 0 10\n", "Internal: 2 9\n", "Internal: 2 8\n", "EBOV_2007_KC242788 18891 bp: 0 10\n", "EBOV_2007_KC242787 18891 bp: 0 10\n", "Internal: 2 9\n", "EBOV_2007_KC242784 18891 bp: 0 10\n", "EBOV_2007_KC242785 18891 bp: 0 12\n", "EBOV_2007_KC242790 18891 bp: 0 12\n", "Internal: 2 11\n", "EBOV_2007_KC242786 18891 bp: 0 12\n", "EBOV_2007_KC242789 18891 bp: 0 12\n", "Internal: 2 11\n", "Internal: 2 10\n", "Internal: 2 9\n", "Internal: 2 8\n", "Internal: 2 7\n", "Internal: 2 6\n", "Internal: 2 5\n", "TAFV_FJ217162 18891 bp: 0 5\n", "Internal: 2 4\n", "BDBV_FJ217161 18891 bp: 0 4\n", "Internal: 2 3\n", "BDBV_KC545396 18891 bp: 0 3\n", "Internal: 2 2\n", "BDBV_KC545394 18891 bp: 0 2\n", "Internal: 2 1\n", "BDBV_KC545393 18891 bp: 0 1\n", "Internal: 3 0\n" ] } ], "source": [ "def compute_nofs(node):\n", " children = node.child_nodes()\n", " nofs = len(children)\n", " map(lambda x: compute_nofs(x), children)\n", " desc = node.taxon or 'Internal'\n", " print(\"%s: %d %d\" % (desc, nofs, node.level()))\n", "\n", "compute_nofs(ebola_raxml.seed_node)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BDBV_KC545395 18891 bp (1)\n", "SUDV_EU338380 18891 bp (9)\n", "SUDV_KC242783 18891 bp (10)\n", "SUDV_FJ968794 18891 bp (10)\n", "SUDV_KC589025 18891 bp (9)\n", "SUDV_AY729654 18891 bp (10)\n", "SUDV_JN638998 18891 bp (10)\n", "RESTV_FJ621584 18891 bp (8)\n", "RESTV_JX477165 18891 bp (10)\n", "RESTV_FJ621583 18891 bp (10)\n", "RESTV_JX477166 18891 bp (11)\n", "RESTV_AB050936 18891 bp (11)\n", "RESTV_FJ621585 18891 bp (10)\n", "EBOV_2014_KM034561 18891 bp (10)\n", "EBOV_2014_KM034562 18891 bp (10)\n", "EBOV_2014_KM034557 18891 bp (12)\n", "EBOV_2014_KM034558 18891 bp (14)\n", "EBOV_2014_KM034556 18891 bp (14)\n", "EBOV_2014_KM034560 18891 bp (13)\n", "EBOV_2014_KM233113 18891 bp (13)\n", "EBOV_2014_KM233114 18891 bp (13)\n", "EBOV_2014_KM233116 18891 bp (13)\n", "EBOV_2014_KM233115 18891 bp (14)\n", "EBOV_2014_KM233117 18891 bp (15)\n", "EBOV_2014_KM233118 18891 bp (15)\n", "EBOV_2014_KM034559 18891 bp (10)\n", "EBOV_2014_KM034563 18891 bp (8)\n", "EBOV_1976_KC242801 18891 bp (10)\n", "EBOV_1976_AF272001 18891 bp (10)\n", "EBOV_1995_KC242796 18891 bp (10)\n", "EBOV_1995_KC242799 18891 bp (10)\n", "EBOV_2007_KC242788 18891 bp (10)\n", "EBOV_2007_KC242787 18891 bp (10)\n", "EBOV_2007_KC242784 18891 bp (10)\n", "EBOV_2007_KC242785 18891 bp (12)\n", "EBOV_2007_KC242790 18891 bp (12)\n", "EBOV_2007_KC242786 18891 bp (12)\n", "EBOV_2007_KC242789 18891 bp (12)\n", "TAFV_FJ217162 18891 bp (5)\n", "BDBV_FJ217161 18891 bp (4)\n", "BDBV_KC545396 18891 bp (3)\n", "BDBV_KC545394 18891 bp (2)\n", "BDBV_KC545393 18891 bp (1)\n" ] } ], "source": [ "def print_nodes(node):\n", " for child in node.child_nodes():\n", " print_nodes(child)\n", " if node.taxon is not None:\n", " print('%s (%d)' % (node.taxon, node.level()))\n", "\n", "print_nodes(ebola_raxml.seed_node)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BDBV_KC545395 18891 bp (1)\n", "BDBV_KC545393 18891 bp (1)\n", "BDBV_KC545394 18891 bp (2)\n", "BDBV_KC545396 18891 bp (3)\n", "BDBV_FJ217161 18891 bp (4)\n", "TAFV_FJ217162 18891 bp (5)\n", "RESTV_FJ621584 18891 bp (8)\n", "EBOV_2014_KM034563 18891 bp (8)\n", "SUDV_EU338380 18891 bp (9)\n", "SUDV_KC589025 18891 bp (9)\n", "SUDV_KC242783 18891 bp (10)\n", "SUDV_FJ968794 18891 bp (10)\n", "SUDV_AY729654 18891 bp (10)\n", "SUDV_JN638998 18891 bp (10)\n", "RESTV_JX477165 18891 bp (10)\n", "RESTV_FJ621583 18891 bp (10)\n", "RESTV_FJ621585 18891 bp (10)\n", "EBOV_2014_KM034561 18891 bp (10)\n", "EBOV_2014_KM034562 18891 bp (10)\n", "EBOV_2014_KM034559 18891 bp (10)\n", "EBOV_1976_KC242801 18891 bp (10)\n", "EBOV_1976_AF272001 18891 bp (10)\n", "EBOV_1995_KC242796 18891 bp (10)\n", "EBOV_1995_KC242799 18891 bp (10)\n", "EBOV_2007_KC242788 18891 bp (10)\n", "EBOV_2007_KC242787 18891 bp (10)\n", "EBOV_2007_KC242784 18891 bp (10)\n", "RESTV_JX477166 18891 bp (11)\n", "RESTV_AB050936 18891 bp (11)\n", "EBOV_2014_KM034557 18891 bp (12)\n", "EBOV_2007_KC242785 18891 bp (12)\n", "EBOV_2007_KC242790 18891 bp (12)\n", "EBOV_2007_KC242786 18891 bp (12)\n", "EBOV_2007_KC242789 18891 bp (12)\n", "EBOV_2014_KM034560 18891 bp (13)\n", "EBOV_2014_KM233113 18891 bp (13)\n", "EBOV_2014_KM233114 18891 bp (13)\n", "EBOV_2014_KM233116 18891 bp (13)\n", "EBOV_2014_KM034558 18891 bp (14)\n", "EBOV_2014_KM034556 18891 bp (14)\n", "EBOV_2014_KM233115 18891 bp (14)\n", "EBOV_2014_KM233117 18891 bp (15)\n", "EBOV_2014_KM233118 18891 bp (15)\n" ] } ], "source": [ "from collections import deque\n", "\n", "def print_breadth(tree):\n", " queue = deque()\n", " queue.append(tree.seed_node)\n", " while len(queue) > 0:\n", " process_node = queue.popleft()\n", " if process_node.taxon is not None:\n", " print('%s (%d)' % (process_node.taxon, process_node.level()))\n", " else:\n", " for child in process_node.child_nodes():\n", " queue.append(child)\n", "\n", "print_breadth(ebola_raxml)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#remove\n", "#def clean_comments(node, depth=0):\n", "# for child in node.child_nodes():\n", "# clean_comments(child, depth + 1)\n", "# if node.taxon is None:\n", "# node.comments = None\n", "\n", "#clean_comments(ebola_raxml.seed_node)\n", "#ebola_raxml.write_to_path('ebola_clean.nex', 'nexus')\n", "#ebov_2014_raxml = dendropy.Tree.get_from_path('my_ebov_2014.nex', 'nexus')\n", "#clean_comments(ebov_2014_raxml.seed_node)\n", "#ebov_2014_raxml.write_to_path('ebov_2014_clean.nex', 'nexus')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(set(['BDBV']), 1)\n", "(set(['SUDV']), 6)\n", "(set(['RESTV']), 6)\n", "(set(['EBOV2014']), 14)\n", "(set(['EBOV1976']), 2)\n", "(set(['EBOV1995']), 2)\n", "(set(['EBOV2007']), 7)\n", "(set(['TAFV']), 1)\n", "(set(['BDBV']), 1)\n", "(set(['BDBV']), 1)\n", "(set(['BDBV']), 1)\n", "(set(['BDBV']), 1)\n" ] } ], "source": [ "from copy import deepcopy\n", "simple_ebola = deepcopy(ebola_raxml)\n", "\n", "def simplify_tree(node):\n", " prefs = set()\n", " for leaf in node.leaf_nodes():\n", " my_toks = leaf.taxon.label.split(' ')[0].split('_')\n", " if my_toks[0] == 'EBOV':\n", " prefs.add('EBOV' + my_toks[1])\n", " else:\n", " prefs.add(my_toks[0])\n", " if len(prefs) == 1:\n", " print(prefs, len(node.leaf_nodes()))\n", " node.taxon = dendropy.Taxon(label=list(prefs)[0])\n", " #node.collapse_clade()\n", " node.set_child_nodes([])\n", " else:\n", " for child in node.child_nodes():\n", " simplify_tree(child)\n", "\n", "simplify_tree(simple_ebola.seed_node)\n", "simple_ebola.ladderize()\n", "simple_ebola.write_to_path('ebola_simple.nex', 'nexus')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }