# Letter colours are drawn from seaborn's 'colorblind' palette.
_pal = sns.color_palette('colorblind')
# 'U' shares the colour of 'T' so that RNA matrices (which have a glyph
# below) do not fail with a KeyError when their colour is looked up.
default_colors = {'A': _pal[1], 'C': _pal[0], 'G': _pal[4], 'T': _pal[2],
                  'U': _pal[2]}

# SVG path data (M/L/Q/Z commands) for each letter, generated from the
# "Source Code Pro - Medium" font as described in the notebook introduction.
# The tops of the letters have negative y (e.g. the crossbar of 'T' sits at
# y = -655 .. -571), which is why the drawing code flips the outline.
default_glyphs = {}
default_glyphs['A'] = """\
M 235,-357
L 208,-267
L 389,-267
L 362,-357
Q 346,-411 330,-465
Q 314,-519 300,-575
L 296,-575
Q 282,-519 266,-465
Q 251,-411 235,-357
Z
M 26,0
L 242,-655
L 358,-655
L 574,0
L 468,0
L 412,-188
L 184,-188
L 128,0
L 26,0
Z
"""
default_glyphs['C'] = """\
M 352,12
Q 291,12 238,-10
Q 185,-33 146,-76
Q 107,-120 84,-182
Q 62,-245 62,-326
Q 62,-406 84,-469
Q 107,-532 146,-576
Q 186,-620 240,-643
Q 295,-667 360,-667
Q 420,-667 467,-642
Q 514,-618 544,-586
L 488,-523
Q 463,-549 432,-564
Q 401,-580 360,-580
Q 316,-580 280,-562
Q 244,-545 218,-512
Q 193,-480 179,-433
Q 165,-387 165,-329
Q 165,-270 179,-223
Q 193,-176 219,-143
Q 245,-110 281,-92
Q 317,-75 362,-75
Q 405,-75 439,-93
Q 473,-111 502,-144
L 558,-83
Q 519,-37 468,-12
Q 418,12 352,12
Z
"""
default_glyphs['G'] = """\
M 339,12
Q 277,12 224,-10
Q 171,-33 132,-76
Q 94,-120 72,-182
Q 51,-245 51,-326
Q 51,-406 73,-469
Q 95,-533 134,-577
Q 173,-621 227,-644
Q 281,-667 344,-667
Q 409,-667 454,-641
Q 499,-616 528,-586
L 472,-523
Q 449,-548 419,-564
Q 389,-580 344,-580
Q 302,-580 267,-562
Q 232,-545 207,-512
Q 182,-480 168,-433
Q 154,-387 154,-329
Q 154,-211 203,-143
Q 252,-75 345,-75
Q 375,-75 401,-84
Q 428,-93 445,-109
L 445,-265
L 324,-265
L 324,-347
L 537,-347
L 537,-65
Q 505,-33 453,-10
Q 402,12 339,12
Z
"""
default_glyphs['T'] = """\
M 250,0
L 250,-571
L 39,-571
L 39,-655
L 561,-655
L 561,-571
L 350,-571
L 350,0
L 250,0
Z
"""
default_glyphs['U'] = """\
M 301,12
Q 250,12 208,-3
Q 167,-18 137,-49
Q 107,-81 91,-130
Q 75,-180 75,-249
L 75,-655
L 176,-655
L 176,-243
Q 176,-153 210,-114
Q 245,-75 301,-75
Q 357,-75 392,-114
Q 428,-153 428,-243
L 428,-655
L 525,-655
L 525,-249
Q 525,-180 509,-130
Q 493,-81 464,-49
Q 435,-18 393,-3
Q 352,12 301,12
Z
"""
def _get_glyph(path_data, color, x, y, dx, dy, **kwargs):
    """Turn the SVG path data of one letter into a patch filling a given box.

    The outline is normalised to the unit square, flipped vertically, then
    scaled and shifted so that it spans ``(x, y)`` to ``(x + dx, y + dy)``.

    Parameters
    ----------
    path_data : str
        SVG path string; parsed with ``parse_path``.
    color
        Face colour, used unless ``facecolor`` is supplied in ``kwargs``.
    x, y : float
        Offset added to the scaled outline.
    dx, dy : float
        Width and height the normalised outline is scaled to.
    **kwargs
        Forwarded to ``PathPatch``. ``edgecolor`` defaults to ``'none'``.

    Returns
    -------
    PathPatch
        The patch for the letter; it is not added to any axes here.
    """
    kwargs.setdefault('facecolor', color)
    kwargs.setdefault('edgecolor', 'none')
    path = parse_path(path_data)
    # The path was parsed just above, so its vertices can be edited in place.
    verts = path.vertices
    # normalize both axes to [0, 1] ...
    verts -= verts.min(axis=0)
    verts /= verts.max(axis=0)
    # ... and flip upside down (the glyph data has its top at negative y)
    verts[:, 1] = 1 - verts[:, 1]
    # scale then translate
    verts *= [dx, dy]
    verts += [x, y]
    return PathPatch(path, **kwargs)


def _draw_logo(ax, matrix, charwidth, glyphs=default_glyphs, colors=default_colors):
    """Stack letter patches on ``ax``, one column of letters per matrix row.

    Parameters
    ----------
    ax
        Object the patches are attached to via ``ax.add_artist``.
    matrix : pandas.DataFrame
        One row per position; column labels are the letters and the values
        are the heights of the letters at that position.
    charwidth : float
        Width of one column; row ``i`` starts at ``i * charwidth``.
    glyphs, colors : mapping
        Looked up by letter (column label) to get the SVG path data and the
        face colour. A letter missing from either raises ``KeyError``.
    """
    for i, (_, position) in enumerate(matrix.iterrows()):
        bottom = 0
        # Ascending sort: the smallest letter is drawn first, at the bottom,
        # and the largest ends up on top of the stack.
        # ``items()`` is used because ``Series.iteritems()`` was removed in
        # pandas 2.0; ``items()`` is available in older releases as well.
        for letter, height in position.sort_values().items():
            patch = _get_glyph(glyphs[letter], colors[letter],
                               i * charwidth, bottom, charwidth, height)
            ax.add_artist(patch)
            bottom += height


def plot_seqlogo(ax, pfm, info=False, charwidth=1.0, **kwargs):
    """Draw a sequence logo for a position frequency matrix on ``ax``.

    Parameters
    ----------
    ax
        Matplotlib axes to draw on; its x limits and x ticks are replaced.
    pfm : pandas.DataFrame
        One row per sequence position, one column per letter.
    info : bool, optional
        If true, every row is scaled by ``2 - entropy(row)`` (in bits) and
        the y axis is fixed to ``[0, 2]`` with ticks at 0, 1 and 2. If false
        the values of ``pfm`` are used as letter heights directly and the
        y axis is left untouched.
    charwidth : float, optional
        Width of one logo column in data units.
    **kwargs
        Forwarded to ``_draw_logo`` (``glyphs`` and ``colors``).
    """
    # 2 bits = log2(4); kept fixed, i.e. a four-letter alphabet is assumed.
    max_bits = 2

    if info:
        def _entropy(p):
            # Zero (or missing) frequencies contribute nothing to the sum;
            # dropping them up front avoids evaluating log2(0).
            p = p[p > 0]
            return -(p * np.log2(p)).sum()

        info_content = max_bits - pfm.apply(_entropy, axis=1)
        matrix = pfm.mul(info_content, axis=0)
    else:
        matrix = pfm

    seqlen = len(pfm)
    _draw_logo(ax, matrix, charwidth, **kwargs)
    ax.set_xlim([0, seqlen * charwidth])

    # Tick positions must follow charwidth, exactly like the glyphs and the
    # x limits do; otherwise they drift apart for any charwidth other than 1.
    left_edges = np.arange(seqlen) * charwidth

    # major ticks: unlabeled marks on the boundaries between columns
    ax.xaxis.set_major_locator(ticker.FixedLocator(left_edges))
    ax.xaxis.set_major_formatter(ticker.NullFormatter())
    ax.tick_params(which='major', direction='out')
    # minor ticks: invisible marks at the column centres carrying the
    # 1-based position labels
    ax.xaxis.set_minor_locator(
        ticker.FixedLocator(left_edges + 0.5 * charwidth))
    ax.xaxis.set_minor_formatter(
        ticker.FixedFormatter([str(n) for n in range(1, seqlen + 1)]))
    ax.tick_params(which='minor', length=0)

    if info:
        ax.set_ylim([0, max_bits])
        ax.yaxis.set_major_locator(ticker.FixedLocator([0., 1., 2.]))
\n", " | A | \n", "C | \n", "G | \n", "T | \n", "
---|---|---|---|---|
P0 | \n", "\n", " | \n", " | \n", " | \n", " |
P1 | \n", "0.047246 | \n", "0.003571 | \n", "0.896649 | \n", "0.052534 | \n", "
P2 | \n", "0.065848 | \n", "0.884012 | \n", "0.004934 | \n", "0.045206 | \n", "
P3 | \n", "0.002425 | \n", "0.990324 | \n", "0.002480 | \n", "0.004771 | \n", "
P4 | \n", "0.570985 | \n", "0.416802 | \n", "0.006008 | \n", "0.006205 | \n", "
P5 | \n", "0.002533 | \n", "0.468765 | \n", "0.003265 | \n", "0.525437 | \n", "