{ "cells": [ { "cell_type": "markdown", "metadata": { "run_control": { "frozen": false, "read_only": false }, "toc": "true" }, "source": [ "# Table of Contents\n", "
" ] }, { "cell_type": "markdown", "metadata": { "run_control": { "frozen": false, "read_only": false } }, "source": [ "RLE plots are simple and intuitive: for each expression value $y_{ij}$ we plot $y_{ij}-Median(y_{.j})$, where $y_{.j}$\n", "represents column $j$ of an $m \\times n$ expression matrix of $m$ samples and $n$ genes.\n", "\n", "The simulation strategy presented here follows the [original publication](https://arxiv.org/abs/1704.03590)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true, "run_control": { "frozen": false, "read_only": false } }, "outputs": [], "source": [ "%matplotlib inline\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import scipy as sp\n", "import pandas as pd\n", "sns.set_style('whitegrid')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true, "run_control": { "frozen": false, "read_only": false } }, "outputs": [], "source": [ "np.random.seed(42)\n", "m = 30\n", "n = 10000\n", "m_u = 5\n", "s2_u = 0.5\n", "s2_0 = 0.5\n", "alpha = 10\n", "beta = 1" ] }, { "cell_type": "markdown", "metadata": { "run_control": { "frozen": false, "read_only": false } }, "source": [ "\\begin{align*}\n", "y_{ij} &= \\mu_j + \\theta_i + \\gamma_{ij} + \\epsilon_{ij}\\\\\n", "\\mu_j &\\sim N(m_\\mu, s^2_\\mu)\\\\\n", "\\theta_i &\\sim N(m_\\theta, s^2_\\theta)\\\\\n", "\\gamma_{ij} &= \\lambda (\\theta_i-\\bar{\\theta})(\\mu_j-\\bar{\\mu})\\\\\n", "\\epsilon_{ij} &\\sim N(0, \\sigma_j^2)\\\\\n", "1/\\sigma^2_j &\\sim Gamma(\\alpha, \\beta)\\\\\n", "m_\\mu &= 5\\\\\n", "s^2_\\mu &= 0.5\\\\\n", "s^2_\\theta &= 0.5\\\\\n", "\\end{align*}\n" ] }, { "cell_type": "markdown", "metadata": { "run_control": { "frozen": false, "read_only": false } }, "source": [ "# Additive Effects only ($m_\\theta=0, \\lambda=0$)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true, "run_control": { "frozen": false, "read_only": false } }, 
"outputs": [], "source": [ "# Simulate the additive-only model; Y has m (samples) rows and n (genes) cols\n", "m_0 = 0\n", "mu = np.random.normal(m_u, np.sqrt(s2_u), n)     # per-gene mean mu_j, length n\n", "theta = np.random.normal(m_0, np.sqrt(s2_0), m)  # per-sample effect theta_i, length m\n", "\n", "# One noise variance per gene (1/sigma_j^2 ~ Gamma(alpha, beta)), shared by every\n", "# sample of that gene; tiled to (m, n) so each entry is drawn with its gene's variance.\n", "# np.random.gamma takes (shape, scale, size).\n", "eps_sigma2 = np.tile(1/np.random.gamma(alpha, beta, n), (m, 1))\n", "epsilon = np.random.normal(0, np.sqrt(eps_sigma2))\n", "\n", "# Expand the per-sample and per-gene vectors to full (m, n) matrices\n", "theta_M = np.tile(theta, (n,1)).T\n", "mu_M = np.tile(mu, (m,1))\n", "\n", "Y = mu_M + theta_M + epsilon\n", "\n", "# Genes as rows, samples as columns labelled '1'..'m'\n", "df = pd.DataFrame(Y.T)\n", "df.columns = [str(i) for i in range(1, m + 1)]\n", "# RLE: subtract each gene's median (taken across samples) from that gene's row\n", "df_median = df.sub(df.median(axis=1), axis=0)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "run_control": { "frozen": false, "read_only": false } }, "outputs": [ { "data": { "text/plain": [ "