{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Problem 1" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas\n", "\n", "def get_num_false_discoveries(m, alpha):\n", " \"\"\"Return the number of false discoveries when applying the Benjamini-Hochberg\n", " prescription with `alpha` to `m` uniformly distributed p-values.\"\"\"\n", " pvals = np.random.random(m)\n", " pvals.sort()\n", " k = np.arange(1, len(pvals) + 1)\n", " thresholds = k * (alpha / m)\n", " return np.sum(pvals <= thresholds)\n", "\n", "def estimate_probs(m=50, alpha=0.05, num_simulations=10**5):\n", " \"\"\"Estimate the probability of getting N=0, 1, 2, and 3 false discoveries\n", " when applying the Benjamini-Hochberg prescription with `alpha` to `m` uniformly\n", " distributed p-values.\"\"\"\n", " counts = np.zeros(4)\n", " for _ in xrange(num_simulations):\n", " num_discoveries = get_num_false_discoveries(m, alpha)\n", " if num_discoveries < len(counts):\n", " counts[num_discoveries] += 1\n", "\n", " probs = counts / counts.sum()\n", " return probs\n", "\n", "for count, prob in enumerate(estimate_probs()):\n", " print 'P({0}) = {1}'.format(count, prob)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "P(0) = 0.94991\n", "P(1) = 0.04654\n", "P(2) = 0.00327\n", "P(3) = 0.00028\n" ] } ], "prompt_number": 1 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Problem 2" ] }, { "cell_type": "code", "collapsed": false, "input": [ "def collect_results(ms, alphas):\n", " \"\"\"Collect probabilities of false discoveries for every combination of ms and alphas.\"\"\"\n", " results = []\n", " for m in ms:\n", " for alpha in alphas:\n", " result = [m, alpha]\n", " result.extend(estimate_probs(m=m, alpha=alpha))\n", " results.append(result)\n", "\n", " return pandas.DataFrame(results, columns=['m', 'alpha', 'p0', 'p1', 'p2', 'p3'])\n", "\n", "def plot_results(results, x_var, logx=True):\n", " fig, axs = plt.subplots(2, 2, figsize=(6, 6))\n", " for num_discoveries in xrange(4):\n", " row = num_discoveries // 2\n", " col = num_discoveries % 2\n", " x_axis = results[x_var]\n", " probs = results['p{0}'.format(num_discoveries)]\n", " axs[row][col].plot(x_axis, probs, marker='o')\n", " axs[row][col].set_xscale('log')\n", " axs[row][col].set_title('p{0}'.format(num_discoveries))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "Collect and plot results for varying values of alpha." ] }, { "cell_type": "code", "collapsed": false, "input": [ "vary_alpha_results = collect_results([50], [0.0001, 0.001, 0.01, 0.1, 0.5])\n", "vary_alpha_results" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", " | m | \n", "alpha | \n", "p0 | \n", "p1 | \n", "p2 | \n", "p3 | \n", "
---|---|---|---|---|---|---|
0 | \n", "50 | \n", "0.0001 | \n", "0.999910 | \n", "0.000090 | \n", "0.000000 | \n", "0.000000 | \n", "
1 | \n", "50 | \n", "0.0010 | \n", "0.999080 | \n", "0.000920 | \n", "0.000000 | \n", "0.000000 | \n", "
2 | \n", "50 | \n", "0.0100 | \n", "0.990490 | \n", "0.009420 | \n", "0.000090 | \n", "0.000000 | \n", "
3 | \n", "50 | \n", "0.1000 | \n", "0.898847 | \n", "0.087688 | \n", "0.011555 | \n", "0.001911 | \n", "
4 | \n", "50 | \n", "0.5000 | \n", "0.571245 | \n", "0.232542 | \n", "0.123220 | \n", "0.072992 | \n", "
5 rows \u00d7 6 columns
\n", "\n", " | m | \n", "alpha | \n", "p0 | \n", "p1 | \n", "p2 | \n", "p3 | \n", "
---|---|---|---|---|---|---|
0 | \n", "5 | \n", "0.05 | \n", "0.949959 | \n", "0.047080 | \n", "0.00289 | \n", "0.00007 | \n", "
1 | \n", "50 | \n", "0.05 | \n", "0.949988 | \n", "0.046511 | \n", "0.00323 | \n", "0.00027 | \n", "
2 | \n", "500 | \n", "0.05 | \n", "0.950659 | \n", "0.045651 | \n", "0.00330 | \n", "0.00039 | \n", "
3 | \n", "5000 | \n", "0.05 | \n", "0.949107 | \n", "0.047523 | \n", "0.00308 | \n", "0.00029 | \n", "
4 | \n", "50000 | \n", "0.05 | \n", "0.950880 | \n", "0.045400 | \n", "0.00342 | \n", "0.00030 | \n", "
5 rows \u00d7 6 columns
\n", "