{ "metadata": { "name": "", "signature": "sha256:242e7918f22a6340b975ba5761232db1e470d7d374d3be1134e4ecc42802a6f5" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "\n", "%matplotlib inline\n", "\n", "%precision 2\n", "pd.set_option('display.precision', 3)\n", "\n", "import ndl,sim\n", "from zt import ztnbinom" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Set up compute cluster and initialize its environment. (Make sure it's got all the right versions of the files, especially sim.py!)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from IPython.parallel import Client\n", "\n", "rc = Client(profile='home')\n", "dview = rc.direct_view()\n", "dview.block = True\n", "lview = rc.load_balanced_view()\n", "lview.block = True\n", "rc.ids" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 14, "text": [ "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]" ] } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "%%px\n", "\n", "import sys\n", "\n", "# Put the project checkout first on the search path exactly once, so that\n", "# re-running this cell does not keep prepending duplicate entries.\n", "LEARNING_DIR = '/home1/malouf/learning'\n", "sys.path[:] = [LEARNING_DIR] + [p for p in sys.path if p != LEARNING_DIR]\n", "import sim" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 15 }, { "cell_type": "markdown", "metadata": {}, "source": [ "-----\n", "\n", "Create data that's distributed like the corpus counts in Ramscar et al.'s PNAS paper (see the Input Modeling notebook for details). 
" ] }, { "cell_type": "code", "collapsed": false, "input": [ "def cues(N):\n", "    \"\"\"Sample a set size and return ``[cue list, codeFunc(set size)]``.\n", "\n", "    The set size ``card`` is drawn with ``ztnbinom.rvs(3, .6)``. The cue list\n", "    holds the integers 1..card followed by the label 'exactly<card>', the\n", "    same layout as the Cues column of the frequency table built below.\n", "\n", "    NOTE(review): N is never read, and codeFunc is not defined in the part\n", "    of the notebook visible here -- confirm both before relying on this.\n", "    \"\"\"\n", "    card = ztnbinom.rvs(3,.6)\n", "    # Count cues from 1 so a set of size k yields 1..k, matching the table;\n", "    # range(card) produced 0..k-1 instead.\n", "    feats = range(1,card+1) + ['exactly%d'%card]\n", "    return [feats,codeFunc(card)]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "# Tally the 10000 sampled set sizes: slot k-1 holds how many samples equal k.\n", "ns = [ztnbinom.rvs(3,.6) for i in xrange(10000)]\n", "counts = np.zeros((max(ns)))\n", "for i in ns:\n", "    counts[i-1] += 1\n", "counts" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 5, "text": [ "array([ 3.31e+03, 2.57e+03, 1.77e+03, 1.08e+03, 6.19e+02,\n", " 3.13e+02, 1.65e+02, 7.60e+01, 5.30e+01, 2.00e+01,\n", " 7.00e+00, 3.00e+00, 2.00e+00, 1.00e+00, 1.00e+00])" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "# Build the table from the raw tally under its own name, so `data` only ever\n", "# refers to the DataFrame.\n", "data = pd.DataFrame(counts,columns=['Frequency'],index=range(1,len(counts)+1))\n", "data['Cues'] = [range(1,i+1) + ['exactly%d'%i] for i in data.index]\n", "data['Number'] = data.index\n", "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", " | Frequency | \n", "Cues | \n", "Number | \n", "
---|---|---|---|
1 | \n", "3313 | \n", "[1, exactly1] | \n", "1 | \n", "
2 | \n", "2570 | \n", "[1, 2, exactly2] | \n", "2 | \n", "
3 | \n", "1773 | \n", "[1, 2, 3, exactly3] | \n", "3 | \n", "
4 | \n", "1084 | \n", "[1, 2, 3, 4, exactly4] | \n", "4 | \n", "
5 | \n", "619 | \n", "[1, 2, 3, 4, 5, exactly5] | \n", "5 | \n", "
6 | \n", "313 | \n", "[1, 2, 3, 4, 5, 6, exactly6] | \n", "6 | \n", "
7 | \n", "165 | \n", "[1, 2, 3, 4, 5, 6, 7, exactly7] | \n", "7 | \n", "
8 | \n", "76 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, exactly8] | \n", "8 | \n", "
9 | \n", "53 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, exactly9] | \n", "9 | \n", "
10 | \n", "20 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, exactly10] | \n", "10 | \n", "
11 | \n", "7 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, exactly11] | \n", "11 | \n", "
12 | \n", "3 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, exactl... | \n", "12 | \n", "
13 | \n", "2 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, ex... | \n", "13 | \n", "
14 | \n", "1 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14... | \n", "14 | \n", "
15 | \n", "1 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14... | \n", "15 | \n", "
15 rows \u00d7 3 columns
\n", "\n", " | Frequency | \n", "Cues | \n", "Number | \n", "Outcomes | \n", "
---|---|---|---|---|
1 | \n", "3313 | \n", "[background, 1, exactly1] | \n", "1 | \n", "notdu | \n", "
2 | \n", "2570 | \n", "[background, 1, 2, exactly2] | \n", "2 | \n", "du | \n", "
3 | \n", "1773 | \n", "[background, 1, 2, 3, exactly3] | \n", "3 | \n", "notdu | \n", "
4 | \n", "1084 | \n", "[background, 1, 2, 3, 4, exactly4] | \n", "4 | \n", "notdu | \n", "
5 | \n", "619 | \n", "[background, 1, 2, 3, 4, 5, exactly5] | \n", "5 | \n", "notdu | \n", "
6 | \n", "313 | \n", "[background, 1, 2, 3, 4, 5, 6, exactly6] | \n", "6 | \n", "notdu | \n", "
7 | \n", "165 | \n", "[background, 1, 2, 3, 4, 5, 6, 7, exactly7] | \n", "7 | \n", "notdu | \n", "
8 | \n", "76 | \n", "[background, 1, 2, 3, 4, 5, 6, 7, 8, exactly8] | \n", "8 | \n", "notdu | \n", "
9 | \n", "53 | \n", "[background, 1, 2, 3, 4, 5, 6, 7, 8, 9, exactly9] | \n", "9 | \n", "notdu | \n", "
10 | \n", "20 | \n", "[background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ex... | \n", "10 | \n", "notdu | \n", "
11 | \n", "7 | \n", "[background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11... | \n", "11 | \n", "notdu | \n", "
12 | \n", "3 | \n", "[background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11... | \n", "12 | \n", "notdu | \n", "
13 | \n", "2 | \n", "[background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11... | \n", "13 | \n", "notdu | \n", "
14 | \n", "1 | \n", "[background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11... | \n", "14 | \n", "notdu | \n", "
15 | \n", "1 | \n", "[background, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11... | \n", "15 | \n", "notdu | \n", "
15 rows \u00d7 4 columns
\n", "\n", " | Frequency | \n", "Cues | \n", "Number | \n", "
---|---|---|---|
1 | \n", "1653 | \n", "[1, exactly1] | \n", "1 | \n", "
2 | \n", "1877 | \n", "[1, 2, exactly2] | \n", "2 | \n", "
3 | \n", "1637 | \n", "[1, 2, 3, exactly3] | \n", "3 | \n", "
4 | \n", "1363 | \n", "[1, 2, 3, 4, exactly4] | \n", "4 | \n", "
5 | \n", "1035 | \n", "[1, 2, 3, 4, 5, exactly5] | \n", "5 | \n", "
6 | \n", "793 | \n", "[1, 2, 3, 4, 5, 6, exactly6] | \n", "6 | \n", "
7 | \n", "559 | \n", "[1, 2, 3, 4, 5, 6, 7, exactly7] | \n", "7 | \n", "
8 | \n", "361 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, exactly8] | \n", "8 | \n", "
9 | \n", "265 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, exactly9] | \n", "9 | \n", "
10 | \n", "149 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, exactly10] | \n", "10 | \n", "
11 | \n", "108 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, exactly11] | \n", "11 | \n", "
12 | \n", "79 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, exactl... | \n", "12 | \n", "
13 | \n", "47 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, ex... | \n", "13 | \n", "
14 | \n", "36 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14... | \n", "14 | \n", "
15 | \n", "14 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14... | \n", "15 | \n", "
16 | \n", "10 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14... | \n", "16 | \n", "
17 | \n", "5 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14... | \n", "17 | \n", "
18 | \n", "5 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14... | \n", "18 | \n", "
19 | \n", "0 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14... | \n", "19 | \n", "
20 | \n", "3 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14... | \n", "20 | \n", "
21 | \n", "1 | \n", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14... | \n", "21 | \n", "
21 rows \u00d7 3 columns
\n", "