{ "metadata": { "name": "ma_analysis" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Converting MA data from MongoDB to an R Data Frame\n", "All the relevant data was created using [ma.py](https://bitbucket.org/yoavram/masim/src/tip/ma.py).\n", "## Setup environment\n", "Change to default working directory, import *matplotlib* so that saved *py* files will work." ] }, { "cell_type": "code", "collapsed": false, "input": [ "import os\n", "os.chdir(\"d:\\\\workspace\\\\ma\")\n", "from matplotlib.pyplot import plot" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create a MongoDB interface\n", "The host is *britanya409-2.tau.ac.il*. From the PC I use *localhost* and a PuTTy tunnel to port 27018.\n", "The db is *ma* and the collection is *results*. The code uses the [pymongo](http://api.mongodb.org/python/current/) package to interface MongoDB. Current version is stated below." ] }, { "cell_type": "code", "collapsed": false, "input": [ "import pymongo\n", "print \"pymongo version\", pymongo.version" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "pymongo version 2.4\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "def ma_collection():\n", " con = pymongo.Connection(\"localhost\",27018) \n", " col = con.ma.results\n", " return col" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "col = ma_collection()" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "pyerr", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mcol\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mma_collection\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;32m\u001b[0m in \u001b[0;36mma_collection\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mma_collection\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mcon\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpymongo\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mConnection\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"britanya409-2.tau.ac.il\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m27018\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0mcol\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcon\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mma\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mresults\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mcol\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Python27\\lib\\site-packages\\pymongo\\connection.pyc\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, host, port, max_pool_size, network_timeout, document_class, tz_aware, _connect, **kwargs)\u001b[0m\n\u001b[0;32m 176\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 177\u001b[0m super(Connection, self).__init__(host, port,\n\u001b[1;32m--> 178\u001b[1;33m max_pool_size, document_class, tz_aware, _connect, **kwargs)\n\u001b[0m\u001b[0;32m 179\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 180\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__repr__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Python27\\lib\\site-packages\\pymongo\\mongo_client.pyc\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, host, port, max_pool_size, document_class, tz_aware, _connect, **kwargs)\u001b[0m\n\u001b[0;32m 269\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0m_connect\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 270\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 271\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__find_node\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mseeds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 272\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mAutoReconnect\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 273\u001b[0m \u001b[1;31m# ConnectionFailure makes more sense here than AutoReconnect\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Python27\\lib\\site-packages\\pymongo\\mongo_client.pyc\u001b[0m in \u001b[0;36m__find_node\u001b[1;34m(self, seeds)\u001b[0m\n\u001b[0;32m 617\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mcandidate\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mcandidates\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 618\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 619\u001b[1;33m \u001b[0mnode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mismaster\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0misdbgrid\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mres_time\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__try_node\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcandidate\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 620\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__is_primary\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mismaster\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 621\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__is_mongos\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0misdbgrid\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Python27\\lib\\site-packages\\pymongo\\mongo_client.pyc\u001b[0m in \u001b[0;36m__try_node\u001b[1;34m(self, node)\u001b[0m\n\u001b[0;32m 527\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 528\u001b[0m \u001b[1;31m# Call 'ismaster' directly so we can get a response time.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 529\u001b[1;33m \u001b[0msock_info\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__socket\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 530\u001b[0m response, res_time = self.__simple_command(sock_info,\n\u001b[0;32m 531\u001b[0m \u001b[1;34m'admin'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Python27\\lib\\site-packages\\pymongo\\mongo_client.pyc\u001b[0m in \u001b[0;36m__socket\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 660\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstart_request\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 661\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 662\u001b[1;33m \u001b[0msock_info\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__pool\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_socket\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mhost\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mport\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 663\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0msocket\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merror\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mwhy\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 664\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdisconnect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Python27\\lib\\site-packages\\pymongo\\pool.pyc\u001b[0m in \u001b[0;36mget_socket\u001b[1;34m(self, pair)\u001b[0m\n\u001b[0;32m 265\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrelease\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 266\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 267\u001b[1;33m \u001b[0msock_info\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfrom_pool\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpair\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mFalse\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 268\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 269\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mfrom_pool\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Python27\\lib\\site-packages\\pymongo\\pool.pyc\u001b[0m in \u001b[0;36mconnect\u001b[1;34m(self, pair)\u001b[0m\n\u001b[0;32m 214\u001b[0m \u001b[0mreturn_socket\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[0mwhen\u001b[0m \u001b[0myou\u001b[0m\u001b[0;31m'\u001b[0m\u001b[0mre\u001b[0m \u001b[0mdone\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mit\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 215\u001b[0m \"\"\"\n\u001b[1;32m--> 216\u001b[1;33m \u001b[0msock\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcreate_connection\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpair\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 217\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 218\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0muse_ssl\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32mC:\\Python27\\lib\\site-packages\\pymongo\\pool.pyc\u001b[0m in \u001b[0;36mcreate_connection\u001b[1;34m(self, pair)\u001b[0m\n\u001b[0;32m 193\u001b[0m \u001b[0msock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msetsockopt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msocket\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mIPPROTO_TCP\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msocket\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTCP_NODELAY\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 194\u001b[0m \u001b[0msock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msettimeout\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconn_timeout\u001b[0m \u001b[1;32mor\u001b[0m \u001b[1;36m20.0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 195\u001b[1;33m \u001b[0msock\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msa\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 196\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0msock\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 197\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0msocket\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merror\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mKeyboardInterrupt\u001b[0m: " ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "col.count()" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create a list of all available specs.\n", "Specs are the parameter sets. \n", "Currently most of the parameters are constants, and only *s*, *U*, and *$\\pi$* are variables." ] }, { "cell_type": "code", "collapsed": false, "input": [ "specs = []\n", "for s in col.distinct('s'):\n", " for U in col.find({'s':s}).distinct('U'):\n", " for pi in col.find({'s':s,'U':U}).distinct('pi'):\n", " specs.append( {'s':s,'U':U,'pi':pi} ) " ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 13 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get a cursor over the data \n", "Get only the required date - the parameters and *w* which is the _mean fitness after the bottleneck_ (if *b==1* then it is the fitness of the single individual after the bottleneck)." ] }, { "cell_type": "code", "collapsed": false, "input": [ "cur = col.find({},['w','tau','B','genes','epistasis','s','b','U','pop','pi'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 14 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Look at a sample record\n", "*d* is some record. We show the parameter values and the plot of the mean fitness. Note that *w* and *_id* do not count as parameters because the first is the fitness time series and the second is the Mongo ID of the record." ] }, { "cell_type": "code", "collapsed": false, "input": [ "d = cur.next()\n", "params = [(k,v) for k,v in d.items() if (k!='_id' and k!='w')]\n", "for k,v in params: print k,'=',v" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "tau = 1\n", "B = 300\n", "genes = 100\n", "pop = 100000000.0\n", "epistasis = 1.0\n", "s = 0.01\n", "b = 1\n", "U = 0.003\n", "pi = 200\n" ] } ], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "w = d['w']\n", "plot(w);" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "display_data", "png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD9CAYAAABUS3cAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHT5JREFUeJzt3XtwVGWexvFvczfABEkjIEIAzaQ7EJJWSMhuTHpwTFIi\nt8ERUrWWImN1uUpQArOlU7vi7rpo6Wgw4hhrZJcZhamddRViqZmwbhNEE6IGRBLG4RIYissGGNpg\nwhjM2T9aWgK50pfTl+dTlRq6z+331nGenLznPe+xGIZhICIiMaWf2QWIiEjoKfxFRGKQwl9EJAYp\n/EVEYpDCX0QkBin8RURiULfhf//99zN69GhSU1O7XOexxx5j8uTJ3HLLLezbt8/3fVVVFXa7naSk\nJEpLSwNXsYiI+K3b8F+yZAnvv/9+l8t37tzJ9u3b+eSTT1i5ciUrV670LVu+fDllZWVs3bqVdevW\ncerUqcBVLSIifuk2/G+99VauvfbaLpfX1NRw1113MXLkSAoLC2loaADA4/EAkJOTQ2JiInl5edTU\n1ASwbBER8Ydfff47d+4kJSXF93nUqFEcOHCA2tpabDab7/uUlBSqq6v9OZSIiATQAH82NgyDy2eH\nsFgsfdpHX9cXEREvf2bn8evKPzMzk/r6et/npqYmJk+ezPTp0zvc/N27dy8zZ87scj8Xf4lc+vP0\n0warVl35faT9PPHEE6bXoPapfbHWtlhon7/8Dv8333yT06dPs3HjRux2OwAjRowAvCN+Ghsbqays\nJDMzs0/7TkiA06f9qU5ERLrSbbdPYWEh27Zt49SpU4wfP54nn3yStrY2AFwuFxkZGWRnZzN9+nRG\njhzJ66+/7tu2pKQEl8tFW1sbRUVFWK3WPhVmtYIGCImIBEe34b9p06Yed/D000/z9NNPX/F9bm6u\nb/TP1YiWK3+n02l2CUGl9kWuaG4bRH/7/GUxAtF55E8BFkun/Vf19bBwIfjx+0NEJGp1lZ29FbbT\nOyQkqNtHRCRYwvbKv60N4uLgr3+FfmH7K0pExBxRe+U/cCAMHQrfPSwsIiIBFLbhD+r6EREJlrAP\n/2gY8SMiEm7COvytVoW/iEgwhHX4q9tHRCQ4/JrYLdgSEqCxEU6c6N26AwcGvSQRkagQtkM9Adav\nh8cf73kfra2wdCk8/3yAixMRCVP+DvUM6/Dvrffe8wZ/ZWWAihIRCXNRO86/L5KT4Y9/NLsKEZHI\nERVX/t9+C8OGeW8ODx0aoMJERMKYrvyB/v3hxhvhT38yuxIRkcgQFeEPYLOp60dEpLeiJvyTk+GS\nN0eKiEg3wnqcf1+kpMAjj8CWLV2v89hjcNddoatJRCRcRcUNX/BOAb1nD3S1q82b4dgx+PWv/T6U\niIjp/M3OqLnyHzgQbr656+XnzsEvfhG6ekREwlnU9Pn3RPcERES+FzPhP3o0XLigieJERCCGwt9i\n0XBQEZGLYib8QdNAiIhcFDU3fHvDZoNXXoFPPw38vm+4wTuUVEQkEkTNUM/eOHYM3nor8Ps1DFi5\nEs6ehSFDAr9/EZHLaUrnMGG3w+9/D1Onml2JiMQCTewWJmw2DSUVkcih8A8Qhb+IRJIew7+qqgq7\n3U5SUhKlpaVXLG9ubqa4uJj09HSysrI4cOCAb9nEiROZNm0aDoeDjIyMwFYeZhT+IhJJegz/5cuX\nU1ZWxtatW1m3bh2nLntKatOmTbS1tbFr1y6ef/55fv7zn/uWWSwW3G43dXV17Ny5M/DVhxGFv4hE\nkm6Heno8HgBycnIAyMvLo6amhtmzZ/vW+eCDD1iyZAkAWVlZ7N+/v8M+ouFmbm8kJ0NDA5SUfP/d\nT38K48aZV5OISFe6vfKvra3FZrP5PqekpFBdXd1hnfz8fDZt2kRraytbtmxhz549HDp0CPBe+c+a\nNYv58+ezpbu5lqPAiBHeieMaG70/Gzd6f0REwpHfD3ktWrSIo0ePkpubS3JyMklJSQwePBiAHTt2\nMHbsWBoaGpgzZw4ZGRmMGTPmin2sXr3a92+n04nT6fS3LFM8/vj3/371Vbjs96SIyFVzu9243e6A\n7a/bcf4ejwen00ldXR0Ay5Yto6CgoEO3z6XOnTtHdnY2u3btumLZihUrsNvtPPDAAx0LiJJx/pfb\nvh3+4R/go4/MrkREolFQx/nHx8cD3hE/jY2NVFZWkpmZ2WEdj8fDN998Q0tLC2vWrOH2228HoKWl\nhebmZgCampqoqKigoKDgqguNNBdvAEfh7zURiQI9dvuUlJTgcrloa2ujqKgIq9VKWVkZAC6Xi/r6\neu677z7a29vJysrilVdeAeDkyZMsWLAAgISEBIqLixk/fnwQmxJeRo2Cfv2gqQmuu87sakREOtL0\nDkGUnQ1PPQW5uWZXIiLRRq9xDGM2G7zxBvz5z8E7xrBhMH9+8PYvItFJV/5B9L//C6+9FtxjvPkm\nHDni7WYSkdihWT1jnM3mnababje7EhEJJc3qGeOsVu9NZRGRvlD4R7hRo/RSehHpO4V/hNOVv4hc\nDYV/hNOVv4hcDYV/hNOVv4hcDYV/hNOVv4hcDYV/hNOVv4hcDYV/hNOVv4hcDYV/hNOVv4hcDYV/\nhNOVv4hcDYV/hIuL874z4Ouvza5ERCKJZvWMcBaL9+r/hRe87xHuq+nTYebMwNclIuFNE7tFgZdf\nhvr6vm934oS3yyiArwUVkRDRrJ5y1Y4dA4cDTp40uxIR6SvN6ilXbexYOH9eN4xFYpHCP4ZZLJCS\nAg0NZlciIqGm8I9xdvvV3S8Qkcim8I9xuvIXiU0a6hnjUlLgV7/ydgF1JysL7r47NDWJSPBptE+M\n+/prWL8eLlzoep2TJ+G992D37tDVJSLd01BPCbqWFu8cQl99BQP0t6JIWNBQTwm6uDgYMwYOHTK7\nEhEJFIW/9EpKikYFiUQThb/0isJfJLoo/KVXFP4i0aXH8K+qqsJut5OUlERpaekVy5ubmykuLiY9\nPZ2srCwOHDjQ620lckyZAlu2wK23fv+TkwN79phdmYhcjR5H+zgcDtauXUtiYiL5+fl8+OGHWK1W\n3/JXX32VL774ghdffJGPP/6Y5557jjfffLNX24JG+0QKw4CaGmhr+/67F1+E7GxYvty8ukRilb/Z\n2e3APY/HA0BOTg4AeXl51NTUMHv2bN86H3zwAUuWLAEgKyuL/fv393pbiRwWy5Xz/n/+ufdHRCJP\nt90+tbW12Gw23+eUlBSqq6s7rJOfn8+mTZtobW1ly5Yt7Nmzh0OHDvVqW4lsug8gErn8fmRn0aJF\nHD16lNzcXJKTk0lKSmLw4MF92sfq1at9/3Y6nTidTn/LkhCYMgX27vV2CfU0PYSI+MftduMO4JuX\nuu3z93g8OJ1O6urqAFi2bBkFBQVddt2cO3eO7Oxsdu3axdmzZ/nRj37U47bq849chuF9heQXX3gf\nAhOR0AnqE77x8fGAd9ROY2MjlZWVZGZmdljH4/HwzTff0NLSwpo1a7j99tsBGPHdC2W721Yim8Xi\nvfrftcv7i0BEIkeP3T4lJSW4XC7a2tooKirCarVSVlYGgMvlor6+nvvuu4/29naysrJ45ZVXut1W\nosvf/i3ceSc8/jj88z+bXY2I9JYmdhO/bdkCr7wC775rdiUisUMTu4npLt74FZHIoSt/8Vt7Owwf\nDidOeP9XRIJPV/5iun79wGbTmH+RSKLwl4CYMkXhLxJJ9F4mCYgpU+C//9v7V0BXfvxjGDcudDWJ\nSNcU/hIQc+ZAQwN88EHnyxsavD9PPx3aukSkc7rhKyHx1lveF8WXl5tdiUh00A1fiQgaDioSXnTl\nLyHx7bfeYaD/938wbJjZ1YhEPl35S0To3x9++ENvv7+ImE/hLyEzdaq6fkTChcJfQiY1Ff7xH+Gp\np8yuRETU5y8h8/XXsHkz/Nu/ed8BICJXz9/sVPhLSJ0/D9deCx4PDBpkdjUikUs3fCWiDBkCiYnw\n5ZdmVyIS2xT+EnJTp6rbR8RsCn8JOYW/iPk0t4+E3NSp8NJLsG2b2ZV43XKLHjyT2KMbvhJyx47B\nPffAhQtmVwKNjVBUBMXFZlci0jf+Zqeu/CXkrr8e/ud/zK7C69e/hg8/NLsKkdBTn7/EtNRU2LPH\n7CpEQk/dPhLTmpth9Gjv//bvb3Y1Ir2ncf4ifhg+3Bv+Bw+aXYlIaCn8JeZp6KnEInX7SMz7xS/g\n9ddhzJjvv0tIgHfe6f6dxCJm0tw+In5qbob6+o7fLVgAO3bApEnm1CTSEw31FPHT8OGQmdnxu7Q0\nb1eQwl+ilf6oFenE1KkaAirRrcfwr6qqwm63k5SURGlp6RXLW1tbuffee3E4HOTm5rJ582bfsokT\nJzJt2jQcDgcZGRmBrVwkiFJTdRNYoluP3T7Lly+nrKyMxMRE8vPzKSwsxGq1+pZv2LCBoUOHUldX\nx+HDh5k1axZz587FYrFgsVhwu92MHDkyqI0QCbTUVHj2WbOrEAmebsPf4/EAkJOTA0BeXh41NTXM\nnj3bt058fDzNzc20tbVx5swZ4uLisFgsvuW6mSuRyG6H/fvhtdfgkv+cr/CDH8Bdd4WuLpFA6Tb8\na2trsdlsvs8pKSlUV1d3CP/CwkLKy8uxWq1cuHCBjz76yLfMYrEwa9YsJk2axP3338/cuXOD0ASR\nwBsyBB57zDvipzu//z1kZcG4caGpSyRQ/B7t89JLLzFgwACOHz/Onj17uPPOOzly5AgWi4UdO3Yw\nduxYGhoamDNnDhkZGYy5dDD1d1avXu37t9PpxOl0+luWiN/+6Z96XufwYe+NYYW/BJvb7cbtdgds\nf92O8/d4PDidTurq6gBYtmwZBQUFHa787777bpYuXUp+fj4AmZmZbNiwocNfDAArVqzAbrfzwAMP\ndCxA4/wlgj36qHeW0lWrzK5EYk1Q5/aJj48HvCN+GhsbqaysJPOyAdG33XYb5eXltLe3c/DgQc6c\nOYPNZqOlpYXm5mYAmpqaqKiooKCg4KoLFQlHmhVUIlWP3T4lJSW4XC7a2tooKirCarVSVlYGgMvl\nYvHixdTX1zN9+nRGjRrF2rVrAThx4gQ/+clPAEhISKC4uJjx48cHsSkioZeaCi++aHYVIn2n6R1E\n/NDSAlYreDwwcKDZ1Ugs0ZTOIiaKi4MbboA//cnsSkT6RuEv4qfUVPj8c7OrEOkbhb+In6ZN001f\niTwKfxE/acSPRCKFv4if1O0jkUijfUT89O23EB8Px4555/oRCQWN9hExWf/+3vn/b73VO8/Pf/2X\n2RWJ9ExX/iIB8Oc/w9Gj8P77cPAg/Pa3Zlck0U6vcRQJA+PHe38GDYIlS8yuRqRnuvIXCaDWVhg5\n0vvE76BBZlcj0Ux9/iJh5JprYOJE2LfP7EpEuqfwFwmwadM09FPCn/r8RQIsLQ1eegl27rxymcUC\nxcUwYULo6xK5lMJfJMDuvReGDet82ZtvwjvvwN//fWhrErmcwl8kwMaNg6Kizpf16we7d4e2HpHO\nqM9fJITS0nQ/QMKDhnqKhNDZs97nATwe718BIldLQz1FIsiIEZCQ4H0KWMRMuvIXCbG5c71v/0pJ\n+f678eNh3jzzapLI4292KvxFQqyyEjZv/v6zYcCGDd6uoP79zatLIovCXyQKTJ4M770HyclmVyKR\nQn3+IlEgLU1DQCW0FP4iYUDhL6Gm8BcJAwp/CTU94SsSBtLS4JNP4D//s+t1Jk6EjIyQlSRRTjd8\nRcJAezs8+CD85S+dL//rX+Gzz7xvDBMBjfYRiQnt7d6XxB8+7H1ZjIhG+4jEgH79dF9AAqvH8K+q\nqsJut5OUlERpaekVy1tbW7n33ntxOBzk5uay+ZKnV3raVkR6T+EvgdRj+C9fvpyysjK2bt3KunXr\nOHXqVIflGzZsYOjQodTV1fGb3/yGFStW+P4U6WlbEem99HTYtcvsKiRadDvax+PxAJCTkwNAXl4e\nNTU1zJ4927dOfHw8zc3NtLW1cebMGeLi4rBYLL3aVkR6Ly0Nnn0W3n47tMe1WiE7O7THlODrNvxr\na2ux2Wy+zykpKVRXV3cI8MLCQsrLy7FarVy4cIGPP/6419uKSO9NmwbTp8N//Edoj1tR4R2FNGRI\naI8rweX3OP+XXnqJAQMGcPz4cfbs2cPs2bM5fPhwn/axevVq37+dTidOp9PfskSizpAhsHFj6I87\nbRrU18PNN4f+2PI9t9uN2+0O2P66Df8ZM2awatUq3+e9e/dSUFDQYZ2qqiqWLl1KXFwcmZmZXH/9\n9Xz55Ze92vaiS8NfRMLLxXsNCn9zXX5h/OSTT/q1v25v+MbHxwPegG9sbKSyspLMzMwO69x2222U\nl5fT3t7OwYMHOXPmDDabrVfbikj4043m6NRjt09JSQkul4u2tjaKioqwWq2UlZUB4HK5WLx4MfX1\n9UyfPp1Ro0axdu3abrcVkciSnh76m8wSfHrCV0S6dfo0TJoE313zhYTVCrffHrrjRSJN7yAiQVdc\nDMeOhe54b78NTU0wbFjojhlpFP4iEnWmT4fSUsjKMruS8KW5fUQk6jgcUFdndhXRTeEvImFHI4yC\nT+EvImFHV/7Bpz5/EQk7zc0wejT867+G/tj9+8PPfgZDh4b+2H3hb3bqNY4iEnaGD4d/+Rc4ejT0\nx37nHZg8GebMCf2xQ0nhLyJhqbjYnOMOGuTtcor28Fefv4jIJWLlfoPCX0TkEgp/EZEYdNNN3ikt\nzpwxu5LgUviLiFyiXz/vW9N+9jP49FOzqwkehb+IyGVeeAEGD4bf/tbsSoJH4S8icpkZM7xX/p99\nZnYlwaOHvEREOnHmjHcq67/8xdsVFG40sZuISBCMHAkjRsDBg2ZXEhwKfxGRLtx8c/R2/ajbR0Sk\nC089BevWwXXXhe6YTzwBCxb0vJ5e5iIiEiStrfDHP4bueJs3Q2Mj/Pu/97yuJnYTEQmSa67xvlsg\nVC5cgKVLQ3MsXfmLiISJ8+e9N5rPnIEhQ7pfV6N9RESixJAh8MMfwp49wT+Wwl9EJIzcfDNs3w5N\nTd57DsGi8BcRCSM//jGsWQPJyfA3fxO846jPX0QkDH3zjfchs1OnIC7uyuXq8xcRiUKDBkFKCuze\nHZz9K/xFRMJUMJ8wVviLiISpW24J3jsF1OcvIhKmamth0SJYtcr7uV8/KCyEH/wgBH3+VVVV2O12\nkpKSKC0tvWL5c889h8PhwOFwkJqayoABAzh79iwAEydOZNq0aTgcDjIyMq66SBGRWJSWBvPnw+ef\ne39KSmDLlsDsu8crf4fDwdq1a0lMTCQ/P58PP/wQq9Xa6brvvPMOJSUlbN26FYBJkybx6aefMnLk\nyK4L0JW/iEivPPMMnDjhfdNYUK/8PR4PADk5OSQmJpKXl0dNTU2X62/cuJHCwsIO3ynYRUQCI5D3\nALoN/9raWmw2m+9zSkoK1dXVna7b0tJCRUUFCxcu9H1nsViYNWsW8+fPZ0ug/lYREYlRN98Mu3ZB\ne7v/+wrYrJ7l5eVkZ2czYsQI33c7duxg7NixNDQ0MGfOHDIyMhgzZswV265evdr3b6fTidPpDFRZ\nIiJRwe1243a76d8fior831+3ff4ejwen00ldXR0Ay5Yto6CggNmzZ1+x7oIFC1i0aBGLFy/udF8r\nVqzAbrfzwAMPdCxAff4iIr320596bwL/3d8F+WUuF2/4TpgwgYKCgk5v+Ho8HiZPnszRo0e55ppr\nAG830Lfffsvw4cNpamrC6XTy/vvvM378+I4FKPxFRHrt4EHvtA8JCUF+mUtJSQkul4u2tjaKioqw\nWq2UlZUB4HK5AHj77bfJz8/3BT/AyZMnWfDdu8gSEhIoLi6+IvhFRKRvJk8OzH70kJeISATSxG4i\nItJnCn8RkRik8BcRiUEKfxGRGKTwFxGJQQp/EZEYpPAXEYlBCn8RkRik8BcRiUEKfxGRGKTwFxGJ\nQQp/EZEYpPAXEYlBCn8RkRik8BcRiUEKfxGRGKTwFxGJQQp/EZEYpPAXEYlBCn8RkRik8BcRiUEK\nfxGRGKTwFxGJQQp/EZEYpPAXEYlBCn8RkRik8BcRiUEKfxGRGNRj+FdVVWG320lKSqK0tPSK5c89\n9xwOhwOHw0FqaioDBgzg7Nmzvdo2FrjdbrNLCCq1L3JFc9sg+tvnrx7Df/ny5ZSVlbF161bWrVvH\nqVOnOixfuXIldXV11NXVsWbNGpxOJyNGjOjVtrEg2v8DVPsiVzS3DaK/ff7qNvw9Hg8AOTk5JCYm\nkpeXR01NTZfrb9y4kcLCwqvaVkREQqfb8K+trcVms/k+p6SkUF1d3em6LS0tVFRUsHDhwj5vKyIi\noTUgUDsqLy8nOzvb1+XTFxaLJVBlhKUnn3zS7BKCSu2LXNHcNoj+9vmj2/CfMWMGq1at8n3eu3cv\nBQUFna77u9/9ztfl05dtDcPoc9EiIuKfbrt94uPjAe+oncbGRiorK8nMzLxiPY/HQ1VVFfPmzevz\ntiIiEno9dvuUlJTgcrloa2ujqKgIq9VKWVkZAC6XC4C3336b/Px8rrnmmh63FRGRMGCYZNu2bYbN\nZjNuuukm48UXXzSrjIBKTEw0UlNTjfT0dGPGjBmGYRjGV199ZcydO9cYP368MW/ePKO5udnkKntv\nyZIlxnXXXWdMnTrV91137Vm7dq1x0003GXa73di+fbsZJfdaZ2174oknjHHjxhnp6elGenq68e67\n7/qWRVLbDMMwjhw5YjidTiMlJcXIzc013njjDcMwouf8ddW+aDmHra2tRkZGhpGWlmZkZmYazz//\nvGEYgT1/poV/enq6sW3bNqOxsdFITk42mpqazColYCZOnGicPn26w3fPPPOM8fDDDxvnz583Hnro\nIePZZ581qbq+q6qqMj777LMOAdlVe06ePGkkJycbhw8fNtxut+FwOMwqu1c6a9vq1auNX/7yl1es\nG2ltMwzDOH78uFFXV2cYhmE0NTUZkyZNMr766quoOX9dtS+azuHXX39tGIZhnD9/3pgyZYrx5Zdf\nBvT8mTK9QzQ/A2BcdgN7586dLF26lMGDB3P//fdHVDtvvfVWrr322g7fddWempoaCgoKmDBhArm5\nuRiGQXNzsxll90pnbYPOByBEWtsAxowZQ3p6OgBWq5UpU6ZQW1sbNeevq/ZB9JzDuLg4AM6dO8eF\nCxcYPHhwQM+fKeEfrc8AWCwWZs2axfz589myZQvQsa02m42dO3eaWaLfumpPTU0Ndrvdt15ycnJE\ntrW0tJSZM2fyzDPP+P7Ps3Pnzohu2/79+9m7dy8ZGRlRef4utu/igJJoOYft7e2kpaUxevRoHn74\nYSZMmBDQ86eJ3QJox44d7N69mzVr1rBixQpOnDgRdUNZ+9KeSHt+48EHH+TQoUNUVFRw4MAB38CG\nztocKW1rbm5m0aJFvPDCCwwbNizqzt+l7Rs6dGhUncN+/fqxe/du9u/fz8svv0xdXV1Az58p4T9j\nxgz27dvn+7x3715mzpxpRikBNXbsWADsdjtz586lvLycGTNm0NDQAEBDQwMzZswws0S/ddWezMxM\n6uvrfevt27cv4tp63XXXYbFYiI+P56GHHuKtt94CIrdtbW1tLFy4kHvuucc3DDuazl9n7Yu2cwgw\nceJE7rjjDmpqagJ6/kwJ/2h8BqClpcX3J2ZTUxMVFRUUFBSQmZnJ+vXraW1tZf369RH/S66r9mRk\nZFBRUcGRI0dwu93069eP4cOHm1xt3xw/fhyACxcusHHjRu644w4gMttmGAZLly5l6tSpPPLII77v\no+X8ddW+aDmHp06d8s2OfPr0af7whz8wb968wJ6/gN6e7gO3223YbDbjxhtvNNauXWtWGQFz8OBB\nIy0tzUhLSzNmzZplvPbaa4ZhRPZQz8WLFxtjx441Bg0aZNxwww3G+vXru21PSUmJceONNxp2u92o\nqqoysfKeXWzbwIEDjRtuuMF47bXXjHvuucdITU01brnlFuPRRx/tMHIrktpmGIaxfft2w2KxGGlp\nab5hj++9917UnL/O2vfuu+9GzTn8/PPPDYfDYUybNs3Iy8szNmzYYBhG93nS1/ZZDCPKOqVFRKRH\nuuErIhKDFP4iIjFI4S8iEoMU/iIiMUjhLyISgxT+IiIx6P8B2Noc8gqcm7UAAAAASUVORK5CYII=\n" } ], "prompt_number": 16 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Convert a single record to a data frame\n", "First we convert a *dict* with a *list* value in key *w* to a *list* of *dict*s, each with a single value for key *w*." ] }, { "cell_type": "code", "collapsed": false, "input": [ "time_series = [None]*len(w) \n", "for t in range(len(w)):\n", " time_point = {'w':w[t]}\n", " time_point.update(params)\n", " time_series[t] = time_point" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 17 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now to convert this to a data frame using the [pandas](http://pandas.pydata.org/) package (current version 0.9.0). The relevant doc for this usage is [here](http://pandas.pydata.org/pandas-docs/stable/dsintro.html#from-a-list-of-dicts)." ] }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "print \"pandas version\", pd.__version__" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "pandas version 0.9.0\n" ] } ], "prompt_number": 18 }, { "cell_type": "code", "collapsed": false, "input": [ "df = pd.DataFrame(time_series)\n", "df.to_csv(\"test.csv\")" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 19 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Convert the entire record set to a data frame\n", "Start by writing a function that does what we did before - convert a record from MongoDB to a *list* of *dict*s." ] }, { "cell_type": "code", "collapsed": false, "input": [ "def record_to_data_list_of_dicts(record):\n", " params = [(k,v) for k,v in record.items() if (k!='_id' and k!='w')]\n", " time_series = [None]*len(w) \n", " for t in range(len(w)):\n", " time_point = {'w':w[t],'t':t+1}\n", " time_point.update(params)\n", " time_series[t] = time_point\n", " return time_series" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 20 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Next, here is a function that takes a cursor and converts all the records to one big *list* of *dict*. Use a *limit* of 10 when testing." ] }, { "cell_type": "code", "collapsed": false, "input": [ "def cursor_to_data_frame(cur):\n", " time_series = []\n", " for record in cur:\n", " time_series.extend( record_to_data_list_of_dicts(record) )\n", " return time_series" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 21 }, { "cell_type": "markdown", "metadata": {}, "source": [ "This function creates a cursor of just 10 records, for testing, and converts it to a *DataFrame* and saves it to *csv*. \n", "\n", "The *csv* file can be opened in *R* with the following:\n", "
\n",
      "data<-read.csv('test.csv')\n",
      "dim(data) # should be 3000   12\n",
      "
\n", "\n", "If the filename ends with *.gz* the file will be compressed with *gzip* - this is transparent to R and sometimes allows R to load the file _faster_ compared to a non-compressed *csv* file." ] }, { "cell_type": "code", "collapsed": false, "input": [ "def mongo_to_csv(csv_fname, limit):\n", " col = ma_collection()\n", " cur = col.find({},['w','tau','B','genes','epistasis','s','b','U','pop','pi'], limit=limit)\n", " time_series = cursor_to_data_frame(cur) \n", " cur.close()\n", " df = pd.DataFrame(time_series)\n", " if csv_fname.endswith(\".csv\"):\n", " df.to_csv(csv_fname, index=False)\n", " elif csv_fname.endswith(\".csv.gz\"):\n", " import gzip\n", " fout = gzip.open(csv_fname, 'wb')\n", " df.to_csv(fout, index=False)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 28 }, { "cell_type": "code", "collapsed": false, "input": [ "mongo_to_csv(\"test.csv\", 10)\n", "mongo_to_csv(\"test.csv.gz\", 10)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 29 }, { "cell_type": "markdown", "metadata": {}, "source": [ "That's it, all the functions are in place, the above is a test case. To run on all the data just use:\n", "\n", "
\n",
      "mongo_to_csv(\"ma_output.csv.gz\", 0)\n",
      "
\n", "\n", "Note: *limit=0* gets all the records." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And you can run this in R to plot a certain fitness mean:\n", "\n", "
\n",
      "df <- read.csv('ma_output.csv.gz')\n",
      "data <- ddply(df, .(s,U,pi,t), summarize, mean.w = mean(w), median.w = median(w))\n",
      "subdata <-subset(data,s==0.01 & U==0.003 & pi==0)\n",
      "plot(mean.w~t, data=subdata)\n",
      "
" ] } ], "metadata": {} } ] }