{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## The next cell downloads a ~65 MB data file, 'sequence.index'. You only need to run this cell once." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2016-02-05 15:45:59-- ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/historical_data/former_toplevel/sequence.index\n", " => 'sequence.index'\n", "Resolving ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)... 193.62.192.8\n", "Connecting to ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)|193.62.192.8|:21... connected.\n", "Logging in as anonymous ... Logged in!\n", "==> SYST ... done. ==> PWD ... done.\n", "==> TYPE I ... done. ==> CWD (1) /vol1/ftp/historical_data/former_toplevel ... done.\n", "==> SIZE sequence.index ... 67069489\n", "==> PASV ... done. ==> RETR sequence.index ... done.\n", "Length: 67069489 (64M) (unauthoritative)\n", "\n", "sequence.index 100%[=====================>] 63.96M 562KB/s in 2m 31s \n", "\n", "2016-02-05 15:48:34 (434 KB/s) - 'sequence.index' saved [67069489]\n", "\n" ] } ], "source": [ "!rm sequence.index 2>/dev/null\n", "!wget -nd ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/historical_data/former_toplevel/sequence.index -O sequence.index" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Interfacing with R" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/root/miniconda/lib/python3.4/importlib/_bootstrap.py:321: FutureWarning: The pandas.rpy module is deprecated and will be removed in a future version. We refer to external packages like rpy2. 
\n", "See here for a guide on how to port your code to rpy2: http://pandas.pydata.org/pandas-docs/stable/r_interface.html\n", " return f(*args, **kwds)\n" ] } ], "source": [ "import os\n", "\n", "from IPython.display import Image\n", "\n", "import rpy2.robjects as robjects\n", "import rpy2.robjects.lib.ggplot2 as ggplot2\n", "from rpy2.robjects.functions import SignatureTranslatedFunction\n", "\n", "import pandas as pd\n", "import pandas.rpy.common as pd_common" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "read_delim = robjects.r('read.delim')\n", "seq_data = read_delim('sequence.index', header=True, stringsAsFactors=False)\n", "#In R:\n", "# seq.data <- read.delim('sequence.index', header=TRUE, stringsAsFactors=FALSE)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "This data frame has 26 columns and 187720 rows\n", " [1] \"FASTQ_FILE\" \"MD5\" \"RUN_ID\" \n", " [4] \"STUDY_ID\" \"STUDY_NAME\" \"CENTER_NAME\" \n", " [7] \"SUBMISSION_ID\" \"SUBMISSION_DATE\" \"SAMPLE_ID\" \n", "[10] \"SAMPLE_NAME\" \"POPULATION\" \"EXPERIMENT_ID\" \n", "[13] \"INSTRUMENT_PLATFORM\" \"INSTRUMENT_MODEL\" \"LIBRARY_NAME\" \n", "[16] \"RUN_NAME\" \"RUN_BLOCK_NAME\" \"INSERT_SIZE\" \n", "[19] \"LIBRARY_LAYOUT\" \"PAIRED_FASTQ\" \"WITHDRAWN\" \n", "[22] \"WITHDRAWN_DATE\" \"COMMENT\" \"READ_COUNT\" \n", "[25] \"BASE_COUNT\" \"ANALYSIS_GROUP\" \n", "\n", "Columns in Python 26 \n", "Type of read count before as.integer: character\n", "Type of read count after as.integer: integer\n", " [1]\n", " \"Column names in R: \"\n", " \"FASTQ_FILE\" \n", " \"MD5\" \n", "\n", "\n", " [4]\n", " \"RUN_ID\" \n", " \"STUDY_ID\" \n", " \"STUDY_NAME\" \n", "\n", "\n", " [7]\n", " \"CENTER_NAME\" \n", " \"SUBMISSION_ID\" \n", " \"SUBMISSION_DATE\" \n", "\n", "\n", "[10]\n", " \"SAMPLE_ID\" \n", " \"SAMPLE_NAME\" \n", " \"POPULATION\" 
\n", "\n", "\n", "[13]\n", " \"EXPERIMENT_ID\" \n", " \"INSTRUMENT_PLATFORM\"\n", " \"INSTRUMENT_MODEL\" \n", "\n", "\n", "[16]\n", " \"LIBRARY_NAME\" \n", " \"RUN_NAME\" \n", " \"RUN_BLOCK_NAME\" \n", "\n", "\n", "[19]\n", " \"INSERT_SIZE\" \n", " \"LIBRARY_LAYOUT\" \n", " \"PAIRED_FASTQ\" \n", "\n", "\n", "[22]\n", " \"WITHDRAWN\" \n", " \"WITHDRAWN_DATE\" \n", " \"COMMENT\" \n", "\n", "\n", "[25]\n", " \"READ_COUNT\" \n", " \"BASE_COUNT\" \n", " \"ANALYSIS_GROUP\" \n", "\n", "\n" ] }, { "data": { "text/plain": [ "\n", "[ 27, 27, 27, ..., 25, 25, 25]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print('This data frame has %d columns and %d rows' % (seq_data.ncol, seq_data.nrow))\n", "print(seq_data.colnames)\n", "#In R:\n", "# print(colnames(seq.data))\n", "# print(nrow(seq.data))\n", "# print(ncol(seq.data))\n", "\n", "print('Columns in Python %d ' % robjects.r.ncol(seq_data)[0])\n", "\n", "#access some functions\n", "as_integer = robjects.r('as.integer')\n", "match = robjects.r.match\n", "\n", "my_col = match('READ_COUNT', seq_data.colnames)[0] # Vector returned\n", "print('Type of read count before as.integer: %s' % seq_data[my_col - 1].rclass[0])\n", "seq_data[my_col - 1] = as_integer(seq_data[my_col - 1])\n", "print('Type of read count after as.integer: %s' % seq_data[my_col - 1].rclass[0])\n", "\n", "my_col = match('BASE_COUNT', seq_data.colnames)[0] # Vector returned\n", "seq_data[my_col - 1] = as_integer(seq_data[my_col - 1])\n", "\n", "my_col = match('CENTER_NAME', seq_data.colnames)[0]\n", "seq_data[my_col - 1] = robjects.r.toupper(seq_data[my_col - 1])\n", "robjects.r.assign('seq.data', seq_data)\n", "robjects.r('print(c(\"Column names in R: \",colnames(seq.data)))')\n", "\n", "robjects.r('seq.data <- seq.data[seq.data$WITHDRAWN==0, ]')\n", "#The line above removes all withdrawn sequences from the R-side seq.data\n", "\n", "robjects.r(\"seq.data <- seq.data[, c('STUDY_ID', 'STUDY_NAME', 'CENTER_NAME', 'SAMPLE_ID', 'SAMPLE_NAME', 
'POPULATION', 'INSTRUMENT_PLATFORM', 'LIBRARY_LAYOUT', 'PAIRED_FASTQ', 'READ_COUNT', 'BASE_COUNT', 'ANALYSIS_GROUP')]\")\n", "#Lets shorten the dataframe\n", "\n", "#Population as factor\n", "robjects.r('seq.data$POPULATION <- as.factor(seq.data$POPULATION)')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeAAAAHgCAMAAABKCk6nAAACu1BMVEUAAAABAQECAgIDAwMEBAQF\nBQUGBgYHBwcICAgJCQkKCgoLCwsMDAwNDQ0ODg4PDw8QEBARERESEhITExMUFBQVFRUWFhYXFxcY\nGBgZGRkaGhobGxscHBwdHR0eHh4fHx8gICAhISEiIiIjIyMkJCQlJSUmJiYnJycoKCgpKSkqKior\nKyssLCwvLy8wMDAxMTEyMjIzMzM1NTU2NjY3Nzc5OTk6Ojo7Ozs8PDw9PT0+Pj4/Pz9AQEBBQUFC\nQkJDQ0NERERGRkZHR0dMTExNTU1PT09QUFBSUlJTU1NUVFRVVVVXV1dYWFhaWlpbW1tdXV1eXl5f\nX19hYWFiYmJkZGRmZmZnZ2doaGhpaWlqampra2tsbGxtbW1ubm5vb29wcHBxcXFycnJzc3N1dXV2\ndnZ4eHh5eXl6enp7e3t8fHx9fX1+fn5/f3+AgICBgYGCgoKDg4OEhISFhYWGhoaHh4eIiIiJiYmK\nioqLi4uMjIyNjY2QkJCRkZGSkpKTk5OUlJSVlZWWlpaXl5eYmJiZmZmbm5ucnJydnZ2enp6fn5+g\noKChoaGioqKjo6OlpaWmpqanp6eoqKipqamqqqqrq6usrKytra2urq6vr6+wsLCxsbGysrKzs7O0\ntLS1tbW2tra3t7e4uLi5ubm6urq7u7u8vLy9vb2+vr6/v7/AwMDBwcHCwsLDw8PExMTFxcXGxsbH\nx8fIyMjJycnKysrLy8vMzMzNzc3Ozs7Pz8/Q0NDR0dHS0tLT09PU1NTV1dXW1tbX19fY2NjZ2dna\n2trb29vc3Nzd3d3e3t7f39/g4ODh4eHi4uLj4+Pk5OTl5eXm5ubn5+fo6Ojp6enq6urr6+vs7Ozt\n7e3u7u7v7+/w8PDx8fHy8vLz8/P09PT19fX29vb39/f4+Pj5+fn6+vr7+/v8/Pz9/f3+/v7///8e\nabnDAAARcUlEQVR4nO3ci39cZZnA8ViRVGQvLNSVW11QWXe94boXXFG833HFdd2qK+4Fy8qmpY00\nsdaCaOPWQLVELpWuiJQEq23XQkXtBi8UaoX0tLl1kpncZpKZef+MPZOeJ21Ozpk8bzovc+b09/vw\nOUx63jnvO+fbzC3pNBlKdU31XgC5DeCUB3DKAzjlAZzyAE55AKc8gFMewCkP4JQHcMoDOOUBnPJq\nADw1Elchds+CJsbUQ7PxEy5oKqseOj6hHjqa169gMqcempvUHzY/Grur5sA5Ly5zLHZXuIlR9dDh\nafVQrzioHpobVw/tL+tXkM+oh2by+sOW+2N3ARzd2Qacua/FmL1fXffVp0ymo7VjJGILsKLEAt8z\n7APv6y8eaTPbewrdXRFbgBUlFtiYltntkQ6zYczk2iK2ACtKOvAznaNmTckU10Zs/b67adOmfcW4\nTOyeBZVL6qGlsv6wxtFh9UNd3bDYPWM2wOW9h2eMaRsz2faIrd+Q/zfm+GBcZih2V7ipMfXQkRn1\n0MFiRj10fFI9dLisX0Ehqx6aLegPWx6O29NvA9z9VOVCV3dh146ILX
fRihJ7F91SaU1lkx3d2tqZ\nMwu3ACtKLLA+gKsF8MkABlgC2ANYAhhgCWCAJYBlKoABDgIYYKkmwG/WFX1lgGUqgAEOAhhgCWAP\nYAlggAEGOBzAMhXAAAcBDLAEsAewBDDAAAMcDmCZCmCAgwAGWALYA1gCGGCAAQ4HsEwFMMBBAAMs\nAewBLAEMMMAAhwNYpgIY4CCAAZYA9gCWAAYYYIDDASxTAQxwEMAJBj4el+mP3RVuMqseemJaPfR4\ncThmhxI4+sqDZf0K8iPqoSN5/WHLA3F7jtUeOPYTMxP8YaRK4OgrJ/rDSI/XHjj+zoK76FTcRcdP\nBTDAQQADLAHsASwBDDDAAIcDWKYCGOAggAGWAPYAlgAGGGCAwwEsUwEMcBDAAEsAewBLAAMMMMDh\nAJapAAY4CGCAJYA9gCWAAQYY4HAAy1QAAxwEMMASwB7AEsAAAwxwOIBlKoABDgIYYAlgD2AJYIAB\nBjgcwDIVwAAHAQywBLAHsARwdeBjW1o7sybT0doxErEFWFGygbccyD98r9neU+juitgCrCjZwOtn\nzIkvmw1jJtcWsfXLT05OZmI/MZMPI036h5F+42fTe9aZNSVTXBux9fvvlpaWnqqHSGhK4HovcwlN\nzP+yOnDf5tse3Wzaxky2PWIbxF10tZJ9F31wsLD3UdPVXdi1I2ILsKJkA3e3fen+ghnd2tqZi9gC\nrCjZwKoArhbAJwMYYAlgD2AJYIABBjgcwDIVwAAHAQywBLAHsAQwwAADHA5gmQpggIMABlgC2ANY\nAhhggAEOB7BMBTDAQQADLAHsASwBDDDAAIcDWKYCGOAggAGWAPYAlgAGGGCAwwEsUwEMcBDAAEsA\newBLAAMMMMDhAJapAAY4CGCAJYA9gCWAAQZ4UeDBuMxQ7K5wU2PqoSMz6qGDxUzMDiVw9JWHy/oV\nFLLqodmC/rDl4bg9/bUHPhaXOR67K9xkVj10eFo99FhxKGaHEjj6ygNl/QryI+qhI3n9YcsDsbtq\nDxx/Z8FddCruouOnAhjgIIABlgD2AJYABhhggMMBLFMBDHAQwABLAHsASwADDDDA4QCWqQAGOAhg\ngCWAPYAlgAEGGOBwAMtUAAMcBDDAEsAewBLAAAMMcDiAZSqAAQ4CGGAJYA9gCWCAAQY4HMAyFcAA\nBwEMsASwt2TgXoDTDTz71bE/AjidwNde0lTpwu8AnE5gY66zowV48ZIFvKQArlaygLe9clnlThrg\ntAJfdF/BDhfgxUoW8IW5+XsLD9zW/lOT6WjtGInYAqwoWcDf+veheV8/srsw+rzZ3lPo7orYAqwo\nWcAXvqRp3mPwpl1fuqPPbBgzubaIrZ93+PDho8NxmROxu8Llx9VDszPqocOl0ZgdSuDoK2fK+hVM\n59RDx6b1hy1n4vYMVAOemb/TrNs3feBOs6Zkimsjtn4PbtmyZf90XCZ2z4JKRfXQmbL+sOWZmB1K\n4JgVWNywsv6GFS1umIm7YdOhR9moZ8zluUvtJ0xhvWkbM9n2iC130YqSdRc9ewd9zgVzX9+3d/qJ\nDtPVXdi1I2ILsKJkAVfq/7e75y7n7m7d0m9Gt7Z25iK2ACtKHrDJXrTwz6oFcLWSBzx9L8CpBa48\nBC+77G5jFcDVShbwkgK4WgCfDOAXBPj3H73k3EtvOA5wWoGvuWWgOPAf7wQ4rcDnj1fA/gDgtAJf\ne3N/ceAL7wA4rcB9H7vYfwwODQE4XOMCLymAqwXwyQB+QYC/91p/c9WDAKcVeMWP/c1PXwFwWoFf\n1ZktnbjjSoDTCtz7nj8556L3/RbgtAIvKYCrBfDJAAZYAtgDWAIYYIABDgewTAUwwEEAAywB7AEs\nAQwwwACHA1imAhjgIIABlgD2AJYABhhggMMBLFMBDHAQwABLAHsASwADDDDA4QCWqQBOBXDsR6Ly\nabNJ+7TZpQHH/13iOzgV38HxUw
EMcBDAAEsAewBLAAMMMMDhAJapAAY4CGCAJYA9gCWAAQYY4HAA\ny1QAAxwEMMASwB7AEsAAAwxwOIBlKoABDgIYYAlgD2AJYIABBjgcwDIVwAAHAQywBLAHsAQwwAAD\nHA5gmQpggIMABlhKCfAZrQDgIIABBhjgcADLVAADHAQwwBLAHsASwACf1cC7Nq67/Tcm09HaMRKx\nBVhRsoH3DxWfaTPbewrdXRFbgBUlG9iY0tN3mg1jJtcWsfU7uGfPnkPZuEwudle46Sn10Imiemi2\nNB6zQ3l6o6+cK+tXMDPpYgXZcuypHbYDbmnrM2tKprg2Yuu3d+fOnb0TcZnYPQuaKaiH5kv6w5an\nYnYoT2/0lSctblgx72IFE2YybsKM7XfwZtM2ZrLtEdsg7qKrley76CczpUMbTVd3YdeOiC3AipIN\n/NDGWzc/bUa3tnbmIrYAK0o2sCqAqwXwyQAGWALYA1gCGGCAAQ4HsEwFMMBBAAMsAewBLAEMMMAA\nhwNYpgIY4CCAAZYA9gCWAAYYYIDDASxTAQxwEMAASwB7AEsAAwwwwOEAlqkABjgIYIAlgD2AJYAB\nBhjgcADLVAADHAQwwBLAHsASwAADDHA4gGUqgAEOAhhgCWAPYAlggAFeFHg4LnMidle4/Lh6aHZG\nPXS4NBqzQ3l6o6+cKetXMB13es5oBcPlTNyEA7UHjv+7xHdwKr6D46cCGOAggAGWAPYAlgAGGGCA\nwwEsUwEMcFAs8JmdB4ABBhjgcADLVAADHFQLYJtTdkanF2CZCmCAgwC2WgHAi52yMzq9AMtUAAMc\nBLDVCgBe7JSd0el9oYHtFguw/TkLBbBMBTDAQQDX6IYBDDDAC85ZKIBlKoABDgK4RjcMYIABXnDO\nQgEsUwEMcBDANbphAAMM8IJzFgpgmQpggIMArtENAxhggBecs1AAy1QAn53ANgt2dB5sxi4IYJkK\nYIAXW7Cj82AzdkEAy1RnG7CjFdgtthbAmY7WjhGAk3jDagO8vafQ3XW2ADfUCmoEvGHM5NoATuAK\nagS8pmSKaysX9u7cubN3YjblKiyHNtZh67+CiYlMLYDbxky2vXLh4J49ew5l4zK52F3hpqfUQyeK\n6qHZ0rh6aL6gHpor61cwM6keOjmjP2w59tQO1wK4q7uwa4d80ZAfo7Sw3Lh6aE1eJi0sk9cf1vE/\nPhvd2tqZA1hTYwLPC+BqAXwygAGWAPYAlgAGWAIYYAlgmQpggIMABlgC2ANYAhhgCWCAJYBlKoAB\nDgK4AYGP9hxVL/jJX6mHPrtPPdTb8zv10N5fqof+/kf6FTz+tHrobx/XH/ZHz8XuqjlwbKWWgnrs\nA/vVQ498Xb+ETcfUQ3f/QD0026pfwbZe9dDebfrDrs8tPmY2gCWArQPYpBz4rmn12L2/UQ89/j/6\nJdx/Qj304OPqoRPf1q/g0SPqoUe69Yf99oRyoEtgSkAApzyAUx7ASyp/11vqvQRl7oAPXO9vPvoz\n1SJOpjrsucvUY20OazPWPL96xapfaQYefLd/4PN/rjqoxfmyGOoQ+PW/8De9b6j1YZ+7o/1IrY9p\nV88H/v5e5Wn7m/v8E7zlOtVYi/Nlc2rdAZ+f9TeZ5bU/8OB/vr9zvPaHVdf8TfVpW573R+Zephpr\ncb5sTq074Pev78sfvvkDmqGdF7zhkStfsWPxgUFH3v4OzTCbO0iLJez9yLX3KE/byqP+5ugK1ViL\n82Ux1CFwZtXlzZd/TvWGy+U/eeSlPd2vVh7Yu/mfde9J2NxBWi3Bu+VP/+n/NANv+vShqUOrvqg6\nqMX5shiajGfRy0rFplJ5mWrss5u+PKA8rM0dpM0S/ArbVM+iCy1XLr/qtqL2qC5KBHCT/KfonHPV\nT3dt7iBtltBQubtJOz5ldl9yyQ9Vi7B5jaLP5g7SYgmzA5trvlqL82Ux1CHwlbvNW77/2F86O/7i
\nObyDfOgzD2uGWbxotzlfNqfWHfBLpodebfLLtcO17w3ZvMh31SP/8pBuoM2LdovzZXNq3QFfsX/d\nZ80Dr9MNVr835Or9E4seu9Hi55X6F+0W58vm1LoDvufCv3jOvEv1I06L94ZsXuTbvrxW5j/+2jxj\nUL5otzlfFkOT8bzR4r0hmxf5di+v3aR+0e6qRABbvDdk8yLf8rWtgyxetLvKHbDNM0j9e0M2OXpt\na/GKyuJFu00278G6A7b8sY/yvSGbHL28rn8278G6vPlOfuzj6JlTQ2XzHqzbv9/aZ5AW1f+Zk6Nf\nObDI8j1YZzl5Bln/Z071/5UDq/dgna3C0TPIJPxUoN6/cmDzHqy7M+XoGWQynjkpH3vuf/mlO10v\nZZHqfaYaMvVjz5899tgrHa9lsQC2zuKx50XlsosTXLkLO++6Z3RjHcx/Ws1uD1+XLB57HD5bGP2v\na1Xj3AEn48Gyvrk8B2P1fh3cef3/8gjgrvH2v1aNcygwuf4Tz6QR2OYR0N0Kmv9W9/NzpwL9N77Y\n5eHrmPIR0OoHLm5K47fYC5HuEdDRm16vWrUjqx3rDvgz5udvftnbnnZ2/LqmfQR086ZX3zc/tOKa\nW3UfeuHwWbS5euP4V97q7Ph1y+YR0Dj5gYtfYdPFyn826WByOfJLJ83Uec6O3xg5+YHLwF0fuepD\nX/u1aqxD4KG/+qXpXens+I2Qox+4LHvjd0rase6Ar77ij281m1c7O36dq+ev7Bzb+uHXfPhO3ecS\n8Sx6idX5xE1tvPh81UDei15i9QTu6/jgVZ/8/pRqLO9FW1f/G/bnN+1LwGMw70UnIt6LTnm8F73E\nGuWvbqOsM3E1yolrlHUmrkY5cY2yzgRV/2fRNjXKOmmJAZzyALaOu2hKUACnPIBTHsApL+3AD1/d\nvOLjA/LMyJjm2V90aZp7piR/Xtle8N7fzV1PxhnzgxdVPk6g6ZC/6Vt26kCNUgMtdSn95IJ7832f\nv+HU7Wz+eOWf1c5+dWoT/P9EyzVzVzw17pPX+1c3y+/wNx3NjXfCGm29lr1nc3BBbueL97eZOODT\nP/Ziblxp5fFLZ/yvb/K//GLd/+m5fY22XstWPh9cOAV5S28c8MgX3jl3xblxu99r3tfjX/raAfPU\n7QAnreX54II8cjaZ/D/MnA4sf97UtOxNN/TPXXFu3I3fMnd/1v/62X81qw81nbpCo9RIa11CVxwO\nLpz2nfpEa/R38MSPT7vi3LiVvudlZf/SW4tvr/+ng9jXaOu17Prbggun3xW3/CL6Lnrnab+IKuMO\n/J1/4W1P+F9/+iurAE5cB/9w29Tghg/Ohyx8bLlcnP8k61OTc1eUcatv9y98fbX/9YOXfQ/g5PXD\nNzW//IbTXgfP3t4nz6lsTz0Gy58f+Me568m411UelodeX/lkufNypsnwOpiSFcApD+D5NdYPexWl\n6KZQVACnPIBTHsApD+CUB3DKAzjlAZzy/h9sZ1bXRDf0qAAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ggplot2.theme = SignatureTranslatedFunction(ggplot2.theme,\n", " init_prm_translate = {'axis_text_x': 'axis.text.x'})\n", "bar = ggplot2.ggplot(seq_data) + ggplot2.geom_bar() + ggplot2.aes_string(x='CENTER_NAME') + ggplot2.theme(axis_text_x=ggplot2.element_text(angle=90, hjust=1))\n", "robjects.r.png('out.png')\n", "bar.plot()\n", 
"dev_off = robjects.r('dev.off')\n", "dev_off()\n", "Image(filename='out.png')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Get Yoruba and CEU\n", "robjects.r('yri_ceu <- seq.data[seq.data$POPULATION %in% c(\"YRI\", \"CEU\") & seq.data$BASE_COUNT < 2E9 & seq.data$READ_COUNT < 3E7, ]')\n", "yri_ceu = robjects.r('yri_ceu')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeAAAAHgCAMAAABKCk6nAAAC61BMVEUAAAAAv8QBAQECAgIDAwME\nBAQFBQUGBgYHBwcICAgJCQkKCgoLCwsNDQ0ODg4PDw8QEBARERESEhITExMUFBQVFRUWFhYXFxcY\nGBgZGRkaGhobGxscHBwdHR0eHh4fHx8gICAhISEiIiIjIyMkJCQlJSUmJiYnJycoKCgpKSkqKior\nKyssLCwtLS0uLi4vLy8wMDAxMTEyMjI0NDQ1NTU2NjY3Nzc5OTk6Ojo7Ozs8PDw9PT0+Pj4/Pz9A\nQEBCQkJDQ0NERERFRUVGRkZHR0dISEhKSkpLS0tMTExNTU1OTk5PT09QUFBRUVFTU1NUVFRWVlZY\nWFhZWVlaWlpbW1tcXFxdXV1eXl5fX19gYGBhYWFiYmJjY2NkZGRlZWVmZmZnZ2dpaWlqampsbGxt\nbW1ubm5vb29wcHBxcXFycnJzc3N0dHR1dXV2dnZ3d3d4eHh5eXl6enp7e3t8fHx8rgB9fX1+fn5/\nf3+AgICBgYGCgoKDg4OEhISFhYWGhoaHh4eIiIiJiYmKioqLi4uMjIyNjY2Ojo6Pj4+QkJCRkZGS\nkpKTk5OUlJSVlZWWlpaXl5eYmJiZmZmampqbm5ucnJydnZ2enp6fn5+goKChoaGioqKjo6OkpKSl\npaWnp6eoqKipqamqqqqrq6usrKytra2urq6vr6+wsLCxsbGysrKzs7O0tLS1tbW2tra3t7e4uLi5\nubm6urq7u7u8vLy9vb2+vr6/v7/AwMDBwcHCwsLDw8PExMTFxcXGxsbHfP/Hx8fIyMjJycnKysrL\ny8vMzMzNzc3Ozs7Pz8/Q0NDR0dHS0tLT09PU1NTV1dXW1tbX19fY2NjZ2dna2trb29vc3Nzd3d3e\n3t7f39/g4ODh4eHi4uLj4+Pk5OTl5eXm5ubn5+fo6Ojp6enq6urr6+vs7Ozt7e3u7u7v7+/w8PDx\n8fHy8vLz8/P09PT19fX29vb39/f4dm34+Pj5+fn6+vr7+/v8/Pz9/f3+/v7////Bp4aBAAAgAElE\nQVR4nO2df4AdRX3AbwNCfkAQC9ECBlQssRSVgFhorS1arbXVtlrE/taW/tD+wHbrgbkAFzEKQYMJ\nklgopKGlpgYIJIomdg2BYBNiNISAJLwcSV4uubvc3buX+zF/dndmdnZmd2b25+zb9+77Idzt7uz3\n3mU/mdnZmdmZLgR0NF2t/gUAs4DgDgcEdzgguMMBwR0OCO5wQHCHA4I7HBDc4YDgDgcEdzgguMMB\nwR1OLsGjxxgjg8fUHNekZQ4cHcgYOKZJ1MXxgUVdfvPkEjxYY4z219TUNWnNoxkDx4+o0/o0abXJ\nQ+q0VzVpNdQXbLYNIFgABAuA4OoDggVAsAAIrj4gWAAEC4Dg6gOCBUCwAAiuPiBYAAQLgODqA4IF
\nQLAACK4+IFgABAuA4OqjEvz47YuW/YTtDbv/969cvPIY6vbwD4Pg6qMSvPXIxN5etucpfWBTc+Ma\nb2fPY/7hjhPsTB/BCE3u+Zor8/alLyIi+LYhNOgpP3z/pJe8Y/Pmzc8PMMZHBtQMa9ImdIm6tMkT\n6rRBTdrA1JAqxXEcZZoLGgw22wa14O7eAwgt239gxTAulkdvmkQTN7veV/Xj5C3r1q17bpgx0RxW\nM6pJmxzLGqhL1KVNjahSXMHKNBcUJJalJz+6HHwnQou6u3sQycG9Q2hgCUI71wendFgR7XhoAjuq\niN7eP/n87W4OPoj3PMFrNjYffxihVQeCk0Bw9VEJXn/7F+7cg9DepT0b3L0h9//j9y5ePYhQz0hw\nUocJrk2nWnQiQHD1AcECIFgABFcfECwAggVAcPUBwQIgWKCDBNPnXxAs0DmCHRAso5MEE8MgWKBj\nBDsgWEpHCcaGQbBAxwhmgGABEFx9QLAACBYAwdUHBAuAYAEQXH1AsAAIFgDB1QcEC4BgARBcfUCw\nAAgWAMHVBwQLgGABEFx9QLAACBYAwdUHBAuAYAEQXH1AsAAIFmhvwbKXgUGwQNsLjhgGwQItEmzh\nryA4EW0o2LKwYSiiE9F+gi2LGIZKViLaT3BhOVgGCBZo73uwDBAs0N61aBkgWAAEVx8QLACCBUBw\n9QHBAiBYoA0ES1ozQHBi2kNw2HBWwY4DghW0TrBsAtlJzZSyGsHCTyrq8ptnGggOJ+omDVYLdkCw\nmpYJls4ArZsWWi8YimgFLc3BkcTJLDnYAcEaF5V6TNLO6q4VDLVoFa0SLHOpn7dfJRgHgWAVLRSc\n6g6sFIyDIAcraZFgucsMRTSNAMEqWihYkpi+oYP+HBCsojWCFc9I6QU7ILiygsMJ3rG0gh3/XwoI\nVtESwcpGjpSCgx8DglUUL7jRaCQRHD7ugOBUtFRwI6Pg1PdgEOxRScGR45kEM0CwisIFNxIU0TKw\ndBCcmME+xtjxPjVHNWkn+zMEYsF1ddwhTVrf5BFN4GFNIDoUbLYN+QQfZjS57Qj9mrSTx7MEuoIn\njqnjjvS7eVWVOHlUHViva34bFCQWdfnN055FtIe2iNY1OKcoosWfMQ2L6OAvX33B7FBywaGfAoJV\nlC5YO1IHBCemwwWH28JAsIpKCGbL1yUVHGnrBMEqWiFYnZhGsJAIglVUakxWqiJaSATBKgoW3MBf\n1YIdB95NYrSh4EYDG1YK9opWEOzTfoIbSQRrbsEgODktE+wZhhyciLYUjL+D4ES0neBGbBFdy1+L\nlpfwIFhFuz0mKW7hIFhF+wmWGgbBKtpMsKoWDoJVtKFgmWEQrKK9BDsg2KcjBasbSUCwChDcOjpL\nsF+05hCsaecEwSrKFIztZBesvAHXQLCa9hGs8wuClZQl2MlbROs7okCwipIEB3ZAMAMEh36ApicZ\nBKtIIJgupZIisHjBcSMFQLCKcgRzekAwAwQL8Q50+PMUeQ+W+U0l2MkpGMfDiA6BSlWy+PIVBDM6\nRrCTUzCJB8ECVRPMdtILdkCwhAoJdgoQXINRlSE6R7ADgmVUR3DoETatYMfvZADBAh0muAaCQ1RL\nMLebSbC3AYIFKiM43MaYUnCQ/0GwQIcI5sp3ECxQDcGshhSQSjAfC4IFqiLYDvcCpRXMokGwQCUE\ne37sHIIdEKykMoIdW0xLIdgBwWoqIbjm2C5iWnLBofs3CBaogmCnZtsRwykFB7sgWKBCgsW0xILD\nFXAQLFABwZJnpFpKwXwaCBaojOBwWlLBkWgQLNB6wYqBkKkE82kgWKD1gmvuHVhiOKHg6D8PECxQ\nAcFODsGS7A+CBSogOFqD9kgkWHb7BsECbS1YevsGwQKtFyyvYyUXHA4GwQItF6yoRCcRLA8FwQJt\nLxiKaD2VECxLixcs9wuCRVotWJWB4wUr/GYV/JFZ6ot0zydR
V9eMC9ciNPLpuXNvGEP+bhe5/NTA\n7tOecxNcvAPszG976dd/I4ejSguWVZAZVLA0rWzBpxwVdq/itl958wn3Ik8+OQ+hf/xA7ZX3fx75\nu6Lgv1j8Z4gdY2deNOzunnhzjhKjyoLlj0A+40eUGThWsMpvRsFuvrvxvNMuXI12/fKcd27zdne+\na87CbWM/9zcz0WeWeBd56on5CF30U4R+8ha2Kwg++q6pK+rsmH/m7Kv+wdvt/Wx2RyrBW+5YdMcu\ntjfs/t+/cvHKY6jbo0KC5WkxgtX/MjLmYO8qTm6di951z+jahd7ulfeMrL4cnXrfGLrkx94/gFPe\ntgmhWW6hOzqb7QqCb/0a+uot7Bg789lZO93dH19avOAfvDrxUi/b85Q+sKm5cY23s+exaghWa4oR\nrAnMLHjFW14zowvNHqG7s0fRyGzUNe5mwmF2kd/s5phdb2W7rsXhM+je+AVutj9/3BccnPnX73Y9\ne6cVLdjlpZWuzNuXvkgF3zaEBj3lh++f9FJ3bN68+fkBxvjIgJphTdqEOtG2dYHYkyJt8IQmcIoU\n0NK0oSF1nIMG2XbkKr52w+BDXaML7x5d+3Z39+AV946tuBJfXE7w56/d/8r7vsB2r7prpPe9dO+B\n69wv1z3gCw7OHDyvy5TgvauPI7Rs/4EVw7hYHr1pEk3c7JZEq/px8pZ169Y9N8yYaA6rGdWkTY6p\nUtz8qwvElrJ8oqOJHBnRRQWJkavYfdZZ//rnFzx75ezLn0HomrP+b+Gcq3bii7tgN7vIzRvmnvV3\nTba7/fLZv7wH38C70FVPuQeeusoXzJ25xiuif6l4wVNb9rnlC1rU3d2DSA7uHUIDboVh5/rgJLNF\ntL6E1s6JoyuilRUsD2URjYOyPAd/5ovZ5fh80UAlayOpYS07iL95gtdsbD7+MEKrDgQnGRUsGWoV\nueSqtFjBqh9cvOBX3hLO7qk58ZZXsgerBN/kFcsDaO/Sng3u3pD7//F7F68eRKhnJDiplYJ1+VAn\nWJuB9YKztWTd88nsdgid29ChC9Rp0gh2Mgq2bQeaKpUYEKzVFCdY/YEqwY2GDW3RagwJVscpBevz\nr1Kwt9qarRY8Fa5yT+W5qsXSpoJjPBUq2A2IETw5GAIEB2QXnHpRDof4Tbk4JfXb0BTRIFiTlkmw\nQwWneg7261fZBNdAsBpTgtM1dPjFczrBDhHsbYJgFUYEk/5gebpMsJMpBzO7tbSCj3145mXbEe3E\n99sgy6ctBTtEcLr+4EyCG7h4ph8SK7irixP8qS8ce/oP2AUGwSK0DUsjmHYXJhYctHCkEEyqVw3a\noBYn2MurgeA37ucvMAgW0QomptItbefkEUz30gk+fYxcYCiiJQf9Zmi14NRrFwbZPbFgUr8K2sNT\n5uB9/AWeeQINzMlzrbPShoKpqnQNHVxxnlQwyfJcj0e6StZffa7/J7/PLvC7vzTyhffmudZZqaZg\nbSBVla4tOqtgPjGd4MHfnfX2bayI3nXF7Gv25rnWWWlbwWk6G7gbcC2dYCERnoNVFCo4yMCJ+4Oz\nCJb8fBCswoDgNJ0Ngt6EgmU/HwSrSCk4qNdIAp1AcNIiOoNg6Y8HwSrSC7aVgYHfxI9JtqgrgWB5\nG4q6P3g8RJ6LWjDtJjhRBpbl4GA3qeBIolrwyRB5LmrBdCm2E2FGsB0nmHU2KBEE46dZbj9esOKf\nT9sX0W0g2EkvOFLcxgqWF9AgWE2BteiggE56D04t2HEUY+7bX7BP0uBWPCZRwQnboh1JcRsjONQ+\nydH+gtMGt6g/2Ekq2A41cRD0gsOnc3upBF/3NTdh+XVerrngwRb1JHlUTzCfd8KB5GITB/GCbVvm\nVy84HOCkEcx3+A9dth396O1D3tve3zsXBHNoBAuPSMkFhxLjc7D4idkEo+0LXr70WXxRv/OGighO\njQnBwu0vFGinEqzIwHrB
jUbEb2LBYn8w+vKZX0G4YvOOhyoimNSwZsx6TdLg0gXbXEdwcsGhxBjB\nDW5XjE4pmFzbLvT7/44qIpjwo49vTxpsQLD4TmE9nMZd9ASVLPkDrU5wQxRcSyU4XIumgl+9+FCF\nBI8vvqmZOLhcwXR1leSCLXmDhUawMAQrQkbBaMVHqyN453VPpwguXnDopeB6NM1JLtgJN1IS4gQr\nU9v+ORhN9H5+LE1wuYKx2SBPxgpWNDimfXUloO0F7/7E1nTBZopoVSATnLCSBYI9eMEzZ55a4abK\nwG8ywYoCejoLTk2ZgoNGDnogRrCqS8iIYHgBPILlb6QS7CQWrNQrF+yf2tnTCePi+ZSL1yYOzi7Y\nsnzDCQXbuJEjpWC5YYlgZ3oIxjR/eNmKpMGZBVsWM8wJjgoRBNs1U4KDU/0Of2lgZwhGaGviifOK\nzcESIywweAZmaTrBiiYOQpxgZWynCB6ZnTS42HuwpNYrCA5deZ1gnd+IYL5lwxcsDewUwTsvTxpc\nbC1aI9hJJ1jeyeATEtwIC+7wInr8uas3JQ0uVDD2qxBM1oAWpcUJVqaKgsXGyc6uZJHeQm+ASUKK\nFywPlGVgjWBt/p3OglNTqGDZKpN1P8nPwEkE6/WGBIf6j0CwQNGCVYFiPyFFKzjpCuDTTPDGD144\n602/uyVxcJGCpUOR6yzJjlRuVYJjCuiIYC6y0wWvmXf3y2MvLz/3W0mDCxRsS6eHDgRHC16FYHxe\n0hXAhTuwF9jZgi/ZgL9tSryIS8GCFYE2K6GFNLlg+oSURrBvuPMFnzaIv42dnjS4QMHylwmYYHyK\nmKYTnDgHhyM7W/Alj+Nvj/5i0uAi78HS2yafg8NIBdMbMAhm8IIffP03as39y85+KGlwkTlYFeio\n1m6QCfYrWIkEy0ZfdbZgtOH9F8y88KPJx+2UUEQ7jmptDqXgWvmCY1YfxUuLIrqWKNtz/x+Ydyu6\nZd4Aevitp19MEukWYeyz58z5lWdxC9T593FrkpJosohp3IodFXkOljRTkkCcIM3eEsGspp1EsHQA\npZnVR/2lRfFaovxCo2j5ReMXLUdo3tbmA1fgQ3SLcOO1L53o/SA+vuUcfvVSIhgvYhq3NGk1BMva\noXGgo0qplOC41Ufp0qJ0LVFuoVE0cemnLp1A6Px/2jFFZNAtwkU/YZK+fxG/eikVjBcxjVmalBf8\nKlnh5yOHWyJYNkCurkqoyQQHj8pZKlkYM6uP+kuLkrVEuYVG3Rpt16Pu16eunXUeeYGJbhFmjiEy\nk2lX1znf49YkpYLJIqYxS5Pygj/2Gfztlj9uiWB5Ee23QkfTIoKdVgrWrT4aLC3qrSXKLzQaCDj5\nDf8dU7xFuGgXVYn+b/aGYE1SftnS2JUrecHnkHXsDr6+bMG0IJY0VUobKQl5BCubMjML1q0+Giwt\n6q0lyi806gu4au3J756Pt+kW4XO/8bPBu+fg41963YtsTVJ+2dJUgukMx6MzWyJYGphCMP9D4heI\nVrygklmwbvVRbmnRNdxCo2QAOhbwxNtm/sJ6vE23CGM3vHbOrz+Dj09d+85hf01Sf9lSclLM0qS8\n4IVkPOXqK+SnmhUsTXQk3YQ+IcHCWWUvEK2hiNVHtcQsTcoLfuTsO/eP7btl7uNJf/ZgH2PseJ+a\no5q0k/19pHVRhuPaVSWP14UzhbMO1aPnh05tSNMOHdYEokPBZmIyrT6a4gWTuKVJhZ/y/d+aP+tN\nH9uR+PcYPMxoctsR+jVpJ48fJldclujl38OKxIlj/J74I45oPlH1WZh6XfOroiAx8SUqYvVRLe3Q\n0KEsMVkBLUsWiujQWcUvL+vR/k2VsYdDFCNYfck1VSyZ4GC3+OVlPdSCJ0Pkc1IoVREsS9JVsUTB\nYWslC27DtwsrIVg+ksOHExyRpl99FF4fLVWwMkvZ6lYsj0CwHWnPVAmOyb8gOERxgmUpks
HuPIFg\nJ6lg6hdycOmCZQmytxl4mGBJrtQIhjf8dYdDGBdc1xSovmArqWC/fAbBiSlCcNYSmgmW/QD16qM1\ngzm40Whwgr993hs3oYF3jFw2gB5+/byH0OTcf5n/rb87626aUhaC4J0fOee08z+ZfIGuogTLjlu2\nZVm6GtE4N5tdOK2YtQvDJBDcCARf8oOtVyK0/OMrELr4u09ejNAp63ef8T+7Z/kpJcEL/tFrbz44\n+uPPvm5n0uACBKuqtd5AHa3flIKd0gXP7OqajdCJucMInd5sznQv9ST+46eUBC/4Qz34280fSBpc\nlGDJca98TiJY/g+kkLULI6QrohfgKT+/9OkveTn4O2+lPffenwWJJwMtAF7w2S/jby/re5A5ChIs\nOx53B+YFR9Migvl/BiVVsh5745zPoIF3Ni9178Hz3vDfnGCcUhbC+8GT0WNajORgPD+LvpUSM35E\nXcCHBXOnTefn4C7JMS35BUsEYcHxJXQgWJImW5wy2Jy+gstfdUUhOL6AxoJlj8AY5cpn0JKVityC\nZYJ8wTEZ2BOs8htd+QwEM6a23VjCPFn8tQ8d8zNwfA5W+tUsjOVuTueWrPFNf3nBe5YfSRpsRHCN\ne0bStZCMq/1GBfOJBsZFVxhe8MPXn//hf0tTZhsSHEyqE9vGGStYcpKBHHwiREVz8Iw/7Et3U84r\nGF98K3LYYi8UZmrErkUEi4nTt4h+5E/nf2hV6YKjknAJrQ2s6f1yaxdKzpq+gt3f9Mkb5l9z16Gk\nwdkEBxccX3758rJk0qwYwfK30mqBYOm/guks2OPpGw1PwsIuuaMR7MQI1rdyCesmQQ7OTCbBToxg\ni3tEyidYcQdoF8HFLLVUflMlu+ikHcqKLE7JtUKrBCubsCjBqiuSc8wJtu1Egi9TH+eTzAjuQoYF\nOyHB4Sn9La6ETls7Y7RmWR3vFw8ER0Z0nP0vc/Bc+n/SdSr6izPnftM9ciV66LX/PGuKnOse95LI\neI+15362TQUzyIANS7I4ZTCSQyGY+E2y6koLBUdGdJz67d3nBNd871nuc+k2dN7m/5nhn0s6E8n2\nxf/7nY4TjGf5d4RGSvUK4LELRKszeUmCpSM6/Ov8wOtmdOEjrxkfZWM8iGCyffr4WJsLxutyuAaY\nYMvyZkWxCxKsKcRLqmQpRnR417cfvXHb9rmvkll1HmVjPNzj7h+yPf+HG2fk0BrQsu5C4jcsuCF2\n9EsFOwkE627RrR3R4bLkLPTNM+763Bxvd9XcbjbGwz3u/iHba1//t7NbXxfPsygHycE1meDgdRWZ\nYCeBYG0lrFKPSfe//Mgb8iiIpVWCnYhgPHOVJY7UUQsubM0GkdIF98ydd18eBbHwgl+45oxrDyE0\nXMY0SiQHW6GVz+iNmdmR/cuIFxzzEFUpwcbhBf9m9+DfX492//kzSYNzCsYSeMGOQ/0qBXMPP/o1\nGzQPydNX8FnHUX3hk4nne89XRPsWhKXtHDEDZxIcnFHaY1K7vOHvbZ+SZk6Y7IKdkGAyVhYb1gl2\n4gUHjSDlPQdPjYUoQExRyJ6DE5NZMOfJE2z5D03h5ieFYLwdu2ZD6Q0dlS+iWyw4sKwKFPRrpvSv\ngWBGaxo6RMF+hTpUhY4EihlcPaV/6ZORtovg1OS8B+Ot5lH/iSn8jBQJJHr9YRy5p/SXAYIFsi8Q\nLREs8RvtSOaSFfNFV0Yw61go4ELnoAKC/fYN27LCg90jHck6wVx6iwRbUcHFXOgctEQw7WfAMMGy\nlxkkgtm+WrCqdkUxJtj7WwSCT/38nKfw9SV9+mTXZf0FZ9yA8BiAo5eMo/FLjuIefq/zH48C4IcA\nFEEVBNMMrBcc0quYLxqfURHB63e/DV9f0qdPdl0uxuLwGIBPP4jWfJr28M/YhsgoAH4IQAG0QrDf\nFYzxBUtnPYsI5pJl80U7bFPz25QkmPTvu39Inz7r7j
99HH/1xgDsvRr9yvN+b/8kHQXADwEogNYI\ntnnBrHYVfZ8wCIxkYL3gFt2D/ZswvQeTP6RPn/UGX+KtwkHHAPzOPb/NevsRHQXADwEogBYItiyb\nWwyaCK75glWBwhMSRpiMNNRDWJFaNPlD+vSZ4Efnn/EpRMcAfH/2d/3efjeRjALghwAUQEtyMC6N\n6Q4R7FeRVYIlqdlmm62V/xycsk+/4CEArcjBdvDykStY4zcsWEjLNNusR9mCU/bpFzwEoEWCLaYj\niWBpYmgy0uoKbi3lC6ZPvDbbpX5lUzaQQLn80GSkQjIIZrQgB9v8Ldgfu2PJHKYRLKSVLHiqGWJa\nC7YEwcpm6CBQ7leYyjCcWrbgiRB5LmrBlC6YzrDCdrFb1byjnOBIWoUEQxHNbYdeDCR5l2+7jATq\nBAeVM6hkyWmRYLZHWynlDnGgwi83lWG0/RkEMyoguCbrZmCBCr3huSpBsIKyBXu9RsHMOupuBhao\nE8yVz9UT3KoO4BCl52CHa4fGgo9qZrWra0ax+zPdyTqAQTCj9BwceUbSrY6kFeyIGZhPa41g77cQ\nBOM3+2vvGzn1xG++5J1Xcl8/Rim4/6HuYMcbDd+/cvHKY6jbI7tgsR1aOZKDBar98s9HFcnBEcHk\nzf5rdy/YtRCfV3Jfv17wg3VOsLf5wKbmxjXezp7H/MNpBFMFQh0L+7U0y5up9SadylBCeYLJm/03\n3/lHX74en1dyXz9GU0R7VvfcvvRFunnbEBrsdb8fvh+/erNj8+bNzw8wxkcG1AwPDLh/d2+LPPDS\nw1wjljyOKNSlKRIHT2h+m6khddqQJm0ADQabMsGDvt8gB3u9+s9efc+7V+HzSu7rTyJ42f4DK4Zx\nsTx60ySauNn9y6zqx6lb1q1b99wwY6I5rGZ0eNiT4W6RZg162G/EskhaFOJQnaIII5+oZGpEnTai\nSRtGQaJccLiSRXr10UW7T8G34LL7+pMIXtTd3UM3e4fQwBKEdq4PzkhYRNMBGw5rmPQP4xLawqNl\nZXHJbsAyqlCLzoCJ1/3jcvBBtrlmY/PxhxFadSA4I5lg8l4K2fSsWJxgh06aJI3zTpZPRxnjt10F\nm3jdXymYVJf3Lu3Z4O4Muf8fv3fx6kH3lxgJzkkk2AoE+x37Vs3CEzY0tFXo2AysVtGmgk1QwnNw\nkIGpGPweEhbsbisFqyXSfyb6ebLUgODEpH0O9jMwztNUsGXL78CxfqskeCjE9BPM2iOYYG/WJLyT\ntoD2f0rMTHdqpu+aDanJJNhiwzgcjWAsXzPLTuxUhmpAcGISC3ZYny2uYflomqFx9lZP7gGCk1Ki\nYPZsIwzDUvnVC65pJ0IDwQGlCCZO7ECn4Fci2ME1MPXcDxWrZFWZ0gQLrRN+I5biEQn7bTQ0s+yA\n4KSUIdgJC9aPleU6kaSz7Pi3cxCchHJyMP7iWFLBkVVXuCck6Sw7dBPuwYkoqYiukYkow36lgh2F\n4NAdGwQnoqQiusZNIIxnhvYHy0bWbBDGZ6Sao4MDBDPKugeTmWTJS4UW62fAg+4kZ/seo4KDfRCc\niBIEO1Swh+0IU7u7fsM5WPAYnaMDcnBKSutsEOrQWLC3a9WkgvlA7jDk4PSUJZjosUg7VsOi4zic\nWliwIxcc9QuCk1GuYMcvof0qdFhwSGOon1H4oSA4ESULrtEqVoMN85AIFgKDw8opHKKAYEapgm3H\nvwOzGrUoOCwy0Rv+EkAwo6z+YL6KRe7BdPCduChHSGSCN/ylgGBGqTm4JrRikUIaBJul7BxMa1fM\nLy842rfEPV+B4GyU1uFPJFnsAcmJCLajIutqvyA4GaUK9ktob5osX1kg2KuFhca511Xlcw0EJ6QV\ngnF9OiJYJrKuzL8gOCHlDdlhgkVnvmCpSBCcm3IEN0IZmFOmFew4asMgOBElCW4EVayQMCpYapEe\nBME5KEOw7Q2RtP
CLgnrBIY0OCM5PKR3+5DWkGncLDhKJYJlfMrBSCQhORBmCG7xgbNIKErFg246+\nguaA4CIwKdh/bZTWk4KBslbwQmkgOBwNggvBrGDskdrlqlhhweIEtAT3NPxygxIQnAiDgi1eMHtp\nlLZEs7NcwbqJ0NSA4EQYFuyJpC8aWbSPkHUUEohg+VQcIDg/Roto/NUhgiP9hJTmUUeY4l8ABOfG\naA7G3yKChQxcazqqqXRqILgAzAn2R+VgwU5NlnvxaSDYKIYFO5ISWjwt2sTB7YLg3BgV7Kmi7/rK\nb8CyNmgHBBeJacFO+CFJFIwLaLGEBsGFYkyw6FcYbcedpehD8jdBcG5MCvbqy/itbysQHC2OdVMS\nguDcGM7BluNP00AbOYRzcNkdfcM/AATnxpRg2u5MH3yDElo4CQSbx6BgVrdyFCU0KaA5wZHeIxCc\nG5M5mGXcoJmDrzA3woKj/YMgODelCA5aKflTHNyGRYfskBwPggvHpOAalmg5csFOSLCDBYuGQXBu\nDAmmGRjfhoNWDjEDk9kKBcHhSwqCc2NesFuHrvuCuUq0/wTsj4uWLt0AgnNjVDB5CGbzB/OD7Zyw\nYCkgODclCK6Fugq98Rs2CC4JM4KpTVIsNyy6iDvJv/4s4LREBsFmMSiYNmd5fYUO30wpLvgNgs1i\nUrBDHm3J7O3BSB3b9sfBe4Bgs5gVTDr7G+HB0A3OsCs43EQdAIJzY/IebFHB/mh3P7VBJ+zHNI9G\n+iACQHBujAgW/NInpOAhV/ALgg1jXLDvN5SBgxOhiDZKPsF9jLHjwXYflqPuJS0AAAkRSURBVNrH\ncjCuNrsHCCQDByf296k5qkkbr6vTDmnS+iaPaAIPawLRoWCzbcgn+DCjyW0H7Ro4A3tvLjiWn0hr\nXdyZh9X0a9ImjqnTjugCJ4+q0+p1TSAKEou6/OYxVkT7g7Lo+masFOYK6BrrdFIBRXRuTAi2RIQq\ntODXiQ7TEgHBuTEs2JH59QdC46cnaOgwi/EcLA6MDTKwnwSCzWJAcLSAZoJt9gjMUkCwWYwL9ggm\nbuf80sYPEGwWc4K5V86iM/MH8xmCYLOYF+xWsagn7kUV6A8ui+IFh9spmWDWyy/MRwqCzWJOsOW3\nQ/tFtO/Ur19JFuUIA4JzY1BwMCCaW3ohMl0lCDaLWcG0n58JrkWnqwTBZjEpmJkM1tYIbsD+6SDY\nLIULDmVgbLLOVZsV0wnLAcG5MSTYq2A5bBxHPfIEHASCYLMYFByorAdzcTgOCC4VM4LJ1FheNzAp\norFUWzLfOwg2jTnBeBF3fzZDvwYd8QuCDVO0YNEv7fkNVbBAcImUIZhJlfgFwYYxIZh7ncE7ZOv8\ngmDDmMvBYr6tKfyCYMMULJg2QeOOJMGvrAKNAcFmMSA46EfyBdu22EXIA4LNYkJw8MZKjU6WxARz\nl4sOrQTBZilesMNeOfN00lULo9OO+lVsEGwWk4K9A1hwXTJJEgguh2IFi3XoGn3V27aiBTQILonC\nBTvsle8aXTKJNk2Hq1e0lQsEm6X4HOwLpjMZelvCG98hQLBZzORghwgmnUiWdplJEGyWQgUH7ZS4\njuUwNMuIgmCzFC24wQT7bVjy9g0GCDaLMcE1LgM7mnWAQbBZzAgWC2jduiog2DAFC8Y1ZvLiPudX\n5wkEm6VIwbSORZ6R+PwLgltHwYIdvyO4wZfPILh1FCyYTT3aaIDgSlCsYDZYp8EEe4kguHUUL5i2\nbHBjdrSebOXq0DGBIDgRBQpmJTR9Fm7oBNv4PzL9u63UDIJzY0AwzbveBIa2KNj2v9lUrM0hu6Qg\nODcmBNNC2rb9Bg5OsK1E1h4CgnNTqGA+A7sPSixT0jk6wnkWBJdAcYLFDOwZFgUrxYqrsAiA4NwU\nKbgREswS9YId3y8INkDxgiVdDJ4nXdGs7JAAwbkxkIMbEWF1mV9HQH5JQXBujBXR
fBdw3ffrSP2q\nLykIzk1hgkN++Qujzrs1aKo0TeGCw7nS8WtSThiSDoLNUrTgkF//wVfqFgOCzVKgYIv3qy2WeUCw\nWYoSzPUkkeLYdoRKlbJCBYLNUphgzq/33bfq+1VeNRBsluJysJuHg3I4yLX+luqqgWCzFCiY8+uw\nchlbhhEdraNAwWG/wV0XBLeOggSL+TdcowLBraMowTq/ILiFqAT3r1y88hjbG2ZHhOO84EZDYbcG\ngluJSvADm5ob17C9bnZEOB4Ipq1YtvxZFwS3DpXg24bQYC9Ce25f+iIigskRehyhHZs3b35+gOI3\nU7o1qwEJw7KDlAldoi5t8oQ6bVCTNjA1pE4b0qQNoMFgs21QCb5pEk3cjNCy/QdWDHd7jJIj9DhC\nW9atW/fcMIU0czRse1jKqPwwZnJMk6gN1CXq0qZG1GkjmrRhFCSWaCgnKsG9Q2hgCUKLurt7EMnB\n5Ag9TmBFNOlIUpZtUES3DpXgNRubjz/s5uCDeK+bHaHHCaoVwMOA4NahEnz83sWrBxHau7Rng7s3\nxI7Q4wQQXH1MLBAdAQS3DhAsAIIFQHD1AcECIFgABFcfECwAggVAcPUBwQIgWAAEVx8QLACCBUBw\n9QHBAiBYAARXHxAsAIIFQHD1KUrwtp9qrsyrmrQf7lWnHdRd7i37NIG6T/zez9RpfbrA7+xnm0Vd\nfvPkEszx7zsyBt7zfMbAZQcyBi6pZwy8uY2GYjFAcHJAcBZAsFmKErxpX8bAR17JGPjfhzMGrsk6\nqvm+0YyBraQowUBFAcEdDgjucDIJZq8YJnoHkePu7u7u9brA57/a81Vprav/oW6kCzz49cWrpfdW\nP5B9dMLALXcsumNXpk+sFpkEs1cME72DyPGV4fBPCAUu2Tf+whJJIHqw3q0N/PrTY4+u1QV+RagC\nxwf+4NWJl3ozfWK1yCSYvWIY+w5iiFtX9ny9Txf41b3jLy6Xf2g30gX2jKOjX9QF0o9OE/jSyoyf\nWCUyCWavGMa+gxjiydrJH3xdF3jg1u5bFc+33UgXePczJzcv0gXSj04RuHf18YyfWCUyCWavGCZ6\nB1GkuVgXeNcL4/uWyQO7kS7wwJ23PnGnLpB+dOLAqS37xrN+YpXIJJi9YpjoHUSOrUcmfnivLrD3\nhYkXtCWtKnDH4eaWJ3SB9KMTB27cJf5l03xilcgkmL5i2J3sHUSOJ5b2fOOILvCnyxYt+6nsI3Hh\nqAnc2HvLfzZ1gfSjEwfe5MUNZPnEagHPwR0OCO5wQHCHU0HBXV1dZ/7qs+7GIzO8itr295x55q/t\nwoddgtMeXTjz3E8ccjceXHD6L3otDiTN/TpzN9noCodMRyr413d/pRNfvszd+NOPXe9+XfCt4YEl\nCyO/6ffPXjt24G/cEzbM2zT2xLkbecGfmEB0t4J/vZKp4BXwfqUTZyA0Ob/vfPdRdP5Wd78Z+U1/\ny38Gff897pflH+QEn7q1F4FgSgWvgPsrDfRch9CTH0If3oTQN8/8o4eGUeQ3nb+fblzgjYB7+UJO\ncBf61+dAMKWCV8C7by7oQ+ivV6F/+0t3f98dH/r5DfQeHJw1a4xuzPQeRsdmCYLHrhsHwYQKXgH3\nVzre8xtuHnWFXjCFDz31C5Hf9E3+GKELvay8/00IzfCaFpun4DO3LQbBhApeAe9XGpyDnr7a/f6e\nbWjrSYTqZ0R+04/dSjc+epf7ZfnvuWW1Wy6jrReTM7t/BIIxFbwC7q808sV3oxu9ToflN6Jrbuw/\n9mfvi/ymO+beN3r4to+4Sn/u8eamc59B6P6rfzy69bK15Mzmx2fRnzXNqeAVcEvmOe/dgy591d0+\n8kvoZx+Ye/bv7I8+Bz92+cx513vPwf9xyWlv+y/vyIZ3zFrgdUbik7a/BqFK/vVKBq5AhwOCO5y2\nEwztj+mAK9XhgOAOBwR3OCC4wwHBHQ4I7nBA
cIcDgjuc/wcVtq3rbQzTPwAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scatter = ggplot2.ggplot(yri_ceu) + ggplot2.aes_string(x='BASE_COUNT', y='READ_COUNT', shape='factor(POPULATION)', col='factor(ANALYSIS_GROUP)') + ggplot2.geom_point()\n", "robjects.r.png('out.png')\n", "scatter.plot()\n", "dev_off = robjects.r('dev.off')\n", "dev_off()\n", "Image(filename='out.png')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
STUDY_IDSTUDY_NAMECENTER_NAMESAMPLE_IDSAMPLE_NAMEPOPULATIONINSTRUMENT_PLATFORMLIBRARY_LAYOUTPAIRED_FASTQREAD_COUNTBASE_COUNTANALYSIS_GROUP
1SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE9280498334097928high coverage
2SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE9571982344591352high coverage
3SRP0000321000Genomes Project Pilot 2BGISRS000214NA19240YRIILLUMINAPAIRED1490445365584high coverage
4SRP0000321000Genomes Project Pilot 2BGISRS000214NA19240YRIILLUMINAPAIREDdata/NA19240/sequence_read/ERR000020_2.filt.fa...205769074076840high coverage
5SRP0000321000Genomes Project Pilot 2BGISRS000214NA19240YRIILLUMINAPAIREDdata/NA19240/sequence_read/ERR000020_1.filt.fa...205769074076840high coverage
6SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE9388168337974048high coverage
7SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE7762958279466488high coverage
8SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE9625450385018000high coverage
9SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE8808642317111112high coverage
10SRP0000321000Genomes Project Pilot 2BGISRS000214NA19240YRIILLUMINAPAIRED15187683415high coverage
11SRP0000321000Genomes Project Pilot 2BGISRS000214NA19240YRIILLUMINAPAIREDdata/NA19240/sequence_read/ERR000025_2.filt.fa...215932497169580high coverage
12SRP0000321000Genomes Project Pilot 2BGISRS000214NA19240YRIILLUMINAPAIREDdata/NA19240/sequence_read/ERR000025_1.filt.fa...215932497169580high coverage
13SRP0000321000Genomes Project Pilot 2BGISRS000213NA19239YRIILLUMINAPAIRED593122669040high coverage
14SRP0000321000Genomes Project Pilot 2BGISRS000213NA19239YRIILLUMINAPAIREDdata/NA19239/sequence_read/ERR000027_2.filt.fa...5080128228605760high coverage
15SRP0000321000Genomes Project Pilot 2BGISRS000213NA19239YRIILLUMINAPAIREDdata/NA19239/sequence_read/ERR000027_1.filt.fa...5080128228605760high coverage
16SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE11752662423095832high coverage
17SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINAPAIRED22917910313055high coverage
18SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINAPAIREDdata/NA19238/sequence_read/ERR000030_2.filt.fa...7692812346176540high coverage
19SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINAPAIREDdata/NA19238/sequence_read/ERR000030_1.filt.fa...7692812346176540high coverage
20SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE11402532410491152high coverage
21SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE6777368243985248high coverage
22SRP0000321000Genomes Project Pilot 2BGISRS000213NA19239YRIILLUMINAPAIRED789182841048high coverage
23SRP0000321000Genomes Project Pilot 2BGISRS000213NA19239YRIILLUMINAPAIREDdata/NA19239/sequence_read/ERR000034_2.filt.fa...113125340725108high coverage
24SRP0000321000Genomes Project Pilot 2BGISRS000213NA19239YRIILLUMINAPAIREDdata/NA19239/sequence_read/ERR000034_1.filt.fa...113125340725108high coverage
25SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE12013717432493812high coverage
26SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE8045886289651896high coverage
27SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE9081298326926728high coverage
28SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE10130502364698072high coverage
29SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE8632879310783644high coverage
30SRP0000321000Genomes Project Pilot 2BGISRS000212NA19238YRIILLUMINASINGLE8108919291921084high coverage
.......................................
178036SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000205NA19201YRIILLUMINAPAIRED612706127000low coverage
178039SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000208NA19207YRIILLUMINAPAIRED612806128000low coverage
178054SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000212NA19238YRIILLUMINAPAIRED553565535600low coverage
178063SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000209NA19209YRIILLUMINAPAIRED675506755000low coverage
178081SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000213NA19239YRIILLUMINAPAIRED519225192200low coverage
178084SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000213NA19239YRIILLUMINAPAIRED568705687000low coverage
178096SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000195NA19144YRIILLUMINAPAIRED87319587319500low coverage
178099SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000195NA19144YRIILLUMINAPAIRED82199982199900low coverage
178117SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000214NA19240YRIILLUMINAPAIRED503975039700low coverage
178135SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000214NA19240YRIILLUMINAPAIRED553585535800low coverage
178144SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000207NA19206YRIILLUMINAPAIRED641226412200low coverage
178171SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000199NA19159YRIILLUMINAPAIRED728847288400low coverage
178183SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000182NA19098YRIILLUMINAPAIRED98019898019800low coverage
178189SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000182NA19098YRIILLUMINAPAIRED1062464106246400low coverage
178225SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000198NA19153YRIILLUMINAPAIRED78577778577700low coverage
178282SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000212NA19238YRIILLUMINAPAIRED598865988600low coverage
178312SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000205NA19201YRIILLUMINAPAIRED639776397700low coverage
178315SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000208NA19207YRIILLUMINAPAIRED638506385000low coverage
178324SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000209NA19209YRIILLUMINAPAIRED636396363900low coverage
178327SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000207NA19206YRIILLUMINAPAIRED671326713200low coverage
178480SRP0005421000 Genomes YRI Yoruba population sequencingWUGSCSRS000199NA19159YRIILLUMINAPAIRED684386843800low coverage
178750SRP0040781000 Genomes CEPH (Utah residents with ancestr...WUGSCSRS000075NA12750CEUILLUMINAPAIRED679446794400exome
NA.298NANANANANANaNNANANANaNNaNNA
NA.299NANANANANANaNNANANANaNNaNNA
178759SRP0040781000 Genomes CEPH (Utah residents with ancestr...WUGSCSRS000075NA12750CEUILLUMINAPAIRED449744497400exome
NA.300NANANANANANaNNANANANaNNaNNA
NA.301NANANANANANaNNANANANaNNaNNA
184746SRP0005421000 Genomes YRI Yoruba population sequencingBISRS000096NA18499YRIILLUMINAPAIRED87176388048063low coverage
184747SRP0005421000 Genomes YRI Yoruba population sequencingBISRS000096NA18499YRIILLUMINAPAIREDdata/NA18499/sequence_read/SRR797225_2.filt.fa...8623980871021980low coverage
184748SRP0005421000 Genomes YRI Yoruba population sequencingBISRS000096NA18499YRIILLUMINAPAIREDdata/NA18499/sequence_read/SRR797225_1.filt.fa...8623980871021980low coverage
\n", "

25251 rows × 12 columns

\n", "
" ], "text/plain": [ " STUDY_ID STUDY_NAME \\\n", "1 SRP000032 1000Genomes Project Pilot 2 \n", "2 SRP000032 1000Genomes Project Pilot 2 \n", "3 SRP000032 1000Genomes Project Pilot 2 \n", "4 SRP000032 1000Genomes Project Pilot 2 \n", "5 SRP000032 1000Genomes Project Pilot 2 \n", "6 SRP000032 1000Genomes Project Pilot 2 \n", "7 SRP000032 1000Genomes Project Pilot 2 \n", "8 SRP000032 1000Genomes Project Pilot 2 \n", "9 SRP000032 1000Genomes Project Pilot 2 \n", "10 SRP000032 1000Genomes Project Pilot 2 \n", "11 SRP000032 1000Genomes Project Pilot 2 \n", "12 SRP000032 1000Genomes Project Pilot 2 \n", "13 SRP000032 1000Genomes Project Pilot 2 \n", "14 SRP000032 1000Genomes Project Pilot 2 \n", "15 SRP000032 1000Genomes Project Pilot 2 \n", "16 SRP000032 1000Genomes Project Pilot 2 \n", "17 SRP000032 1000Genomes Project Pilot 2 \n", "18 SRP000032 1000Genomes Project Pilot 2 \n", "19 SRP000032 1000Genomes Project Pilot 2 \n", "20 SRP000032 1000Genomes Project Pilot 2 \n", "21 SRP000032 1000Genomes Project Pilot 2 \n", "22 SRP000032 1000Genomes Project Pilot 2 \n", "23 SRP000032 1000Genomes Project Pilot 2 \n", "24 SRP000032 1000Genomes Project Pilot 2 \n", "25 SRP000032 1000Genomes Project Pilot 2 \n", "26 SRP000032 1000Genomes Project Pilot 2 \n", "27 SRP000032 1000Genomes Project Pilot 2 \n", "28 SRP000032 1000Genomes Project Pilot 2 \n", "29 SRP000032 1000Genomes Project Pilot 2 \n", "30 SRP000032 1000Genomes Project Pilot 2 \n", "... ... ... 
\n", "178036 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178039 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178054 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178063 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178081 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178084 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178096 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178099 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178117 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178135 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178144 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178171 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178183 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178189 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178225 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178282 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178312 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178315 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178324 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178327 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178480 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "178750 SRP004078 1000 Genomes CEPH (Utah residents with ancestr... \n", "NA.298 NA NA \n", "NA.299 NA NA \n", "178759 SRP004078 1000 Genomes CEPH (Utah residents with ancestr... 
\n", "NA.300 NA NA \n", "NA.301 NA NA \n", "184746 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "184747 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "184748 SRP000542 1000 Genomes YRI Yoruba population sequencing \n", "\n", " CENTER_NAME SAMPLE_ID SAMPLE_NAME POPULATION INSTRUMENT_PLATFORM \\\n", "1 BGI SRS000212 NA19238 YRI ILLUMINA \n", "2 BGI SRS000212 NA19238 YRI ILLUMINA \n", "3 BGI SRS000214 NA19240 YRI ILLUMINA \n", "4 BGI SRS000214 NA19240 YRI ILLUMINA \n", "5 BGI SRS000214 NA19240 YRI ILLUMINA \n", "6 BGI SRS000212 NA19238 YRI ILLUMINA \n", "7 BGI SRS000212 NA19238 YRI ILLUMINA \n", "8 BGI SRS000212 NA19238 YRI ILLUMINA \n", "9 BGI SRS000212 NA19238 YRI ILLUMINA \n", "10 BGI SRS000214 NA19240 YRI ILLUMINA \n", "11 BGI SRS000214 NA19240 YRI ILLUMINA \n", "12 BGI SRS000214 NA19240 YRI ILLUMINA \n", "13 BGI SRS000213 NA19239 YRI ILLUMINA \n", "14 BGI SRS000213 NA19239 YRI ILLUMINA \n", "15 BGI SRS000213 NA19239 YRI ILLUMINA \n", "16 BGI SRS000212 NA19238 YRI ILLUMINA \n", "17 BGI SRS000212 NA19238 YRI ILLUMINA \n", "18 BGI SRS000212 NA19238 YRI ILLUMINA \n", "19 BGI SRS000212 NA19238 YRI ILLUMINA \n", "20 BGI SRS000212 NA19238 YRI ILLUMINA \n", "21 BGI SRS000212 NA19238 YRI ILLUMINA \n", "22 BGI SRS000213 NA19239 YRI ILLUMINA \n", "23 BGI SRS000213 NA19239 YRI ILLUMINA \n", "24 BGI SRS000213 NA19239 YRI ILLUMINA \n", "25 BGI SRS000212 NA19238 YRI ILLUMINA \n", "26 BGI SRS000212 NA19238 YRI ILLUMINA \n", "27 BGI SRS000212 NA19238 YRI ILLUMINA \n", "28 BGI SRS000212 NA19238 YRI ILLUMINA \n", "29 BGI SRS000212 NA19238 YRI ILLUMINA \n", "30 BGI SRS000212 NA19238 YRI ILLUMINA \n", "... ... ... ... ... ... 
\n", "178036 WUGSC SRS000205 NA19201 YRI ILLUMINA \n", "178039 WUGSC SRS000208 NA19207 YRI ILLUMINA \n", "178054 WUGSC SRS000212 NA19238 YRI ILLUMINA \n", "178063 WUGSC SRS000209 NA19209 YRI ILLUMINA \n", "178081 WUGSC SRS000213 NA19239 YRI ILLUMINA \n", "178084 WUGSC SRS000213 NA19239 YRI ILLUMINA \n", "178096 WUGSC SRS000195 NA19144 YRI ILLUMINA \n", "178099 WUGSC SRS000195 NA19144 YRI ILLUMINA \n", "178117 WUGSC SRS000214 NA19240 YRI ILLUMINA \n", "178135 WUGSC SRS000214 NA19240 YRI ILLUMINA \n", "178144 WUGSC SRS000207 NA19206 YRI ILLUMINA \n", "178171 WUGSC SRS000199 NA19159 YRI ILLUMINA \n", "178183 WUGSC SRS000182 NA19098 YRI ILLUMINA \n", "178189 WUGSC SRS000182 NA19098 YRI ILLUMINA \n", "178225 WUGSC SRS000198 NA19153 YRI ILLUMINA \n", "178282 WUGSC SRS000212 NA19238 YRI ILLUMINA \n", "178312 WUGSC SRS000205 NA19201 YRI ILLUMINA \n", "178315 WUGSC SRS000208 NA19207 YRI ILLUMINA \n", "178324 WUGSC SRS000209 NA19209 YRI ILLUMINA \n", "178327 WUGSC SRS000207 NA19206 YRI ILLUMINA \n", "178480 WUGSC SRS000199 NA19159 YRI ILLUMINA \n", "178750 WUGSC SRS000075 NA12750 CEU ILLUMINA \n", "NA.298 NA NA NA NaN NA \n", "NA.299 NA NA NA NaN NA \n", "178759 WUGSC SRS000075 NA12750 CEU ILLUMINA \n", "NA.300 NA NA NA NaN NA \n", "NA.301 NA NA NA NaN NA \n", "184746 BI SRS000096 NA18499 YRI ILLUMINA \n", "184747 BI SRS000096 NA18499 YRI ILLUMINA \n", "184748 BI SRS000096 NA18499 YRI ILLUMINA \n", "\n", " LIBRARY_LAYOUT PAIRED_FASTQ \\\n", "1 SINGLE \n", "2 SINGLE \n", "3 PAIRED \n", "4 PAIRED data/NA19240/sequence_read/ERR000020_2.filt.fa... \n", "5 PAIRED data/NA19240/sequence_read/ERR000020_1.filt.fa... \n", "6 SINGLE \n", "7 SINGLE \n", "8 SINGLE \n", "9 SINGLE \n", "10 PAIRED \n", "11 PAIRED data/NA19240/sequence_read/ERR000025_2.filt.fa... \n", "12 PAIRED data/NA19240/sequence_read/ERR000025_1.filt.fa... \n", "13 PAIRED \n", "14 PAIRED data/NA19239/sequence_read/ERR000027_2.filt.fa... \n", "15 PAIRED data/NA19239/sequence_read/ERR000027_1.filt.fa... 
\n", "16 SINGLE \n", "17 PAIRED \n", "18 PAIRED data/NA19238/sequence_read/ERR000030_2.filt.fa... \n", "19 PAIRED data/NA19238/sequence_read/ERR000030_1.filt.fa... \n", "20 SINGLE \n", "21 SINGLE \n", "22 PAIRED \n", "23 PAIRED data/NA19239/sequence_read/ERR000034_2.filt.fa... \n", "24 PAIRED data/NA19239/sequence_read/ERR000034_1.filt.fa... \n", "25 SINGLE \n", "26 SINGLE \n", "27 SINGLE \n", "28 SINGLE \n", "29 SINGLE \n", "30 SINGLE \n", "... ... ... \n", "178036 PAIRED \n", "178039 PAIRED \n", "178054 PAIRED \n", "178063 PAIRED \n", "178081 PAIRED \n", "178084 PAIRED \n", "178096 PAIRED \n", "178099 PAIRED \n", "178117 PAIRED \n", "178135 PAIRED \n", "178144 PAIRED \n", "178171 PAIRED \n", "178183 PAIRED \n", "178189 PAIRED \n", "178225 PAIRED \n", "178282 PAIRED \n", "178312 PAIRED \n", "178315 PAIRED \n", "178324 PAIRED \n", "178327 PAIRED \n", "178480 PAIRED \n", "178750 PAIRED \n", "NA.298 NA NA \n", "NA.299 NA NA \n", "178759 PAIRED \n", "NA.300 NA NA \n", "NA.301 NA NA \n", "184746 PAIRED \n", "184747 PAIRED data/NA18499/sequence_read/SRR797225_2.filt.fa... \n", "184748 PAIRED data/NA18499/sequence_read/SRR797225_1.filt.fa... 
\n", "\n", " READ_COUNT BASE_COUNT ANALYSIS_GROUP \n", "1 9280498 334097928 high coverage \n", "2 9571982 344591352 high coverage \n", "3 149044 5365584 high coverage \n", "4 2057690 74076840 high coverage \n", "5 2057690 74076840 high coverage \n", "6 9388168 337974048 high coverage \n", "7 7762958 279466488 high coverage \n", "8 9625450 385018000 high coverage \n", "9 8808642 317111112 high coverage \n", "10 15187 683415 high coverage \n", "11 2159324 97169580 high coverage \n", "12 2159324 97169580 high coverage \n", "13 59312 2669040 high coverage \n", "14 5080128 228605760 high coverage \n", "15 5080128 228605760 high coverage \n", "16 11752662 423095832 high coverage \n", "17 229179 10313055 high coverage \n", "18 7692812 346176540 high coverage \n", "19 7692812 346176540 high coverage \n", "20 11402532 410491152 high coverage \n", "21 6777368 243985248 high coverage \n", "22 78918 2841048 high coverage \n", "23 1131253 40725108 high coverage \n", "24 1131253 40725108 high coverage \n", "25 12013717 432493812 high coverage \n", "26 8045886 289651896 high coverage \n", "27 9081298 326926728 high coverage \n", "28 10130502 364698072 high coverage \n", "29 8632879 310783644 high coverage \n", "30 8108919 291921084 high coverage \n", "... ... ... ... 
\n", "178036 61270 6127000 low coverage \n", "178039 61280 6128000 low coverage \n", "178054 55356 5535600 low coverage \n", "178063 67550 6755000 low coverage \n", "178081 51922 5192200 low coverage \n", "178084 56870 5687000 low coverage \n", "178096 873195 87319500 low coverage \n", "178099 821999 82199900 low coverage \n", "178117 50397 5039700 low coverage \n", "178135 55358 5535800 low coverage \n", "178144 64122 6412200 low coverage \n", "178171 72884 7288400 low coverage \n", "178183 980198 98019800 low coverage \n", "178189 1062464 106246400 low coverage \n", "178225 785777 78577700 low coverage \n", "178282 59886 5988600 low coverage \n", "178312 63977 6397700 low coverage \n", "178315 63850 6385000 low coverage \n", "178324 63639 6363900 low coverage \n", "178327 67132 6713200 low coverage \n", "178480 68438 6843800 low coverage \n", "178750 67944 6794400 exome \n", "NA.298 NaN NaN NA \n", "NA.299 NaN NaN NA \n", "178759 44974 4497400 exome \n", "NA.300 NaN NaN NA \n", "NA.301 NaN NaN NA \n", "184746 871763 88048063 low coverage \n", "184747 8623980 871021980 low coverage \n", "184748 8623980 871021980 low coverage \n", "\n", "[25251 rows x 12 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd_yri_ceu = pd_common.load_data('yri_ceu')\n", "print(type(pd_yri_ceu))\n", "pd_yri_ceu" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " [1]\n", " \"STUDY_ID\" \n", " \"STUDY_NAME\" \n", " \"CENTER_NAME\" \n", "\n", "\n", " [4]\n", " \"SAMPLE_ID\" \n", " \"SAMPLE_NAME\" \n", " \"POPULATION\" \n", "\n", "\n", " [7]\n", " \"INSTRUMENT_PLATFORM\"\n", " \"LIBRARY_LAYOUT\" \n", " \"READ_COUNT\" \n", "\n", "\n", "[10]\n", " \"BASE_COUNT\" \n", " \"ANALYSIS_GROUP\" \n", "\n", "\n" ] }, { "data": { "text/plain": [ "\n", "['STUD..., 'STUD..., 'CENT..., ..., 'READ..., 'BASE..., 'ANAL...]" ] }, "execution_count": 8, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ "del pd_yri_ceu['PAIRED_FASTQ']\n", "no_paired = pd_common.convert_to_r_dataframe(pd_yri_ceu)\n", "robjects.r.assign('no.paired', no_paired)\n", "robjects.r(\"print(colnames(no.paired))\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.3" } }, "nbformat": 4, "nbformat_minor": 0 }