{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## The cell below downloads the data file; you only need to run it once" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "(You do not need to run it if you have already downloaded the file in the Interfacing_R notebook.)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2016-02-05 15:50:22-- ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/historical_data/former_toplevel/sequence.index\n", " => 'sequence.index'\n", "Resolving ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)... 193.62.192.8\n", "Connecting to ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)|193.62.192.8|:21... connected.\n", "Logging in as anonymous ... Logged in!\n", "==> SYST ... done. ==> PWD ... done.\n", "==> TYPE I ... done. ==> CWD (1) /vol1/ftp/historical_data/former_toplevel ... done.\n", "==> SIZE sequence.index ... 67069489\n", "==> PASV ... done. ==> RETR sequence.index ... done.\n", "Length: 67069489 (64M) (unauthoritative)\n", "\n", "sequence.index 100%[=====================>] 63.96M 419KB/s in 2m 27s \n", "\n", "2016-02-05 15:52:54 (445 KB/s) - 'sequence.index' saved [67069489]\n", "\n" ] } ], "source": [ "!rm sequence.index 2>/dev/null\n", "!wget -nd ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/historical_data/former_toplevel/sequence.index -O sequence.index" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## This code was changed to support newer versions of the software\n", "\n", "As a result, it differs from the code in the book."
] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import rpy2.robjects as robjects\n", "import rpy2.robjects.lib.ggplot2 as ggplot2\n", "\n", "%load_ext rpy2.ipython" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [], "source": [ "seq_data = %R read.delim('sequence.index', header=TRUE, stringsAsFactors=FALSE)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ " [1] \"FASTQ_FILE\" \"MD5\" \"RUN_ID\" \n", " [4] \"STUDY_ID\" \"STUDY_NAME\" \"CENTER_NAME\" \n", " [7] \"SUBMISSION_ID\" \"SUBMISSION_DATE\" \"SAMPLE_ID\" \n", "[10] \"SAMPLE_NAME\" \"POPULATION\" \"EXPERIMENT_ID\" \n", "[13] \"INSTRUMENT_PLATFORM\" \"INSTRUMENT_MODEL\" \"LIBRARY_NAME\" \n", "[16] \"RUN_NAME\" \"RUN_BLOCK_NAME\" \"INSERT_SIZE\" \n", "[19] \"LIBRARY_LAYOUT\" \"PAIRED_FASTQ\" \"WITHDRAWN\" \n", "[22] \"WITHDRAWN_DATE\" \"COMMENT\" \"READ_COUNT\" \n", "[25] \"BASE_COUNT\" \"ANALYSIS_GROUP\" \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "array(['BGI', 'BGI', 'BGI', ..., 'BI', 'BI', 'BI'], \n", " dtype='