{
 "metadata": {
  "name": "filter-blast-csv"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Remember,\n",
      "\n",
      "hit 'Shift-ENTER' to execute each cell."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ls /mnt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\u001b[0m\u001b[01;34mlost+found\u001b[0m/ \u001b[01;34mtitus\u001b[0m/\r\n"
       ]
      }
     ],
     "prompt_number": 12
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### For the next command, replace 'titus' with your own NetID "
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cd /mnt/titus"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "/mnt/titus\n"
       ]
      }
     ],
     "prompt_number": 13
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import csv\n",
      "\n",
      "# Settings used by the cells below. No file is opened here; each cell that\n",
      "# needs a file opens and closes it itself, so cells can be re-run in isolation.\n",
      "INPUT_CSV = 'salm.x.ecoli.csv'   # rows: name1, descr1, name2, descr2, bitscore, evalue\n",
      "OUTPUT_CSV = 'filtered.csv'      # receives the rows that pass the cutoff\n",
      "MIN_BITSCORE = 100               # rows are kept only when bitscore > MIN_BITSCORE"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 14
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Copy every row whose bit score is above MIN_BITSCORE from INPUT_CSV to OUTPUT_CSV.\n",
      "# Both files are opened in this cell and closed by 'with', so the cell can be\n",
      "# re-run by itself and no handle is left open if a row fails to parse.\n",
      "with open(INPUT_CSV, 'r') as infp:\n",
      "    with open(OUTPUT_CSV, 'w') as outfp:\n",
      "        r = csv.reader(infp)\n",
      "        w = csv.writer(outfp)\n",
      "        for row in r:\n",
      "            if not row:\n",
      "                continue  # csv.reader returns [] for a blank line\n",
      "            name1, descr1, name2, descr2, bitscore, evalue = row\n",
      "            bitscore = float(bitscore)  # the column is read as text\n",
      "            if bitscore > MIN_BITSCORE:\n",
      "                w.writerow([name1, descr1, name2, descr2, bitscore, evalue])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 15
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Show the first 500 characters of the filtered file as a quick sanity check.\n",
      "with open(OUTPUT_CSV) as fp:\n",
      "    print(fp.read(500))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "ref|YP_008253424.1|,membrane protein [Salmonella enterica subsp. enterica serovar Typhimurium var. 5- str. CFSAN001921],ref|NP_417733.1|,putative outer membrane protein [Escherichia coli str. K-12 substr. MG1655],119.0,4e-29\r\n",
        "ref|YP_008253425.1|,multidrug transporter [Salmonella enterica subsp. enterica serovar Typhimurium var. 5- str. CFSAN001921],ref|NP_417732.1|,multidrug efflux system protein [Escherichia coli str. K-12 substr. MG1655],1744.0,1e-300\r\n",
        "ref|YP_008253425.1|,multidrug transporter\n"
       ]
      }
     ],
     "prompt_number": 16
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}