{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Bin SAM alignments into per-bin read counts\n",
    "\n",
    "For every mapped read in a SAM file, add one to each 20-base bin between the read's start position and start + sequence length, then save the non-zero bins.\n",
    "\n",
    "- **Inputs** are taken from `sys.argv`: the SAM path, then the species name used to pick the empty reference pickle. Presumably the notebook is exported to a script before it is run.\n",
    "- **Early stop**: once `checkMark` reads have been seen, parsing stops if more than 90% of them were unmapped; whatever was counted so far is still saved.\n",
    "- **Output**: `bin_count.pickle` in the working directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "### Parse the SAM file given on the command line.\n",
    "### Stop early if too many reads are unmapped.\n",
    "# Use only [] for indexing.\n",
    "import itertools\n",
    "import os\n",
    "import sys\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# Command-line inputs: path to the SAM file, then the species name.\n",
    "#fname='out.sam'\n",
    "fname=str(sys.argv[1])\n",
    "#specie='Homo_sapiens'\n",
    "specie=str(sys.argv[2])\n",
    "\n",
    "# Directory holding one empty reference pickle per species.\n",
    "EMPTY_REFERENCE_DIR='/cellar/users/btsui/Data/Project/Skymap/ChipSeq/empty_references/'\n",
    "emptyPickleDir=os.path.join(EMPTY_REFERENCE_DIR,specie+'.pickle')\n",
    "\n",
    "BIN_SIZE=20  # width of one counting bin, in bases\n",
    "checkMark=100000  # number of reads after which the unmapped fraction is checked\n",
    "MAX_UNMAPPED_FRACTION=0.9  # stop parsing when more than this fraction is unmapped\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# NOTE: unpickling can execute arbitrary code; only load reference pickles from a trusted location.\n",
    "# The loaded object is indexed below with (reference name, bin start) tuples.\n",
    "myCountS=pd.read_pickle(emptyPickleDir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "## Open the SAM file and count reads per bin.\n",
    "failedCount=0  # reads whose reference name is '*' (unmapped)\n",
    "with open(fname,'r') as f:\n",
    "    # Drop only the leading '@' header lines; the first alignment record is kept.\n",
    "    alignments=itertools.dropwhile(lambda line: line.startswith('@'),f)\n",
    "    for readNumber,l in enumerate(alignments,1):\n",
    "        splitL=l.split('\\t')\n",
    "        rname=splitL[2]  # SAM column 3: reference sequence name\n",
    "        if rname!='*':\n",
    "            start_loc=int(splitL[3])  # SAM column 4: leftmost mapping position\n",
    "            # The end is start + length of SAM column 10 (SEQ); the CIGAR string is not consulted.\n",
    "            # NOTE(review): start + length is one past the last base, so a read ending exactly on a\n",
    "            # bin boundary also increments the following bin - confirm this is intended.\n",
    "            end_loc=start_loc+len(splitL[9])\n",
    "            binned_start_loc=(start_loc//BIN_SIZE)*BIN_SIZE\n",
    "            binned_end_loc=(end_loc//BIN_SIZE)*BIN_SIZE\n",
    "            # The bin loop uses its own name so the read counter stays intact for the check below.\n",
    "            # A (rname, bin) key missing from the reference raises KeyError.\n",
    "            for binStart in range(binned_start_loc,binned_end_loc+1,BIN_SIZE):\n",
    "                myCountS[(rname,binStart)]+=1\n",
    "        else:\n",
    "            failedCount+=1\n",
    "        # One-off sanity check after exactly checkMark reads.\n",
    "        if readNumber==checkMark:\n",
    "            frac=failedCount/float(checkMark)\n",
    "            if frac>MAX_UNMAPPED_FRACTION:\n",
    "                break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Keep only the bins that received at least one read.\n",
    "nonZero_CountS=myCountS[myCountS!=0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Written to the current working directory.\n",
    "nonZero_CountS.to_pickle('bin_count.pickle')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}