{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# SRAMangaer: run SRAParser.py through clusterBaseClass\n",
    "\n",
    "Defines `SRAMangaer`, a `clusterBaseClass` subclass configured with `SRAParser.py` as its script, and runs it on `SRAParam.SRADir`. All paths are absolute and specific to the original author's environment."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "import os\n",
    "\n",
    "# Work from the metamap code directory; the next cell imports project modules by bare name.\n",
    "codeBaseDir='/cellar/users/btsui/Project/METAMAP/code/metamap/'\n",
    "os.chdir(codeBaseDir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "#\n",
    "__author__ = 'btsui'\n",
    "import SRAParam\n",
    "from clusterBaseClass import clusterBaseClass\n",
    "import mergeSRAResults as Merger\n",
    "import SRAParser\n",
    "\n",
    "\n",
    "class SRAMangaer(clusterBaseClass):\n",
    "    \"\"\"clusterBaseClass subclass configured to run SRAParser.py.\n",
    "\n",
    "    The constructor only forwards fixed settings to ``clusterBaseClass``;\n",
    "    ``run`` prepares the per-input split directory and then defers to the\n",
    "    inherited ``Done`` / ``processUntilAllDone`` methods.\n",
    "\n",
    "    NOTE(review): relies on the notebook-level ``codeBaseDir`` defined in the\n",
    "    first code cell, so that cell must be executed before this one.\n",
    "    \"\"\"\n",
    "    className = 'SRAMangaer'\n",
    "\n",
    "    def __init__(self):\n",
    "        \"\"\"Forward the fixed SRAParser settings to ``clusterBaseClass.__init__``.\"\"\"\n",
    "        clusterBaseClass.__init__(\n",
    "            self,\n",
    "            outputPostfix=SRAParser.SRAParser.outputPostfix,\n",
    "            baseSplitDir='/cellar/users/btsui/Data/nrnb01_nobackup/tmp/METAMAP//splittedInput_' + self.className + '_',\n",
    "            pythonScriptName=codeBaseDir+'SRAParser.py',\n",
    "            # Same directory as codeBaseDir, without the trailing slash.\n",
    "            CWD=codeBaseDir.rstrip('/'),\n",
    "            minCorrectSize=0,\n",
    "            memory=8,\n",
    "            smp=1\n",
    "        )\n",
    "\n",
    "    def run(self, inputFDir, outDir):\n",
    "        \"\"\"Reset the split directory for ``inputFDir`` and process it.\n",
    "\n",
    "        inputFDir: path of the input; its last path component names the\n",
    "            split directory (a trailing '/' is ignored).\n",
    "        outDir: currently unused -- the merge step that consumed it is\n",
    "            commented out at the end of this method.\n",
    "\n",
    "        Sets ``self.splitOutDir``. Reads ``self.baseSplitDir``, which is\n",
    "        presumably stored by ``clusterBaseClass.__init__`` -- TODO confirm.\n",
    "        \"\"\"\n",
    "        # Last path component; strip trailing '/' first so 'a/b/' yields 'b'\n",
    "        # rather than '' (which would make every such input share one directory).\n",
    "        fname = os.path.basename(inputFDir.rstrip('/'))\n",
    "\n",
    "        self.splitOutDir = self.baseSplitDir + fname + '/'\n",
    "        # Left in tuple form: on the Python 2 kernel this prints a tuple,\n",
    "        # which is what the recorded cell output shows.\n",
    "        print ('splitOutDir dir: ',self.splitOutDir)\n",
    "        print('splitting input')\n",
    "        #termToId = LS.split(inputFDir, self.nFiles, self.splitOutDir, clean=True) # retain signature\n",
    "        self.setNFiles(SRAParam.nConcurJob)\n",
    "        if os.path.exists(self.splitOutDir):\n",
    "            # Same effect as `rm <dir>/*` (non-hidden entries, no recursion into\n",
    "            # sub-directories) without building a shell command from a path.\n",
    "            for stalePath in glob.glob(os.path.join(self.splitOutDir, '*')):\n",
    "                if os.path.isfile(stalePath) or os.path.islink(stalePath):\n",
    "                    os.remove(stalePath)\n",
    "        else:\n",
    "            # makedirs also creates missing parents, which os.mkdir would not.\n",
    "            os.makedirs(self.splitOutDir)\n",
    "\n",
    "        if not self.Done():\n",
    "            # fname,nfiles,splitOutDir\n",
    "            self.processUntilAllDone()\n",
    "\n",
    "        #Merger.runMerger( self.splitOutDir, outDir=outDir)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "outDir = '/cellar/users/btsui/Data/nrnb01_nobackup/METAMAP/' +'SRA_parse'\n",
    "m = SRAMangaer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type":
"stream", "text": [ "('splitOutDir dir: ', '/cellar/users/btsui/Data/nrnb01_nobackup/tmp/METAMAP//splittedInput_SRAMangaer_SRA_META/')\n", "splitting input\n" ] } ], "source": [ "m.run(SRAParam.SRADir, outDir=outDir)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# scratch" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "exit(0)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "#codeBaseDir+'SRAParser.py'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Test case" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "nrnb-5-6\n", "/cellar/users/btsui/Data/nrnb01_nobackup/tmp/METAMAP//splittedInput_SRAMangaer_SRA_META//835\n", "523\n", "['SRA725772', 'SRA534608', 'SRA549639', 'SRA616252', 'SRA162547', 'SRA522205', 'SRA261971', 'SRA592288', 'SRA491284', 'SRA643053', 'ERA978722', 'ERA473368', 'SRA219268', 'SRA027121', 'SRA069500', 'SRA746477', 'SRA243995', 'ERA085943', 'SRA466298', 'SRA736060', 'SRA633940', 'ERA529592', 'SRA486667', 'SRA634973', 'ERA469384', 'SRA345207', 'SRA114483', 'ERA624108', 'SRA761817', 'ERA190505', 'ERA333873', 'ERA265766', 'ERA327347', 'SRA241634', 'ERA021834', 'SRA468577', 'SRA133859', 'SRA001589', 'ERA246500', 'ERA226358', 'SRA215327', 'ERA337679', 'ERA429046', 'ERA1238184', 'SRA515136', 'ERA707120', 'ERA1056892', 'SRA234108', 'ERA502256', 'ERA013821', 'ERA284076', 'ERA079461', 'ERA973504', 'SRA452032', 'SRA703253', 'SRA064583', 'ERA132290', 'SRA223755', 'SRA412506', 'SRA212352', 'SRA755831', 'SRA648764', 'SRA242179', 'ERA689967', 'SRA488522', 'SRA010164', 'SRA575595', 'ERA428092', 'ERA707048', 'SRA576646', 'ERA003374', 'SRA102498', 'ERA739952', 'SRA474767', 'ERA1516064', 'ERA071420', 'ERA526746', 'ERA561396', 'SRA544510', 
'ERA916601', 'ERA191038', 'ERA467680', 'SRA670503', 'ERA631807', 'SRA277176', 'SRA002079', 'SRA534958', 'ERA366591', 'ERA569934', 'SRA665962', 'SRA423749', 'ERA649114', 'SRA192563', 'SRA128355', 'ERA012036', 'SRA604882', 'ERA426317', 'SRA185610', 'SRA115799', 'ERA483791', 'ERA135737', 'ERA018443', 'ERA446383', 'ERA319202', 'SRA591278', 'ERA808119', 'ERA167090', 'SRA187713', 'SRA513110', 'SRA788917', 'SRA207435', 'SRA786836', 'SRA633260', 'ERA1289624', 'ERA013594', 'SRA575368', 'SRA256413', 'ERA068856', 'ERA365385', 'SRA165400', 'ERA014787', 'SRA715448', 'ERA782678', 'ERA827992', 'SRA193147', 'ERA893638', 'SRA199311', 'ERA019148', 'SRA127313', 'ERA396107', 'SRA462552', 'SRA645675', 'SRA543444', 'ERA239031', 'SRA497586', 'SRA068539', 'ERA692355', 'SRA756135', 'SRA346718', 'SRA752740', 'SRA097736', 'SRA113775', 'SRA378431', 'SRA356133', 'SRA771300', 'ERA688820', 'SRA576511', 'ERA364516', 'SRA596517', 'SRA188529', 'SRA379732', 'ERA617722', 'SRA462067', 'ERA420178', 'SRA131285', 'ERA407637', 'SRA621003', 'ERA619867', 'ERA569348', 'SRA150029', 'SRA002057', 'ERA005915', 'ERA787898', 'SRA418286', 'SRA571443', 'ERA588549', 'ERA981338', 'SRA602030', 'ERA770458', 'SRA323762', 'SRA493637', 'SRA128949', 'SRA317093', 'ERA1297955', 'SRA725999', 'SRA305494', 'SRA321316', 'ERA369415', 'SRA057781', 'ERA1292500', 'ERA225364', 'SRA222479', 'SRA563747', 'ERA777118', 'SRA736738', 'SRA522407', 'ERA332803', 'SRA032770', 'SRA399273', 'ERA1308710', 'SRA091321', 'SRA008806', 'ERA431247', 'SRA005781', 'SRA650607', 'DRA003585', 'SRA239825', 'ERA334725', 'SRA392014', 'SRA509265', 'SRA390363', 'ERA064609', 'ERA225494', 'ERA293785', 'SRA288026', 'SRA386548', 'ERA068705', 'SRA733353', 'SRA087131', 'ERA421251', 'SRA609550', 'SRA334684', 'ERA436979', 'SRA685180', 'ERA1275124', 'ERA529280', 'SRA416980', 'SRA429159', 'SRA480401', 'ERA200897', 'SRA531093', 'ERA406850', 'SRA249885', 'ERA766282', 'ERA546738', 'SRA258932', 'SRA550623', 'SRA756496', 'SRA590662', 'ERA969518', 'SRA602419', 'SRA222579', 
'ERA259320', 'SRA449139', 'SRA377300', 'ERA460226', 'SRA074489', 'ERA768954', 'SRA131709', 'ERA426752', 'SRA735666', 'ERA763854', 'ERA226186', 'SRA459930', 'SRA519804', 'ERA437140', 'ERA133251', 'ERA473323', 'ERA129516', 'ERA522656', 'SRA650317', 'ERA273808', 'SRA354033', 'SRA252716', 'SRA213904', 'ERA427207', 'ERA1164842', 'SRA282456', 'SRA621503', 'ERA431014', 'ERA570513', 'SRA719331', 'SRA107774', 'SRA111379', 'SRA066476', 'ERA274058', 'SRA324784', 'SRA558112', 'SRA758740', 'SRA652714', 'ERA977352', 'SRA681077', 'SRA648185', 'SRA503807', 'SRA699156', 'SRA671621', 'ERA1283746', 'SRA207929', 'ERA238496', 'SRA614009', 'SRA450282', 'SRA288421', 'SRA402871', 'ERA783491', 'ERA023018', 'ERA1304245', 'ERA071291', 'SRA299309', 'SRA630191', 'SRA056169', 'ERA000941', 'ERA1520236', 'SRA310693', 'ERA678614', 'SRA710471', 'SRA485915', 'SRA240975', 'SRA412396', 'ERA1148949', 'SRA191117', 'SRA125480', 'SRA443599', 'SRA304374', 'SRA574054', 'SRA196870', 'ERA545920', 'SRA395578', 'SRA327676', 'SRA220406', 'ERA921860', 'ERA460353', 'ERA411432', 'SRA543657', 'SRA360151', 'ERA238055', 'SRA644200', 'ERA029691', 'ERA1090441', 'SRA440284', 'ERA145222', 'SRA765359', 'SRA632650', 'ERA1292879', 'SRA260356', 'ERA435313', 'ERA550771', 'SRA578000', 'SRA706375', 'SRA706842', 'SRA716230', 'SRA284306', 'ERA782722', 'ERA501435', 'SRA241763', 'ERA146699', 'ERA1320839', 'SRA553744', 'SRA645297', 'SRA636847', 'ERA207885', 'ERA1516322', 'ERA680764', 'ERA531214', 'SRA682542', 'SRA548511', 'ERA531354', 'ERA1088089', 'SRA325402', 'SRA052898', 'SRA023904', 'SRA096925', 'SRA582598', 'ERA162792', 'SRA527060', 'SRA294033', 'SRA285422', 'SRA664608', 'SRA458601', 'ERA1082906', 'SRA046302', 'ERA675464', 'SRA593504', 'ERA1516336', 'ERA148105', 'SRA484335', 'ERA019493', 'ERA022089', 'SRA485713', 'SRA568798', 'SRA421157', 'ERA952378', 'ERA400881', 'ERA376868', 'SRA653801', 'SRA076465', 'SRA603714', 'SRA286579', 'SRA613709', 'ERA050969', 'ERA090337', 'SRA538394', 'SRA782070', 'SRA636992', 'SRA542138', 
'ERA164969', 'ERA291641', 'ERA341121', 'SRA584580', 'SRA756674', 'SRA226769', 'SRA706691', 'ERA217847', 'SRA690677', 'ERA927609', 'SRA752690', 'SRA206386', 'SRA008244', 'SRA755593', 'SRA261869', 'SRA215045', 'DRA000688', 'ERA095337', 'SRA499006', 'SRA696073', 'SRA218419', 'SRA007057', 'SRA511933', 'ERA250545', 'SRA170124', 'SRA317656', 'ERA282403', 'SRA725013', 'SRA530092', 'ERA1273674', 'SRA297388', 'SRA412448', 'SRA540608', 'SRA788195', 'ERA448526', 'ERA068039', 'ERA125091', 'SRA015799', 'SRA025599', 'ERA599177', 'ERA065996', 'SRA317115', 'ERA813784', 'ERA368070', 'ERA484653', 'ERA362951', 'ERA767406', 'SRA582252', 'SRA714440', 'ERA163262', 'ERA130575', 'SRA594384', 'SRA252567', 'ERA624291', 'ERA296423', 'SRA420333', 'ERA928665', 'ERA1149807', 'ERA264311', 'ERA928645', 'SRA398080', 'ERA146668', 'ERA699734', 'ERA791250', 'ERA457804', 'ERA626566', 'ERA274089', 'SRA377835', 'ERA651954', 'SRA626530', 'SRA192399', 'ERA118142', 'SRA758441', 'SRA405340', 'SRA030699', 'SRA576813', 'SRA748579', 'SRA695744', 'ERA400640', 'SRA746627', 'SRA408213', 'SRA546495', 'ERA296359', 'SRA506009', 'SRA587858', 'SRA147159', 'SRA504997', 'SRA698234', 'ERA599110', 'ERA286695', 'ERA118751', 'ERA199729', 'SRA070055', 'SRA036001', 'SRA324852', 'SRA750707', 'SRA551010', 'SRA485692', 'SRA533518', 'ERA205152', 'SRA523498', 'ERA273553', 'SRA673567', 'ERA422904', 'SRA726264', 'ERA017296', 'ERA448614', 'SRA321652', 'SRA565331', 'SRA358736', 'SRA488368', 'ERA743178', 'ERA292550', 'ERA319076', 'ERA1271457', 'SRA555470', 'ERA467426', 'SRA460450', 'ERA1301294', 'ERA212871', 'ERA524960', 'SRA541259', 'ERA775522', 'SRA236567', 'SRA621041', 'ERA652547', 'SRA495307', 'ERA365390', 'ERA050003', 'SRA482264', 'ERA304061', 'SRA418707', 'SRA715574', 'SRA298522', 'ERA1275504', 'ERA358232', 'SRA533754', 'ERA002771', 'SRA496266']\n" ] } ], "source": [ "%%bash \n", "#time 10000000000 #comment out this line to run it\n", "export CWD='/cellar/users/btsui/Project/METAMAP/code/metamap'\n", "export 
SPLIT_INPUT_DIR='/cellar/users/btsui/Data/nrnb01_nobackup/tmp/METAMAP//splittedInput_SRAMangaer_SRA_META/'\n", "export SGE_TASK_ID=835\n", "export SCRIPT_NAME='/cellar/users/btsui/Project/METAMAP/code/metamap/SRAParser.py'\n", "./metamapWrapper.sh " ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/cellar/users/btsui/Project/METAMAP/code/metamap\n" ] } ], "source": [ "%%bash \n", "pwd" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#!grep -rn \"/cellar/users/btsui/Project/METAMAP\" *.py" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "^C\r\n" ] } ], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:environment_conda_py26_btsui]", "language": "python", "name": "conda-env-environment_conda_py26_btsui-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.14" } }, "nbformat": 4, "nbformat_minor": 2 }