{ "cells": [
 { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/home/john/Development/python/rpyc_docker\n" ] } ], "source": [ "cd .." ] },
 { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import logging\n", "logger = logging.getLogger(\"rpyc_docker\")\n", "logger.setLevel(logging.INFO)" ] },
 { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from docker import Client\n", "docker = Client(base_url='unix://var/run/docker.sock')" ] },
 { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import rpyc_docker.worker\n", "reload(rpyc_docker.worker)\n", "import rpyc_docker.rpyc_browser_worker\n", "reload(rpyc_docker.rpyc_browser_worker)\n", "import rpyc_docker.manager\n", "reload(rpyc_docker.manager)\n", "from rpyc_docker import BrowserRpycWorker\n", "from rpyc_docker import Manager" ] },
 { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/home/john/Development/python/rpyc_docker/examples\n" ] } ], "source": [ "cd examples/" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "# Create Workers for fetching results from reddit and duckduckgo" ] },
 { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [
  "import time\n",
  "from reddit_page import RedditPage\n",
  "\n",
  "class RedditWorker(BrowserRpycWorker):\n",
  "    \"\"\"Worker that loads one subreddit through RedditPage and queues what it finds.\n",
  "\n",
  "    docker            -- docker client, forwarded to BrowserRpycWorker.__init__\n",
  "    subReddit         -- value handed to RedditPage.goto_subreddit\n",
  "    redditResultQueue -- queue that receives [subReddit, results]\n",
  "    \"\"\"\n",
  "\n",
  "    # Seconds to sleep between create_container() and connect_rpyc();\n",
  "    # presumably lets the service in the container come up -- TODO confirm.\n",
  "    CONTAINER_STARTUP_DELAY = 2\n",
  "    # Browser name passed to setup_browser(); override in a subclass if needed.\n",
  "    BROWSER_NAME = \"firefox\"\n",
  "\n",
  "    def __init__(self,docker,subReddit,redditResultQueue):\n",
  "        BrowserRpycWorker.__init__(self,docker)\n",
  "        self.subReddit = subReddit\n",
  "        self.redditResultQueue = redditResultQueue\n",
  "\n",
  "    def work(self):\n",
  "        \"\"\"Start container and browser, load the subreddit, queue [subReddit, results].\n",
  "\n",
  "        Returns True once the result has been put on redditResultQueue.\n",
  "        \"\"\"\n",
  "        # NOTE(review): nothing here tears the container down if a step raises;\n",
  "        # confirm that BrowserRpycWorker takes care of cleanup.\n",
  "        self.create_container()\n",
  "        time.sleep(self.CONTAINER_STARTUP_DELAY)\n",
  "        self.connect_rpyc()\n",
  "        self.setup_browser(self.BROWSER_NAME)\n",
  "        redditPage = RedditPage(self.browser)\n",
  "        redditPage.goto_subreddit(self.subReddit)\n",
  "        results = redditPage.get_reddit_titles()\n",
  "        self.redditResultQueue.put([self.subReddit,results])\n",
  "        return True" ] },
 { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [
  "import time\n",
  "from duckduckgo_page import DuckDuckGoPage\n",
  "\n",
  "class DuckDuckGoWorker(BrowserRpycWorker):\n",
  "    \"\"\"Worker that runs one search through DuckDuckGoPage and queues what it finds.\n",
  "\n",
  "    docker      -- docker client, forwarded to BrowserRpycWorker.__init__\n",
  "    searchTerm  -- value handed to DuckDuckGoPage.search\n",
  "    resultQueue -- queue that receives [searchTerm, results]\n",
  "    \"\"\"\n",
  "\n",
  "    # Seconds to sleep between create_container() and connect_rpyc();\n",
  "    # presumably lets the service in the container come up -- TODO confirm.\n",
  "    CONTAINER_STARTUP_DELAY = 2\n",
  "    # Browser name passed to setup_browser(); override in a subclass if needed.\n",
  "    BROWSER_NAME = \"firefox\"\n",
  "\n",
  "    def __init__(self,docker,searchTerm,resultQueue):\n",
  "        BrowserRpycWorker.__init__(self,docker)\n",
  "        self.searchTerm = searchTerm\n",
  "        self.resultQueue = resultQueue\n",
  "\n",
  "    def work(self):\n",
  "        \"\"\"Start container and browser, run the search, queue [searchTerm, results].\n",
  "\n",
  "        Returns True once the result has been put on resultQueue.\n",
  "        \"\"\"\n",
  "        # NOTE(review): nothing here tears the container down if a step raises;\n",
  "        # confirm that BrowserRpycWorker takes care of cleanup.\n",
  "        self.create_container()\n",
  "        time.sleep(self.CONTAINER_STARTUP_DELAY)\n",
  "        self.connect_rpyc()\n",
  "        self.setup_browser(self.BROWSER_NAME)\n",
  "        duckDuckGoPage = DuckDuckGoPage(self.browser)\n",
  "        duckDuckGoPage.search(self.searchTerm)\n",
  "        results = duckDuckGoPage.get_results()\n",
  "        self.resultQueue.put([self.searchTerm,results])\n",
  "        return True" ] },
 { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/home/john/Development/python/rpyc_docker\n" ] } ], "source": [ "cd .." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Using the manager to run Reddit workers" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import Queue" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "redditResultQueue = Queue.Queue()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true }, "outputs": [], "source": [ "workQueue = Queue.Queue()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "redditQueries = [\"/r/python\",\"/r/clojure\",\"r/programming\",\"r/javascript\"]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "for redditQuery in redditQueries :\n", " workQueue.put([RedditWorker,[docker,redditQuery,redditResultQueue],{}])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "manager = Manager(workQueue,1)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:rpyc_docker:,[, '/r/python', ],{}\n" ] } ], "source": [ "manager.start()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'WorkerManager Report\\nRunning workers 0 \\nResults 4 \\nErrors 0 \\nQueue Size 0'" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "manager.report()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": true }, "outputs": [], "source": [ "result = redditResultQueue.get_nowait()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Using the manager to run DuckDuckGo workers" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, 
"outputs": [], "source": [ "import Queue" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "duckDuckGoResultQueue = Queue.Queue()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "duckDuckGoQueries = [\"python\",\"clojure\",\"programming\",\"javascript\"]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true }, "outputs": [], "source": [ "workQueue = Queue.Queue()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true }, "outputs": [], "source": [ "for duckDuckGoQuery in duckDuckGoQueries :\n", " workQueue.put([DuckDuckGoWorker,[docker,duckDuckGoQuery,duckDuckGoResultQueue],{}])" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": true }, "outputs": [], "source": [ "manager = Manager(workQueue,1)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:rpyc_docker:,[, 'python', ],{}\n" ] } ], "source": [ "manager.start()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'WorkerManager Report\\nRunning workers 0 \\nResults 2 \\nErrors 2 \\nQueue Size 0'" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "manager.report()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "[u'Traceback (most recent call last):',\n", " u' File \"rpyc_docker/worker.py\", line 38, in run',\n", " u' self.result = self.work()',\n", " u' File \"\", line 17, in work',\n", " u' results = duckDuckGoPage.get_results()',\n", " u' File \"duckduckgo_page.py\", line 38, in get_results',\n", " u' File \"/usr/local/lib/python2.7/dist-packages/rpyc/core/netref.py\", line 196, in 
__call__',\n", " u' return syncreq(_self, consts.HANDLE_CALL, args, kwargs)',\n", " u' File \"/usr/local/lib/python2.7/dist-packages/rpyc/core/netref.py\", line 71, in syncreq',\n", " u' return conn.sync_request(handler, oid, *args)',\n", " u' File \"/usr/local/lib/python2.7/dist-packages/rpyc/core/protocol.py\", line 441, in sync_request',\n", " u' raise obj',\n", " u'WebDriverException: Message: resultElm.querySelector(...) is null',\n", " u'Stacktrace:',\n", " u'[',\n", " u'u',\n", " u\"'\",\n", " u' ',\n", " u' ',\n", " u' ',\n", " u' ',\n", " u'a',\n", " u't',\n", " u' ',\n", " u'a',\n", " u'n',\n", " u'o',\n", " u'n',\n", " u'y',\n", " u'm',\n", " u'o',\n", " u'u',\n", " u's',\n", " u'/',\n", " u'<',\n", " u' ',\n", " u'(',\n", " u'h',\n", " u't',\n", " u't',\n", " u'p',\n", " u's',\n", " u':',\n", " u'/',\n", " u'/',\n", " u'd',\n", " u'u',\n", " u'c',\n", " u'k',\n", " u'd',\n", " u'u',\n", " u'c',\n", " u'k',\n", " u'g',\n", " u'o',\n", " u'.',\n", " u'c',\n", " u'o',\n", " u'm',\n", " u'/',\n", " u'?',\n", " u'q',\n", " u'=',\n", " u'c',\n", " u'l',\n", " u'o',\n", " u'j',\n", " u'u',\n", " u'r',\n", " u'e',\n", " u'&',\n", " u'i',\n", " u'a',\n", " u'=',\n", " u'a',\n", " u'b',\n", " u'o',\n", " u'u',\n", " u't',\n", " u' ',\n", " u'l',\n", " u'i',\n", " u'n',\n", " u'e',\n", " u' ',\n", " u'6',\n", " u'9',\n", " u' ',\n", " u'>',\n", " u' ',\n", " u'F',\n", " u'u',\n", " u'n',\n", " u'c',\n", " u't',\n", " u'i',\n", " u'o',\n", " u'n',\n", " u':',\n", " u'8',\n", " u')',\n", " u\"'\",\n", " u',',\n", " u' ',\n", " u'u',\n", " u\"'\",\n", " u' ',\n", " u' ',\n", " u' ',\n", " u' ',\n", " u'a',\n", " u't',\n", " u' ',\n", " u'a',\n", " u'n',\n", " u'o',\n", " u'n',\n", " u'y',\n", " u'm',\n", " u'o',\n", " u'u',\n", " u's',\n", " u' ',\n", " u'(',\n", " u'h',\n", " u't',\n", " u't',\n", " u'p',\n", " u's',\n", " u':',\n", " u'/',\n", " u'/',\n", " u'd',\n", " u'u',\n", " u'c',\n", " u'k',\n", " u'd',\n", " u'u',\n", " u'c',\n", " u'k',\n", " u'g',\n", " 
u'o',\n", " u'.',\n", " u'c',\n", " u'o',\n", " u'm',\n", " u'/',\n", " u'?',\n", " u'q',\n", " u'=',\n", " u'c',\n", " u'l',\n", " u'o',\n", " u'j',\n", " u'u',\n", " u'r',\n", " u'e',\n", " u'&',\n", " u'i',\n", " u'a',\n", " u'=',\n", " u'a',\n", " u'b',\n", " u'o',\n", " u'u',\n", " u't',\n", " u' ',\n", " u'l',\n", " u'i',\n", " u'n',\n", " u'e',\n", " u' ',\n", " u'6',\n", " u'9',\n", " u' ',\n", " u'>',\n", " u' ',\n", " u'F',\n", " u'u',\n", " u'n',\n", " u'c',\n", " u't',\n", " u'i',\n", " u'o',\n", " u'n',\n", " u':',\n", " u'3',\n", " u')',\n", " u\"'\",\n", " u',',\n", " u' ',\n", " u'u',\n", " u\"'\",\n", " u' ',\n", " u' ',\n", " u' ',\n", " u' ',\n", " u'a',\n", " u't',\n", " u' ',\n", " u'h',\n", " u'a',\n", " u'n',\n", " u'd',\n", " u'l',\n", " u'e',\n", " u'E',\n", " u'v',\n", " u'a',\n", " u'l',\n", " u'u',\n", " u'a',\n", " u't',\n", " u'e',\n", " u'E',\n", " u'v',\n", " u'e',\n", " u'n',\n", " u't',\n", " u' ',\n", " u'(',\n", " u'h',\n", " u't',\n", " u't',\n", " u'p',\n", " u's',\n", " u':',\n", " u'/',\n", " u'/',\n", " u'd',\n", " u'u',\n", " u'c',\n", " u'k',\n", " u'd',\n", " u'u',\n", " u'c',\n", " u'k',\n", " u'g',\n", " u'o',\n", " u'.',\n", " u'c',\n", " u'o',\n", " u'm',\n", " u'/',\n", " u'?',\n", " u'q',\n", " u'=',\n", " u'c',\n", " u'l',\n", " u'o',\n", " u'j',\n", " u'u',\n", " u'r',\n", " u'e',\n", " u'&',\n", " u'i',\n", " u'a',\n", " u'=',\n", " u'a',\n", " u'b',\n", " u'o',\n", " u'u',\n", " u't',\n", " u':',\n", " u'6',\n", " u'9',\n", " u')',\n", " u\"'\",\n", " u']',\n", " u'',\n", " u'========= Remote Traceback (1) =========',\n", " u'Traceback (most recent call last):',\n", " u' File \"/usr/local/lib/python2.7/dist-packages/rpyc/core/protocol.py\", line 305, in _dispatch_request',\n", " u' res = self._HANDLERS[handler](self, *args)',\n", " u' File \"/usr/local/lib/python2.7/dist-packages/rpyc/core/protocol.py\", line 535, in _handle_call',\n", " u' return self._local_objects[oid](*args, **dict(kwargs))',\n", " u' File 
\"/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py\", line 403, in execute_script',\n", " u\" {'script': script, 'args':converted_args})['value']\",\n", " u' File \"/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py\", line 175, in execute',\n", " u' self.error_handler.check_response(response)',\n", " u' File \"/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/errorhandler.py\", line 166, in check_response',\n", " u' raise exception_class(message, screen, stacktrace)',\n", " u'WebDriverException: Message: resultElm.querySelector(...) is null',\n", " u'Stacktrace:',\n", " u' at anonymous/< (https://duckduckgo.com/?q=clojure&ia=about line 69 > Function:8)',\n", " u' at anonymous (https://duckduckgo.com/?q=clojure&ia=about line 69 > Function:3)',\n", " u' at handleEvaluateEvent (https://duckduckgo.com/?q=clojure&ia=about:69)',\n", " u'']" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "manager.errors[0][0].splitlines()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Running two different docker workers at once" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": true }, "outputs": [], "source": [ "duckDuckGoResultQueue = Queue.Queue()\n", "redditResultQueue = Queue.Queue()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [], "source": [ "workQueue = Queue.Queue()\n", "for duckDuckGoQuery,redditQuery in zip(duckDuckGoQueries,redditQueries):\n", " workQueue.put([DuckDuckGoWorker,[docker,duckDuckGoQuery,duckDuckGoResultQueue],{}])\n", " workQueue.put([RedditWorker,[docker,redditQuery,redditResultQueue],{}])" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": true }, "outputs": [], "source": [ "manager = Manager(workQueue,2)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [ { "name": 
"stderr", "output_type": "stream", "text": [ "INFO:rpyc_docker:,[, 'python', ],{}\n", "INFO:rpyc_docker:,[, '/r/python', ],{}\n", "WARNING:requests.packages.urllib3.connectionpool:Connection pool is full, discarding connection: localhost\n" ] } ], "source": [ "manager.start()" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'WorkerManager Report\\nRunning workers 0 \\nResults 6 \\nErrors 2 \\nQueue Size 0'" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "manager.report()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 0 }