{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": "## Ten Jupyter/IPython essentials" }, { "cell_type": "markdown", "metadata": {}, "source": "### Using IPython as an extended shell" }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": "'/home/cyrille/minibook/chapter1'" }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": "%pwd" }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": "!wget https://raw.githubusercontent.com/ipython-books/minibook-2nd-data/master/facebook.zip" }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": "facebook.zip [...]" }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": "%ls" }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": "!unzip facebook.zip" }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": "facebook facebook.zip [...]" }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": "%ls" }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": "/home/cyrille/minibook/chapter1/facebook" }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": "%cd facebook" }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": "%bookmark fbdata" }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": "0.circles 1684.circles 3437.circles 3980.circles 686.circles\n0.edges 1684.edges 3437.edges 3980.edges 686.edges\n107.circles 1912.circles 348.circles 414.circles 698.circles\n107.edges 1912.edges 348.edges 414.edges 698.edges" }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": "%ls" }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": "files = !ls -1 -S | grep .edges" }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": "['1912.edges',\n '107.edges',\n '1684.edges',\n '3437.edges',\n '348.edges',\n '0.edges',\n '414.edges',\n '686.edges',\n '698.edges',\n '3980.edges']" }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": "files" }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": "import os\nfrom operator import itemgetter\n# Get the name and file size of all .edges files.\nfiles = [(file, os.stat(file).st_size)\n for file in os.listdir('.')\n if file.endswith('.edges')]\n# Sort the list with the second item (file size),\n# in decreasing order.\nfiles = sorted(files,\n key=itemgetter(1),\n reverse=True)\n# Only keep the first item (file name), in the same order.\nfiles = [file for (file, size) in files]" }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": "2290 2363\n2346 2025\n2140 2428\n2201 2506\n2425 2557" }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": "!head -n5 {files[0]}" }, { "cell_type": "markdown", "metadata": {}, "source": "### Learning magic commands" }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": "Available line magics:\n%alias %alias_magic %autocall %automagic %autosave %bookmark %cat %cd %clear %colors %config %connect_info %cp %debug %dhist %dirs %doctest_mode %ed %edit %env %gui %hist %history %install_default_config %install_ext %install_profiles %killbgscripts %ldir %less %lf %lk %ll %load %load_ext %loadpy %logoff %logon %logstart %logstate %logstop %ls %lsmagic %lx %macro %magic %man %matplotlib %mkdir %more %mv %notebook %page %pastebin %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %popd %pprint %precision %profile %prun %psearch %psource %pushd %pwd %pycat %pylab %qtconsole %quickref %recall %rehashx %reload_ext %rep %rerun %reset %reset_selective %rm %rmdir %run %save %sc %set_env %store %sx %system %tb %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode\n\nAvailable cell magics:\n%%! %%HTML %%SVG %%bash %%capture %%debug %%file %%html %%javascript %%latex %%perl %%prun %%pypy %%python %%python2 %%python3 %%ruby %%script %%sh %%svg %%sx %%system %%time %%timeit %%writefile\n\nAutomagic is ON, % prefix IS NOT needed for line magics." }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": "%lsmagic" }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": "%history?" }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": "files = !ls -1 -S | grep .edges\nfiles\n!head -n5 {files[0]}\n%lsmagic\n%history?" }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": "%history -l 5" }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": "1440" }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": "# how many minutes in a day?\n24 * 60" }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": "525600" }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": "# and in a year?\n_ * 365" }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": "%%capture output\n%ls" }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": "0.circles 1684.circles 3437.circles 3980.circles 686.circles\n0.edges 1684.edges 3437.edges 3980.edges 686.edges\n107.circles 1912.circles 348.circles 414.circles 698.circles\n107.edges 1912.edges 348.edges 414.edges 698.edges" }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": "output.stdout" }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": "_HEY\nfacebook\nfacebook.zip\n[...]" }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": "%%bash\ncd ..\ntouch _HEY\nls\nrm _HEY\ncd facebook" }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": "GHCi, version 7.6.3: http://www.haskell.org/ghc/ :? for help\nLoading package ghc-prim ... linking ... done.\nLoading package integer-gmp ... linking ... done.\nLoading package base ... linking ... done.\nPrelude> Hello world!\nPrelude> Leaving GHCi." }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": "%%script ghci\nputStrLn \"Hello world!\"" }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": "Writing myfile.txt" }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": "%%writefile myfile.txt\nHello world!" }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": "Hello world!" }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": "!more myfile.txt" }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": "!rm myfile.txt" }, { "cell_type": "markdown", "metadata": {}, "source": "### Mastering tab completion" }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": "(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook\n/home/cyrille/minibook/chapter1/facebook\n0.circles 1684.circles 3437.circles 3980.circles 686.circles\n0.edges 1684.edges 3437.edges 3980.edges 686.edges\n107.circles 1912.circles 348.circles 414.circles 698.circles\n107.edges 1912.edges 348.edges 414.edges 698.edges" }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": "%cd fbdata\n%ls" }, { "cell_type": "markdown", "metadata": {}, "source": "### Writing interactive documents in the Notebook with Markdown" }, { "cell_type": "markdown", "metadata": {}, "source": "### Creating interactive widgets in the Notebook" }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": "from IPython.display import YouTubeVideo\nYouTubeVideo('j9YpkSX7NNM')" }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": "'The square of 7 is 49.'" }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": "from ipywidgets import interact # IPython.html.widgets before IPython 4.0\n@interact(x=(0, 10))\ndef square(x):\n return(\"The square of %d is %d.\" % (x, x**2))" }, { "cell_type": "markdown", "metadata": {}, "source": "### Running Python scripts from IPython" }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": "(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook\n/home/cyrille/minibook/chapter1/facebook" }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": "%cd fbdata\n%cd .." }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": "Overwriting egos.py" }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": "%%writefile egos.py\nimport sys\nimport os\n# We retrieve the folder as the first positional argument\n# to the command-line call\nif len(sys.argv) > 1:\n folder = sys.argv[1]\n# We list all files in the specified folder\nfiles = os.listdir(folder)\n# ids contains the list of idenfitiers\nidentifiers = [int(file.split('.')[0]) for file in files]\n# Finally, we remove duplicates with set(), and sort the list\n# with sorted().\nids = sorted(set(identifiers))" }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": "%run egos.py facebook" }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": "[0, 107, 348, 414, 686, 698, 1684, 1912, 3437, 3980]" }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": "ids" }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": "folder = 'facebook'" }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": "%run egos.py" }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": "%run -i egos.py" }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": "[0, 107, 348, 414, 686, 698, 1684, 1912, 3437, 3980]" }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": "ids" }, { "cell_type": "markdown", "metadata": {}, "source": "### Introspecting Python objects" }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": "import networkx" }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": "networkx.Graph?" }, { "cell_type": "markdown", "metadata": {}, "source": "### Debugging Python code" }, { "cell_type": "markdown", "metadata": {}, "source": "### Benchmarking Python code" }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": "(bookmark:fbdata) -> /home/cyrille/minibook/chapter1/facebook\n/home/cyrille/minibook/chapter1/facebook" }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": "%cd fbdata" }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": "import networkx" }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": "graph = networkx.read_edgelist('107.edges')" }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": "(1034, 26749)" }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": "len(graph.nodes()), len(graph.edges())" }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": "True" }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": "networkx.is_connected(graph)" }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": "100 loops, best of 3: 5.92 ms per loop" }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": "%timeit networkx.is_connected(graph)" }, { "cell_type": "markdown", "metadata": {}, "source": "### Profiling Python code" }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": "import networkx" }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": "def ncomponents(file):\n graph = networkx.read_edgelist(file)\n return networkx.number_connected_components(graph)" }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": "import glob\ndef ncomponents_files():\n return [(file, ncomponents(file))\n for file in sorted(glob.glob('*.edges'))]" }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": "0.edges 5 component(s)\n107.edges 1 component(s)\n1684.edges 4 component(s)\n1912.edges 2 component(s)\n3437.edges 2 component(s)\n348.edges 1 component(s)\n3980.edges 4 component(s)\n414.edges 2 component(s)\n686.edges 1 component(s)\n698.edges 3 component(s)" }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": "for file, n in ncomponents_files():\n print(file.ljust(12), n, 'component(s)')" }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/plain": "1 loops, best of 3: 634 ms per loop" }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": "%timeit ncomponents_files()" }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": "2391070 function calls in 1.038 seconds\n\nOrdered by: cumulative time\n\nncalls tottime percall cumtime percall filename:lineno(function)\n 1 0.000 0.000 1.038 1.038 {built-in method exec}\n 1 0.000 0.000 1.038 1.038 :1()\n 10 0.000 0.000 0.995 0.100 :1(read_edgelist)\n 10 0.000 0.000 0.995 0.100 decorators.py:155(_open_file)\n 10 0.376 0.038 0.995 0.099 edgelist.py:174(parse_edgelist)\n170174 0.279 0.000 0.350 0.000 graph.py:648(add_edge)\n170184 0.059 0.000 0.095 0.000 edgelist.py:366()\n 10 0.000 0.000 0.021 0.002 connected.py:98(number_connected_components)\n 35 0.001 0.000 0.021 0.001 connected.py:22(connected_components)" }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": "%prun -s cumtime ncomponents_files()" } ], "metadata": {}, "nbformat": 4, "nbformat_minor": 0 }