{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "<!--<div class=\"alert alert-block alert-info\">\n", "<span style=\"font-size:xx-large;\"><center>Avanced Python</center></span>\n", "</div>-->\n", "\n", "# Avanced Python" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tips\n", "- On all your script : \n", " - shebang ``` #!/usr/bin/env python3 ```\n", " - encoding ``` # -*- coding: utf-8 -*- ```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- underscore have a lot of different meanings\n", " - separator in number ``` 10_000 ``` (only python 3.6)\n", " - last result in the interpreter ``` _ ```\n", " - I don't care ``` _ = f() ``` (dangerous with internationnaization)\n", " - weakly private ``` _something ``` (won't be imported with ```import *```)\n", " - avoid conflict ``` list_ ```\n", " - more private (mangled) ```__stuff``` -> `_ClassName__mangled_stuff`\n", " - magic methods (also mangled) ``` __init__```\n", " - for internationnalization ```_()```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- I/O\n", " - **Always** decode in input \n", " - **Always** encode in output" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- modules : import involve execution! 
\n", "Use\n", "```python\n", "if __name__ == '__main__':\n", " some_computation()\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- unpacking dans les boucles \n", " - zip:\n", "```python\n", "for name, surname in zip(names, surnames):\n", " ...\n", "```\n", " - enumerate\n", "```python\n", "for index, prime in enumerate(primes):\n", " ...\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- False tests :\n", "```python\n", "False, 0, None, __nonzero__(), __len__()\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- lambda functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "square1 = lambda x: x ** 2\n", "\n", "def square2(x):\n", " return x ** 2\n", "\n", "print(square1(5))\n", "print(square2(5))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- Be carefull with shared references\n", "```python\n", "a is b # a and b are references to the same object\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "a = 2\n", "b = a\n", "print(a, b)\n", "a = 3\n", "print(a, b)\n", "l1 = [1, 2, 3]\n", "l2 = l1\n", "l3 = l1[:]\n", "print(l1, l2, l3)\n", "l1[1] = 4\n", "print(l1, l2, l3)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- **never** use a mutable optionnal arg" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def good(a_list=None):\n", " if a_list is None:\n", " a_list = []\n", " a_list.append(1)\n", " return a_list\n", "print(good())\n", "print(good())\n", "print(good([2]))\n", "print(good())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def wrong(a_list=[]): # never use a mutable optionnal argument\n", " a_list.append(1)\n", " return a_list\n", "print(wrong())\n", "print(wrong())\n", "print(wrong([2]))\n", 
"print(wrong())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- use a shallow copy to modify a list in a loop (or be very carefull)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from copy import copy\n", "\n", "def good(my_set):\n", " for value in copy(my_set):\n", " if 'bert' in value:\n", " my_set.remove(value)\n", " return(my_set)\n", " \n", "print(\"list ok \", good([\"einstein\", \"albert\", \"bert\", \"curie\"]))\n", "print(\"set ok \", good({\"einstein\", \"albert\", \"bert\", \"curie\"}))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def wrong(my_set):\n", " for value in my_set:\n", " if 'bert' in value:\n", " my_set.remove(value)\n", " return(my_set)\n", "\n", "print(\"list Nok \", wrong([\"einstein\", \"albert\", \"bert\", \"curie\"]))\n", "print(\"set Nok \", wrong({\"einstein\", \"albert\", \"bert\", \"curie\"}))\n", "print(\"END\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- use exceptions\n", " - try is very fast but except is very slow\n", " - nice way to get out of multiples loops/functions at the same time\n", " - Allows you to be sure that an error had been taken care of" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "try:\n", " print(\"set Nok \", wrong({\"einstein\", \"albert\", \"bert\", \"curie\"}))\n", "\n", "except RuntimeError as e:\n", " print()\n", " print(\"Oups, something went wrong:\")\n", " print(e)\n", " print(\"Continuing anyway\")\n", " print()\n", " \n", "print(\"I'm continuing\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import sys\n", "class MyException(Exception):\n", " pass\n", "\n", "try:\n", " for i in range(10):\n", " for j in range(10):\n", " if i == j == 5:\n", " raise MyException(\"Found it\")\n", "except MyException as e:\n", " print(\"Out of the loop\")\n", " print(e)\n", " 
from functools import wraps
from time import time


def PrintAppel(f):
    """Decorator that counts calls to ``f`` and reports per-call and average timings.

    The wrapper exposes its bookkeeping as attributes so callers can inspect
    them afterwards: ``NbAppels`` (number of calls), ``tin`` / ``tout`` (last
    entry / exit timestamps) and ``tcum`` (cumulated elapsed time).
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        # --- bookkeeping before the call ---
        wrapper.NbAppels += 1
        print(f"Entering {f.__name__}")
        wrapper.tin = time()

        result = f(*args, **kwargs)

        # --- bookkeeping after the call ---
        wrapper.tout = time()
        elapsed = wrapper.tout - wrapper.tin
        wrapper.tcum += elapsed
        print(f"Exiting {f.__name__}")
        print(f"This was the call n° {wrapper.NbAppels}")
        print(f"It took {elapsed} s")
        print(f"in average {wrapper.tcum / wrapper.NbAppels} s")

        return result

    # Counters live on the wrapper itself (one set per decorated function).
    wrapper.NbAppels = 0
    wrapper.tcum = 0.

    return wrapper
class egg(object):  # every class ultimately derives from "object"
    """Toy class tracking how many eggs exist across all recipes.

    ``total_number`` is a class attribute shared by every instance:
    mutable shared state, handle with care!
    """

    total_number = 0  # class-level counter, shared by ALL instances **DANGER**!

    def __init__(self, number=1):  # constructor
        """Build a batch holding ``number`` eggs (default: one)."""
        self.number = number  # per-instance attribute: the right way
        egg.total_number += number

    @classmethod
    def from_recipe(cls, recipe):  # alternative constructor
        """Build a batch from a recipe dict (reads the "oeufs" entry)."""
        return cls(recipe["oeufs"])

    def __del__(self):  # destructor (rarely needed)
        """Give this batch's eggs back to the shared pool."""
        egg.total_number -= self.number

    def __str__(self):  # printable representation
        """Human-readable summary of this batch vs. the global count."""
        return f"On a total of {egg.total_number} eggs, I own {self.number}"

    def how_many(self):  # instance method
        """Number of eggs owned by this instance."""
        return self.number

    @staticmethod
    def how_many_egg():  # class-level query (rare)
        """Total number of eggs across every live instance."""
        return egg.total_number
def greeting(name: str) -> str:
    """Return a greeting for ``name`` (type-hinting demo)."""
    prefix: str = "Hello"  # variable annotation (PEP 526)
    return " ".join((prefix, name))
import logging
import warnings


def prepare_logging():
    """
    Configure and return the module logger.

    Attaches two handlers sharing one format: a file handler writing INFO+
    records to ./test.log and a console handler echoing WARNING+ records.
    (In a real project this setup would live in its own module.)
    """

    # Initialize the root logging machinery if nobody did it yet
    logging.basicConfig()

    # one logger per module, keyed by module name
    logger = logging.getLogger(__name__)

    ## ONLY FOR IPYTHON
    # Drop any handler left over from a previous run of this cell, and keep
    # messages from being printed twice by ipython's own handler.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
    logger.propagate = False
    ## ONLY FOR IPYTHON

    # One shared record format for every destination
    formatter = logging.Formatter(
        "%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s")

    file_handler = logging.FileHandler(
        "{logPath}/{fileName}.log".format(logPath=".", fileName="test"))
    console_handler = logging.StreamHandler()

    # Per-destination severity thresholds; the logger itself is wide open
    # (DEBUG) and each handler filters further.
    for handler, level in ((file_handler, logging.INFO),       # no DEBUG
                           (console_handler, logging.WARNING)):  # warnings+errors only
        handler.setFormatter(formatter)
        handler.setLevel(level)
        logger.addHandler(handler)

    logger.setLevel(logging.DEBUG)  # let everything reach the handlers

    return logger
range(1,10000)]\n", "%timeit [1 * i for i in range(1,10000)]\n", "%timeit [1 / i for i in range(1,10000)]\n", "%timeit [1 // i for i in range(1,10000)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%timeit [1. + float(i) for i in range(1,10000)]\n", "%timeit [1. * float(i) for i in range(1,10000)]\n", "%timeit [1. / float(i) for i in range(1,10000)]\n", "%timeit [1. // float(i) for i in range(1,10000)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " - cProfile (for real code) " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import cProfile\n", "import re\n", "\n", "def function2(array):\n", " for i in range(500):\n", " array += 3\n", " array = array * 2\n", " return array\n", "\n", "def function1():\n", " array = np.random.randint(500000, size=5000000)\n", " array = function2(array)\n", " return sorted(array)\n", "\n", "cProfile.run('function1()', sort=\"tottime\")\n", "\n", "# or in jupyter\n", "\n", "%prun function1()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Or, for a beautifull call graph of a complex program: \n", " ```bash\n", " python3 -m cProfile -o profile.pstats script.py\n", " gprof2dot -f pstats profile.pstats | dot -Tpng -o profile.png\n", " ```" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from IPython.display import Image\n", "Image(filename='images/profile.png') " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## In sequential and in Python\n", "- small is beautifull (PEP 20)\n", "- IO cost a lot (avoid reading and writing into files)\n", "- choose the right data structure / algorithm\n", "- prefere numpy based array\n", "- avoid loops (vectorization using slice)\n", "- avoid copy of array\n", "- changing size of an array\n", "> Stop optimizing your Python code here (and compile it)\n", "- inline manually\n", "- local is 
import textwrap

def checker(ref, res):
    """
    Compare a result against its reference.

    Returns True when ``ref`` and ``res`` have the same type, the same dtype
    and exactly equal contents; otherwise prints a diagnostic report and
    returns False.
    """
    # Mismatched types: report what we can and stop before touching
    # array-only attributes (the original crashed on res.dtype here).
    if type(ref) is not type(res):
        print("Failed")
        print("types: ", type(ref), type(res))
        return False

    if ref.dtype == res.dtype and np.array_equal(res, ref):
        return True

    print("Failed")

    print("types: ", type(ref), type(res))
    print("dtypes: ", ref.dtype, res.dtype)

    if ref.shape != res.shape:
        # np.isclose cannot compare arrays whose shapes do not broadcast,
        # so report the shapes and bail out instead of crashing.
        print("shapes: ", ref.shape, res.shape)
        return False

    differ = np.where(np.logical_not(np.isclose(ref, res)))

    print(textwrap.dedent("""results:
        ref shape: {}
        res shape: {}

        idx
        {}

        ref
        {}

        res
        {}""".format(ref.shape, res.shape, differ, ref[differ], res[differ])))
    return False
\",end=\"\")\n", " \n", " # The reference (might not be decorated)\n", " try:\n", " ref = check(*arg, check=None, timing=False)\n", " except TypeError:\n", " ref = check(*arg)\n", "\n", " if not checker(ref, res):\n", " raise RuntimeError\n", " else:\n", " print(\"OK\")\n", " \n", " if timing:\n", " print(\"Timing \", function.__name__, \" ...\")\n", " %timeit function(a, b)\n", " return res\n", " return wrapped\n", " return _check" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Create data\n", "a = np.arange(1,1e6)\n", "b = np.arange(1,1e6)\n", "\n", "n = 4\n", "s = 2 * n + 1\n", "g = np.arange(s ** 2, dtype=np.int).reshape((s, s))\n", "\n", "N = 200\n", "small_f = np.arange(N * N, dtype=np.int).reshape((N, N))\n", "N = 2000\n", "large_f = np.arange(N * N, dtype=np.int).reshape((N, N))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Python code (reference)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@instrument()\n", "def py_simple_operations_with_tmparrays_and_loops(a, b):\n", " n = len(a)\n", " \n", " c = np.empty_like(a) \n", " for i in range(n):\n", " c[i] = a[i] * b[i]\n", " \n", " d = np.empty_like(a)\n", " for i in range(n):\n", " d[i] = 4.1 * a[i]\n", " \n", " e = np.empty_like(a)\n", " for i in range(n):\n", " e[i] = c[i] - d[i]\n", " \n", " f = np.empty_like(a)\n", " for i in range(n):\n", " f[i] = 2.5 * b[i]\n", " \n", " g = np.empty_like(a, dtype=np.bool)\n", " for i in range(n):\n", " g[i] = e[i] > f[i]\n", " \n", " return g\n", "\n", "py_simple_operations_with_tmparrays_and_loops(a, b);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@instrument(check=py_simple_operations_with_tmparrays_and_loops)\n", "def py_simple_operations_with_loops(a, b):\n", " n = len(a)\n", " \n", " g = np.empty_like(a, dtype=np.bool)\n", " for i in range(n):\n", " c = a[i] * b[i]\n", " d = 4.1 * a[i]\n", 
" e = c - d\n", " f = 2.5 * b[i]\n", " g[i] = e > f\n", " \n", " return g\n", "py_simple_operations_with_loops(a, b);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@instrument(check=py_simple_operations_with_loops)\n", "def py_simple_operations(a, b):\n", " return a * b - 4.1 * a > 2.5 * b\n", "\n", "py_simple_operations(a, b);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def py_tough_operations(a, b):\n", " return np.sin(a) + np.arcsinh(a / b)\n", "\n", "i_py_tough_operations = instrument()(py_tough_operations)\n", "i_py_tough_operations(a, b);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### We can look at the bytecode (halfway to assembly)\n", "And use it to optimize some things" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "import dis\n", "print(dis.code_info(py_tough_operations))\n", "print()\n", "print(\"Code :\")\n", "dis.dis(py_tough_operations)\n", "print()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from numpy import sin, arcsinh\n", "\n", "# We can avoid the step 0 and 12\n", "def py_tough_operations_with_localsin(a, b):\n", " return sin(a) + arcsinh(a / b)\n", "\n", "i_py_tough_operations_with_localsin = instrument()(py_tough_operations_with_localsin)\n", "i_py_tough_operations_with_localsin(a, b);" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "import dis\n", "print(dis.code_info(py_tough_operations_with_localsin))\n", "print()\n", "print(\"Code :\")\n", "dis.dis(py_tough_operations_with_localsin)\n", "print()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from numpy import sum as npsum\n", "\n", "@instrument()\n", "def convolve_python(f, g):\n", " # f is an image and is indexed by (v, w)\n", " 
from numpy import sum as npsum

@instrument()
def convolve_python(f, g):
    """2-D convolution written with pure Python loops.

    f : input image, indexed by (v, w)
    g : filter kernel, indexed by (s, t) -- both dimensions must be odd
    returns h : output image, indexed by (x, y), same shape as f
    """
    if g.shape[0] % 2 != 1 or g.shape[1] % 2 != 1:
        raise ValueError("Only odd dimensions on filter supported")

    vmax, wmax = f.shape
    smax, tmax = g.shape

    # Kernel half-widths: number of pixels between the center pixel and the
    # edge, i.e. 2 for a 5x5 filter.
    smid = smax // 2
    tmid = tmax // 2

    # Output image; the untouched border stays at zero.
    h = np.zeros_like(f)

    for x in range(smid, vmax - smid):
        for y in range(tmid, wmax - tmid):
            # Window of f covered by the kernel centered on (x, y)
            top = x - smid
            left = y - tmid
            h[x, y] = npsum(g * f[top:top + smax, left:left + tmax])

    return h

convolve_python(small_f, g);


from scipy.signal import convolve2d

def scipy_setup(f, g):
    """Copy the inputs, flipping the kernel: scipy's convolve2d applies the
    kernel reversed compared to our hand-written correlation."""
    gr = g[::-1, ::-1]
    return f.copy(), gr.copy()

@instrument(check=convolve_python, setup=scipy_setup)
def scipy_convolve(f, g):
    """Same convolution delegated to scipy; the "valid" result is written
    into the interior of a zeroed image so borders stay at zero."""
    vmax, wmax = f.shape
    smax, tmax = g.shape

    smid = smax // 2
    tmid = tmax // 2

    h = np.zeros_like(f)
    h[smid:vmax - smid, tmid:wmax - tmid] = convolve2d(f, g, mode="valid")

    return h

scipy_convolve(small_f, g);
scipy_convolve(large_f, g, check=None);
"outputs": [], "source": [ "import numexpr as ne\n", "\n", "@instrument(check=py_simple_operations)\n", "def ne_simple_operations(a, b):\n", " return ne.evaluate('a * b - 4.1 * a > 2.5 * b')\n", "\n", "ne_simple_operations(a,b);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@instrument(check=py_tough_operations_with_localsin)\n", "def ne_tough_operations(a, b):\n", " return ne.evaluate(\"sin(a) + arcsinh(a / b)\")\n", "\n", "ne_tough_operations(a,b);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Numba allows compilation of more complex code using only decorators\n", "And can parallelize part of your code \n", "But it doesn't work everytime" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numba as nb\n", "\n", "@instrument(check=ne_simple_operations)\n", "@nb.jit(nopython=True, nogil=True, cache=False, parallel=True)\n", "def nb_simple_operations(a, b):\n", " return a * b - 4.1 * a > 2.5 * b\n", "\n", "nb_simple_operations(a, b);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@instrument(check=ne_tough_operations)\n", "@nb.jit(nopython=True, nogil=True, cache=False, parallel=True)\n", "def nb_tough_operations(a, b):\n", " return np.sin(a) + np.arcsinh(a / b)\n", "\n", "nb_tough_operations(a, b);" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "@instrument(check=scipy_convolve)\n", "@nb.jit(nopython=True, nogil=True, cache=False, parallel=True)\n", "def convolve_numba(f, g):\n", " # smid and tmid are number of pixels between the center pixel\n", " # and the edge, ie for a 5x5 filter they will be 2.\n", " vmax, wmax = f.shape\n", " smax, tmax = g.shape\n", "\n", " if smax % 2 != 1 or tmax % 2 != 1:\n", " raise ValueError(\"Only odd dimensions on filter supported\")\n", " \n", " smid = smax // 2\n", " tmid = tmax // 2\n", "\n", 
" # Allocate result image.\n", " h = np.zeros_like(f)\n", " \n", " # Do convolution\n", " for x in range(smid, vmax - smid):\n", " for y in range(tmid, wmax - tmid):\n", " # Calculate pixel value for h at (x,y). Sum one component\n", " # for each pixel (s, t) of the filter g.\n", " \n", " value = 0\n", " for s in range(smax):\n", " for t in range(tmax):\n", " v = x - smid + s\n", " w = y - tmid + t\n", " value += g[s, t] * f[v, w]\n", " h[x, y] = value\n", " return h\n", "\n", "convolve_numba(small_f, g);\n", "convolve_numba(large_f, g);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Stencil contains implicit loops\n", "@nb.stencil(standard_indexing=(\"g\",),neighborhood=((-4, 4),(-4, 4)))\n", "def convolve_kernel(f, g):\n", "\n", " smax, tmax = g.shape\n", "\n", " smid = smax // 2\n", " tmid = tmax // 2\n", " \n", " h = 0\n", " for s in range(smax):\n", " for t in range(tmax):\n", " h += g[s, t] * f[s - smid, t - tmid]\n", " \n", " return h\n", "\n", "@instrument(check=convolve_numba)\n", "@nb.jit(nopython=True, nogil=True, cache=False, parallel=True)\n", "def convolve_numba_with_stencil(f, g):\n", "\n", " if g.shape[0] % 2 != 1 or g.shape[1] % 2 != 1:\n", " raise ValueError(\"Only odd dimensions on filter supported\")\n", "\n", " return convolve_kernel(f, g).astype(f.dtype)\n", "\n", "convolve_numba_with_stencil(small_f, g);\n", "convolve_numba_with_stencil(large_f, g);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Cython is the most efficient way to optimize your code\n", "But you have to:\n", "- type *every* variable\n", "- explicit all loops\n", "- parallelize manually\n", "- compile it separately (or use ipython magic)\n", "- dependencies\n", " - Windows : \n", " - Visual studio build tools\n", " - or Mingw\n", " - or Windows Subsystem for Linux\n", " - Linux :\n", " - gcc\n", " - or icc" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ 
"%load_ext Cython\n", "%set_env CFLAGS=\"-Ofast -march=native -fvect-cost-model=cheap\" \\\n", " \"-fopenmp -Wno-unused-variable -Wno-cpp -Wno-maybe-uninitialized\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%cython\n", "# cython: language_level=3\n", "# cython: initializedcheck=False\n", "# cython: binding=True\n", "# cython: nonecheck=False\n", "# distutils: extra_link_args = -fopenmp\n", "\n", "import numpy as np\n", "cimport numpy as np\n", "cimport cython\n", "from cython.parallel cimport parallel, prange\n", "\n", "@cython.boundscheck(False) # turn off bounds-checking for entire function\n", "@cython.wraparound(False) # turn off negative index wrapping for entire function\n", "def csimple_operations(const double[::1]& a, const double[::1]& b):\n", " cdef long n = a.shape[0]\n", "\n", " cdef long[:] res = np.empty([n], dtype=long)\n", " \n", " cdef Py_ssize_t i\n", " for i in prange(n, nogil=True, num_threads=8):\n", " res[i] = a[i] * b[i] - 4.1 * a[i] > 2.5 * b[i]\n", " return np.asarray(res, dtype=bool)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "i_csimple_operations = instrument(check=nb_simple_operations)(csimple_operations)\n", "i_csimple_operations(a, b);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%cython\n", "# cython: language_level=3\n", "# cython: initializedcheck=False\n", "# cython: binding=True\n", "# cython: nonecheck=False\n", "# cython: boundscheck=False\n", "# cython: wraparound=False\n", "# distutils: extra_link_args = -fopenmp\n", "\n", "import numpy as np\n", "cimport numpy as np\n", "from libc.math cimport sin, asinh\n", "from cython.parallel cimport parallel, prange\n", "\n", "def ctough_operations(const double[::1]& a, const double[::1]& b):\n", " cdef long n = a.shape[0]\n", "\n", " cdef double[:] res = np.empty([n], dtype=np.double)\n", " \n", " cdef Py_ssize_t i\n", " for 
i in prange(n, nogil=True, num_threads=8):\n", " res[i] = sin(a[i]) + asinh(a[i] / b[i])\n", " return np.asarray(res)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "i_ctough_operations = instrument(check=nb_tough_operations)(ctough_operations)\n", "i_ctough_operations(a, b);" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "%%cython\n", "# cython: language_level=3\n", "# cython: initializedcheck=False\n", "# cython: binding=True\n", "# cython: nonecheck=False\n", "# cython: boundscheck=False\n", "# cython: wraparound=False\n", "# distutils: extra_link_args = -fopenmp\n", "\n", "import numpy as np\n", "cimport numpy as np\n", "from cython.parallel cimport parallel, prange\n", "\n", "def convolve_cython(const long[:,::1]& f, const long[:,::1]& g):\n", " cdef long vmax = f.shape[0]\n", " cdef long wmax = f.shape[1]\n", " cdef long smax = g.shape[0]\n", " cdef long tmax = g.shape[1]\n", " \n", " # f is an image and is indexed by (v, w)\n", " # g is a filter kernel and is indexed by (s, t),\n", " # it needs odd dimensions\n", " # h is the output image and is indexed by (x, y),\n", " if smax % 2 != 1 or tmax % 2 != 1:\n", " raise ValueError(\"Only odd dimensions on filter supported\")\n", "\n", " # smid and tmid are number of pixels between the center pixel\n", " # and the edge, ie for a 5x5 filter they will be 2.\n", " \n", " cdef long smid = smax // 2\n", " cdef long tmid = tmax // 2\n", " \n", " # Allocate result image.\n", " cdef long[:,::1] h = np.zeros([vmax, wmax], dtype=long)\n", "\n", " cdef long value\n", " cdef long x, y, s, t, v, w\n", "\n", " # Do convolution\n", " for x in prange(smid, vmax - smid, nogil=True, num_threads=8):\n", " for y in range(tmid, wmax - tmid):\n", " # Calculate pixel value for h at (x,y). 
Sum one component\n", " # for each pixel (s, t) of the filter g.\n", "\n", " value = 0\n", " for s in range(smax):\n", " for t in range(tmax):\n", " v = x - smid + s\n", " w = y - tmid + t\n", " value = value + g[s, t] * f[v, w]\n", " h[x, y] = value\n", " \n", " return np.asarray(h)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "i_convolve_cython = instrument(check=convolve_numba_with_stencil)(convolve_cython)\n", "i_convolve_cython(small_f, g);\n", "i_convolve_cython(large_f, g);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%writefile CModule.h\n", "#include <stddef.h>\n", "\n", "void convolve_c (const long f[],\n", " const long g[],\n", " long h[],\n", " const size_t vmax,\n", " const size_t wmax,\n", " const size_t smax,\n", " const size_t tmax); " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%writefile CModule.c\n", "#include \"CModule.h\"\n", "\n", "void convolve_c (const long f[],\n", " const long g[],\n", " long h[],\n", " const size_t vmax,\n", " const size_t wmax,\n", " const size_t smax,\n", " const size_t tmax) \n", "{\n", "\n", " const size_t smid = smax / 2;\n", " const size_t tmid = tmax / 2;\n", "\n", " for(size_t s = 0; s < vmax * wmax; ++s) {\n", " h[s] = 0;\n", " }\n", "\n", " // Do convolution\n", " #pragma omp parallel for default(shared) num_threads(8)\n", " for(size_t x = smid; x < vmax - smid; ++x) {\n", " for(size_t y = tmid; y < wmax - tmid; ++y) {\n", " // Calculate pixel value for h at (x,y).\n", " // Sum one component for each pixel (s, t) of the filter g.\n", "\n", " long value = 0;\n", " for(size_t s = 0; s < smax; ++s) {\n", " for(size_t t = 0; t < tmax; ++t) {\n", " size_t v = x - smid + s;\n", " size_t w = y - tmid + t;\n", " value = value + g[s*tmax + t] * f[v*wmax + w];\n", " }\n", " }\n", " h[x*wmax + y] = value;\n", " }\n", " }\n", "}" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "%%cython\n", "# cython: language_level=3\n", "# cython: initializedcheck=False\n", "# cython: binding=True\n", "# cython: nonecheck=False\n", "# cython: boundscheck=False\n", "# cython: wraparound=False\n", "# distutils: extra_link_args = -fopenmp\n", "# distutils: sources = CModule.c\n", "\n", "import numpy as np\n", "cimport numpy as np\n", "from cython.parallel cimport parallel, prange\n", "\n", "cdef extern from \"CModule.h\":\n", " long* convolve_c (const long f[],\n", " const long g[],\n", " long h[],\n", " const size_t vmax,\n", " const size_t wmax,\n", " const size_t smax,\n", " const size_t tmax) nogil\n", "\n", "def convolve_cython_pure(const long[:,::1]& f, const long[:,::1]& g):\n", " # f is an image and is indexed by (v, w)\n", " # g is a filter kernel and is indexed by (s, t),\n", " # it needs odd dimensions\n", " # h is the output image and is indexed by (x, y),\n", " \n", " cdef long vmax = f.shape[0]\n", " cdef long wmax = f.shape[1]\n", " cdef long smax = g.shape[0]\n", " cdef long tmax = g.shape[1]\n", " \n", " if smax % 2 != 1 or tmax % 2 != 1:\n", " raise ValueError(\"Only odd dimensions on filter supported\")\n", " \n", " cdef long[:,::1] h = np.empty([vmax, wmax], dtype=long)\n", " \n", " # Do convolution\n", " with nogil:\n", " convolve_c(&f[0,0],\n", " &g[0,0],\n", " &h[0,0], vmax, wmax, smax, tmax)\n", " \n", " \n", " return np.asarray(h)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "i_convolve_cython_pure = instrument(check=i_convolve_cython)(convolve_cython_pure)\n", "i_convolve_cython_pure(small_f, g);\n", "i_convolve_cython_pure(large_f, g);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!gcc CModule.c main.c -o full_c -Ofast -march=native -fopenmp -fvect-cost-model=cheap\n", "!./full_c" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ 
"## Fortran through f2py is also very efficient\n", "But you have to\n", "- rewrite your code\n", "- be carefull with memory organization\n", "- compile it separately" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%load_ext fortranmagic\n", "%set_env F90FLAGS=\"-Ofast -march=native -fvect-cost-model=cheap -fopenmp\"\n", "%fortran_config --extra=\"-lgomp\" --extra=\"--noarch\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%fortran\n", "subroutine fsimple_operations(a, b, c, n)\n", " implicit none\n", " integer(kind=8), intent(in) :: n\n", " double precision,intent(in) :: a(n)\n", " double precision,intent(in) :: b(n)\n", "\n", " logical,intent(out) :: c(n)\n", "\n", " !$OMP PARALLEL WORKSHARE NUM_THREADS(8)\n", " c = a * b - 4.1 * a > 2.5 * b\n", " !$OMP END PARALLEL WORKSHARE\n", "\n", "end subroutine fsimple_operations" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@instrument(check=i_csimple_operations)\n", "def i_fsimple_operations(a ,b):\n", " return fsimple_operations(a, b).astype(bool)\n", "\n", "i_fsimple_operations(a, b);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%fortran\n", "subroutine ftough_operations(a, b, c, n)\n", " implicit none\n", " integer(kind=8), intent(in) :: n\n", " double precision,intent(in) :: a(n)\n", " double precision,intent(in) :: b(n)\n", " \n", " double precision,intent(out) :: c(n)\n", "\n", " !$OMP PARALLEL WORKSHARE NUM_THREADS(8)\n", " c = sin(a) + asinh(a / b)\n", " !$OMP END PARALLEL WORKSHARE\n", "\n", "end subroutine ftough_operations" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@instrument(check=i_ctough_operations)\n", "def i_ftough_operations(a ,b):\n", " return ftough_operations(a, b)\n", "\n", "i_ftough_operations(a, b);" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%fortran\n", "subroutine convolve_fortran(f, g, vmax, wmax, smax, tmax, h, err)\n", " implicit none\n", " integer(kind=8),intent(in) :: vmax,wmax,smax,tmax\n", " integer(kind=8),intent(in) :: f(vmax, wmax), g(smax, tmax)\n", " \n", " integer(kind=8),intent(out) :: h(vmax, wmax)\n", " integer(kind=8),intent(out) :: err\n", " \n", " integer(kind=8) :: smid,tmid\n", " integer(kind=8) :: x, y\n", " integer(kind=8) :: v1,v2,w1,w2\n", " \n", " ! f is an image and is indexed by (v, w)\n", " ! g is a filter kernel and is indexed by (s, t),\n", " ! it needs odd dimensions\n", " ! h is the output image and is indexed by (v, w),\n", "\n", " err = 0\n", " if (modulo(smax, 2) /= 1 .or. modulo(tmax, 2) /= 1) then\n", " err = 1\n", " return\n", " endif\n", " \n", " ! smid and tmid are number of pixels between the center pixel\n", " ! and the edge, ie for a 5x5 filter they will be 2. \n", " smid = smax / 2\n", " tmid = tmax / 2\n", " \n", " h = 0\n", " ! Do convolution\n", " ! warning : memory layout is different in fortran\n", " ! warning : array start at 1 in fortran\n", "\n", " !$OMP PARALLEL DO DEFAULT(SHARED) COLLAPSE(1) &\n", " !$OMP PRIVATE(v1,v2,w1,w2) NUM_THREADS(8)\n", " do y = tmid + 1,wmax - tmid\n", " do x = smid + 1,vmax - smid\n", " ! Calculate pixel value for h at (x,y). Sum one component\n", " ! 
for each pixel (s, t) of the filter g.\n", "\n", " v1 = x - smid\n", " v2 = v1 + smax\n", " w1 = y - tmid\n", " w2 = w1 + tmax\n", " h(x, y) = sum(g(1:smax,1:tmax) * f(v1:v2,w1:w2))\n", " enddo\n", " enddo\n", " !$OMP END PARALLEL DO\n", " return\n", "end subroutine convolve_fortran" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def fortran_setup(f, g):\n", " # memory ordering for fortran\n", " ft = np.asfortranarray(f.copy())\n", " gt = np.asfortranarray(g.copy())\n", " return ft, gt\n", "\n", "@instrument(check=i_convolve_cython_pure, setup=fortran_setup)\n", "def i_convolve_fortran(f, g):\n", " h, err = convolve_fortran(f, g)\n", " if err:\n", " print(err)\n", " raise ValueError(\"FORTRAN ERROR ! (Probably : Only odd dimensions on filter supported)\")\n", " return h\n", "\n", "i_convolve_fortran(small_f, g);\n", "i_convolve_fortran(large_f, g);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%writefile fortranModule.f90\n", "module fortranmodule\n", " implicit none\n", " contains\n", "\n", " subroutine convolve_fortran_pure(f, g, vmax, wmax, smax, tmax, h, err)\n", " implicit none\n", " integer(kind=8),intent(in) :: vmax,wmax,smax,tmax\n", " integer(kind=8),intent(in) :: f(vmax, wmax), g(smax, tmax)\n", "\n", " integer(kind=8),intent(out) :: h(vmax, wmax)\n", " integer(kind=8),intent(out) :: err\n", "\n", " integer(kind=8) :: smid,tmid\n", " integer(kind=8) :: x, y\n", " integer(kind=8) :: v1,v2,w1,w2\n", "\n", " ! f is an image and is indexed by (v, w)\n", " ! g is a filter kernel and is indexed by (s, t),\n", " ! it needs odd dimensions\n", " ! h is the output image and is indexed by (v, w),\n", "\n", " err = 0\n", " if (modulo(smax, 2) /= 1 .or. modulo(tmax, 2) /= 1) then\n", " err = 1\n", " return\n", " endif\n", "\n", " ! smid and tmid are number of pixels between the center pixel\n", " ! and the edge, ie for a 5x5 filter they will be 2. 
\n", " smid = smax / 2\n", " tmid = tmax / 2\n", "\n", " h = 0\n", " ! Do convolution\n", " ! warning : memory layout is different in fortran\n", " ! warning : array start at 1 in fortran\n", "\n", " !$OMP PARALLEL DO DEFAULT(SHARED) COLLAPSE(1) &\n", " !$OMP PRIVATE(v1,v2,w1,w2) NUM_THREADS(8)\n", " do y = tmid + 1,wmax - tmid\n", " do x = smid + 1,vmax - smid\n", " ! Calculate pixel value for h at (x,y). Sum one component\n", " ! for each pixel (s, t) of the filter g.\n", "\n", " v1 = x - smid\n", " v2 = v1 + smax\n", " w1 = y - tmid\n", " w2 = w1 + tmax\n", " h(x, y) = sum(g(1:smax,1:tmax) * f(v1:v2,w1:w2))\n", " enddo\n", " enddo\n", " !$OMP END PARALLEL DO\n", " return\n", " end subroutine convolve_fortran_pure\n", "end module fortranmodule" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!gfortran fortranModule.f90 main.f90 -o full_f -Ofast -march=native -fopenmp -fvect-cost-model=cheap\n", "!./full_f" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- parallelism\n", " - cuda" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\"\"\"\n", "CUDA DOESN'T WORK ON THE VIRTUAL MACHINE\n", "YOU ARE WELCOME TO TRY THIS ON YOU OWN COMPUTER\n", "\"\"\"\n", "from string import Template\n", "\n", "cuda_src_template = Template(\"\"\"\n", "// Cuda splitting\n", "#define MTB ${max_threads_per_block}\n", "#define MBP ${max_blocks_per_grid}\n", "\n", "// Array size\n", "#define fx ${fx}\n", "#define fy ${fy}\n", "#define gx ${gx}\n", "#define gy ${gy}\n", "\n", "// Macro for converting subscripts to linear index:\n", "#define f_INDEX(i, j) (i) * (fy) + (j)\n", "\n", "// Macro for converting subscripts to linear index:\n", "#define g_INDEX(i, j) (i) * (gy) + (j)\n", "\n", "__global__ void convolve_cuda(long *f, long *g, long *h) {\n", "\n", " unsigned int idx = blockIdx.y * MTB * MBP + blockIdx.x * MTB + threadIdx.x;\n", "\n", " // Convert the linear index to 
subscripts:\n", " unsigned int i = idx / fy;\n", " unsigned int j = idx % fy;\n", "\n", " long smax = gx;\n", " long tmax = gy;\n", "\n", " long smid = smax / 2;\n", " long tmid = tmax / 2;\n", "\n", " if (smid <= i && i < fx - smid) {\n", " if (tmid <= j && j < fy - tmid) {\n", "\n", " h[f_INDEX(i, j)] = 0.;\n", " \n", " for (long s = 0; s < smax; s++)\n", " for (long t = 0; t < tmax; t++)\n", " h[f_INDEX(i, j)] += g[g_INDEX(s, t)] * f[f_INDEX(i + s - smid, j + t - tmid)];\n", " \n", " }\n", " }\n", "}\n", "\"\"\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\"\"\"\n", "CUDA DOESN'T WORK ON THE VIRTUAL MACHINE\n", "YOU ARE WELCOME TO TRY THIS ON YOU OWN COMPUTER\n", "\"\"\"\n", "import skcuda.misc as misc\n", "import pycuda.autoinit\n", "device = pycuda.autoinit.device\n", "max_threads_per_block, _, max_grid_dim = misc.get_dev_attrs(device)\n", "max_blocks_per_grid = max(max_grid_dim)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\"\"\"\n", "CUDA DOESN'T WORK ON THE VIRTUAL MACHINE\n", "YOU ARE WELCOME TO TRY THIS ON YOU OWN COMPUTER\n", "\"\"\"\n", "from functools import partial\n", "from pycuda.compiler import SourceModule\n", "\n", "cuda_src = cuda_src_template.substitute(max_threads_per_block=max_threads_per_block,\n", " max_blocks_per_grid=max_blocks_per_grid,\n", " fx=large_f.shape[0], fy=large_f.shape[1],\n", " gx=g.shape[0], gy=g.shape[1]\n", " )\n", "cuda_module = SourceModule(cuda_src, options= [\"-O3\", \"-use_fast_math\", \"-default-stream=per-thread\"])\n", "print(\"Compilation OK\")\n", "\n", "__convolve_cuda = cuda_module.get_function('convolve_cuda')\n", "\n", "block_dim, grid_dim = misc.select_block_grid_sizes(device, large_f.shape)\n", "_convolve_cuda = partial(__convolve_cuda,\n", " block=block_dim,\n", " grid=grid_dim)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\"\"\"\n", "CUDA DOESN'T 
WORK ON THE VIRTUAL MACHINE\n", "YOU ARE WELCOME TO TRY THIS ON YOU OWN COMPUTER\n", "\"\"\"\n", "import pycuda.gpuarray as gpuarray\n", "\n", "def cuda_setup(f, g):\n", " f_gpu = gpuarray.to_gpu(f)\n", " g_gpu = gpuarray.to_gpu(g)\n", " return f_gpu, g_gpu\n", "\n", "def cuda_finish(h_gpu):\n", " return h_gpu.get()\n", "\n", "@instrument(check=i_convolve_cython_pure)\n", "def convolve_cuda(f, g):\n", " f_gpu, g_gpu = cuda_setup(f, g)\n", " h_gpu = gpuarray.zeros_like(f_gpu)\n", " _convolve_cuda(f_gpu, g_gpu, h_gpu)\n", " return cuda_finish(h_gpu)\n", "\n", "convolve_cuda(large_f, g);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\"\"\"\n", "CUDA DOESN'T WORK ON THE VIRTUAL MACHINE\n", "YOU ARE WELCOME TO TRY THIS ON YOU OWN COMPUTER\n", "\"\"\"\n", "@instrument(check=i_convolve_cython_pure, setup=cuda_setup, finish=cuda_finish)\n", "def convolve_cuda2(f_gpu, g_gpu):\n", " h_gpu = gpuarray.zeros_like(f_gpu)\n", " _convolve_cuda(f_gpu, g_gpu, h_gpu)\n", " return h_gpu\n", "\n", "convolve_cuda2(large_f, g);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Conclusion on optimisation\n", "(values may be different as before)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Simple operations\n", "\n", "context | time | comment\n", ":-------|-------:|:----------\n", "Python | 1040ms | naive implementation\n", "Python | 578ms | removing tmparrays\n", "Python | 4.67ms | using implicit loops\n", "numexpr | 1.02ms |\n", "numba | **793us** |\n", "cython | 2.36ms |\n", "f2py | 1.28ms |\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Tough operations\n", "\n", "context | time | comment\n", "--------|--------|-----------\n", "Python | 58.8ms | naive implementation\n", "Python | 58.6ms | using local sin\n", "numexpr | **11.7ms** |\n", "numba | 14.2ms |\n", "cython | 15ms |\n", "f2py | 12ms |" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Convolution\n", "\n", 
"context | time<br>small case | time<br>large case | comment\n", "--------|------------|------------|--------\n", "Python | 169ms | | naive implementation\n", "scipy | 6.19ms | 693ms |\n", "numba | 2.34ms | 253ms |\n", "numba | 1.50ms | 103ms | using stencil\n", "cython | 748us | 81.2ms | using numpy datastructure\n", "cython | 731us | 64.8ms | using c datastructure\n", "c | | 57.2ms |\n", "f2py | **707us** | 70ms |\n", "fortran | | 61.9ms |\n", "cuda | | 42.1ms | including communication\n", "cuda | | **31.9ms** | excluding communication" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import time\n", "import numpy as np\n", "def heavy_fonction(i):\n", "    t = np.random.rand() / 10\n", "    time.sleep(t)\n", "    return i, t\n", "    " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- asyncio\n", "    - not a real parallelism\n", "    - effective for io-bound tasks (web)\n", "    - not very interesting here" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- joblib\n", "    - real parallelism\n", "    - limited to one computer\n", "    - relatively easy to use\n", "    - multithreading or multiprocessing" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from joblib import Parallel, delayed\n", "\n", "if __name__ == \"__main__\":\n", "\n", "    tic = time.time()\n", "    res = Parallel(n_jobs=-1, backend='threading')(delayed(heavy_fonction)(i) \\\n", "                                                   for i in range(2000))\n", "    tac = time.time()\n", "    index, times = np.asarray(res).T\n", "    print(tac - tic)\n", "    print(times.sum())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- multithreading\n", "    - real parallelism\n", "    - limited to one computer\n", "    - shared memory" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from threading import Thread, RLock\n", "\n", "N = 2000\n", "N_t = 10\n", "current = 0\n", "nprocs = 8\n", "output_list = np.empty(N)\n", "\n", "lock = 
RLock()\n", "\n", "class ThreadJob(Thread):\n", "    def run(self):\n", "        \"\"\"This code will be executed by each thread\"\"\"\n", "        global current\n", "        \n", "        while current < N:\n", "            \n", "            with lock:\n", "                position = current\n", "                current += N_t\n", "            \n", "            fin = min(position + N_t, N)\n", "            \n", "            for i in range(position, fin):\n", "                j, t = heavy_fonction(i)\n", "                output_list[j] = t\n", "\n", "if __name__ == \"__main__\":\n", "\n", "    # Create the threads\n", "    threads = [ThreadJob() for i in range(nprocs)]\n", "\n", "    tic = time.time()\n", "    # Start the threads\n", "    for thread in threads:\n", "        thread.start()\n", "\n", "    # Wait until all threads have finished\n", "    for thread in threads:\n", "        thread.join()\n", "    tac = time.time()\n", "\n", "\n", "    print(tac - tic)\n", "    print(output_list.sum())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- multiprocessing\n", "    - real parallelism\n", "    - limited to one computer" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import multiprocessing as mp\n", "from queue import Empty\n", "\n", "def process_job(q,r):\n", "    \"\"\"This code will be executed by each process\"\"\"\n", "    while True:\n", "        try:\n", "            i = q.get(block=False)\n", "            r.put(heavy_fonction(i))\n", "        except Empty:\n", "            if q.empty():\n", "                if q.qsize() == 0:\n", "                    break\n", "\n", "if __name__ == \"__main__\":\n", "\n", "    # Define an output queue\n", "    r = mp.Queue()\n", "\n", "    # Define an input queue\n", "    q = mp.Queue()\n", "\n", "    for i in range(2000):\n", "        q.put(i)\n", "\n", "    nprocs = 8\n", "    # Setup a list of processes that we want to run\n", "    processes = [mp.Process(target=process_job, args=(q, r)) for i in range(nprocs)]\n", "\n", "    tic = time.time()\n", "\n", "    # Run processes\n", "    for p in processes:\n", "        p.start()\n", "\n", "    # Get process results from the output queue\n", "    results = np.empty(2000)\n", "    for i in range(2000):\n", "        j, t = r.get()\n", "        results[j] = t\n", "\n", "    tac = time.time()\n", "\n", "    # Exit the completed processes\n", "    for p in processes:\n", "        p.join()\n", "\n", "    print(tac - tic)\n", "    print(results.sum())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- mpi (mpi4py)\n", "    - real parallelism\n", "    - unlimited\n", "    - relatively complex to use (same as in C, fortran, ...)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Exercise\n", "The following code reads an image in pgm format (ascii) and stores it in a 2D list. \n", "For each pixel of the image a kernel gets all neighbors (9 counting the pixel itself) and applies a computation. \n", "Analyse the performance of the code, identify bottlenecks and try to optimize it." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You can apply your code on the following images :\n", "- data/test.pgm\n", "- data/test32.pgm\n", "- data/brain_604.ascii.pgm\n", "- data/apollonian_gasket.ascii.pgm\n", "- data/dla.ascii.pgm\n", "\n", "For reference, the timings on my computer are : \n", "For data/test.pgm\n", "\n", "On my computer :\n", "```\n", "Reading Files\n", "6.67 ms ± 262 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", "Computing\n", "503 µs ± 5.41 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", "```\n", "\n", "My solution\n", "```\n", "Reading Files\n", "65.3 µs ± 1.58 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", "Computing\n", "66.2 µs ± 1.02 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", "```\n", "\n", "And, the bigger the image, the bigger the gain !"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<!--REMOVE BUTTON FOR GITHUB\n", "<button data-toggle=\"collapse\" data-target=\"#hints\">Hints</button>\n", "<div id=\"hints\" class=\"collapse\">-->\n", "\n", "### Hints\n", " \n", "Part 1\n", "- Open input file only once\n", "- Avoid appending data\n", "- Use numpy array for data storage\n", "- What is really doing the compute_wtf function ?\n", "\n", "Part 2\n", "- compile the compute part\n", "\n", "Part 3\n", "- parallelize the work on each image\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_description(filename):\n", " \"\"\"\n", " Read the header part of the file\n", " \"\"\"\n", " f = open(filename, 'r')\n", " nline = 0\n", " description = {}\n", " while nline < 3:\n", " line = f.readline()\n", " if line[0] == '#':\n", " continue\n", " nline += 1\n", " if nline == 1:\n", " description['format'] = line.strip()\n", " elif nline == 2:\n", " description['dimension'] = int(line.split()[1]), int(line.split()[0])\n", " elif nline ==3:\n", " description['deep'] = int(line.strip())\n", " f.close()\n", " return description\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_value(filename, coord):\n", " \"\"\"\n", " Get value at coord in an image in the PGM format\n", " \n", " The main problem here is that the file have a limited width, and the values are wrapped\n", " Thus, the value at coord 12,32 might be in the 24,6 in the file\n", " \"\"\"\n", " description = get_description(filename)\n", " xdim, ydim = description['dimension']\n", " i = coord[0]\n", " j = coord[1]\n", " f = open(filename, 'r', encoding='utf-8')\n", " nline = 0\n", " while nline < 3:\n", " line = f.readline()\n", " if line[0] == '#':\n", " continue\n", " nline += 1\n", " #here we are at coordinate (0,0)\n", " icur, jcur = 0,0\n", " test = True\n", " while(test):\n", " values = f.readline().split()\n", " nvalues = 
len(values)\n", " if (icur == i):\n", " if (jcur + nvalues > j):\n", " jvalues = j - jcur\n", " value = values[jvalues]\n", " test=False\n", " else:\n", " jcur += nvalues\n", " else:\n", " jcur += nvalues\n", " if (jcur >= ydim):\n", " icur += jcur // ydim\n", " jcur = jcur % ydim\n", " f.close()\n", " return int(value)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def read_values(filename, description):\n", " \"\"\"\n", " Read all the values\n", " \"\"\"\n", " values = []\n", " for i in range(description['dimension'][0]):\n", " values.append([])\n", " for j in range(description['dimension'][1]):\n", " values[i].append(get_value(filename, (i, j)))\n", "\n", " return values" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def read_file(filename):\n", " \"\"\"\n", " Read an image in the PGM format\n", " \"\"\"\n", " # read the header part\n", " description = get_description(filename)\n", "\n", " # read the values\n", " values = read_values(filename, description)\n", " return values" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def init(files):\n", " \"\"\"\n", " Read all files\n", " \"\"\"\n", " data = []\n", " for file in files:\n", " data.append(read_file(file))\n", " \n", " return data " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_neighbors(tab, i, j):\n", " \"\"\"\n", " Extract from the array the neighbors of a pixel\n", " \"\"\"\n", " neigh = []\n", " for jrange in [-1, 0, 1]:\n", " for irange in [-1, 0, 1]:\n", " neigh.append(tab[i + irange][j + jrange])\n", " return neigh" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import math\n", "\n", "def compute_wtf(neigh):\n", " \"\"\"\n", " Apply a reduction operation on the array neigh\n", " \"\"\"\n", " value = 1.\n", " for i in range(len(neigh)):\n", " 
value *= math.exp(neigh[i]) ** (1 / len(neigh))\n", " value = math.log(value)\n", " \n", " return float(value)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def kernel(tab):\n", " \"\"\"\n", " Apply compute_wtf on each pixel except boundary\n", " \"\"\"\n", " xdim = len(tab)\n", " ydim = len(tab[0])\n", " \n", " # create the result list\n", " result = []\n", " \n", " #1st line contains only 0\n", " result.append([0])\n", " for jrange in range(1, ydim):\n", " result[0].append(0)\n", " \n", " for irange in range(1, xdim - 1):\n", " #1st column contains only 0\n", " result.append([0])\n", " \n", " # For each pixel inside the image\n", " for jrange in range(1, ydim - 1):\n", " # Extract the neighboring pixels\n", " neigh = get_neighbors(tab, irange, jrange)\n", " \n", " # Apply compute_wtf on it\n", " res = compute_wtf(neigh)\n", " \n", " # Store the result\n", " result[irange].append(res)\n", " \n", " #last colum contains only 0\n", " result[irange].append(0)\n", " \n", " #last line contains only 0\n", " result.append([])\n", " for jrange in range(ydim):\n", " result[xdim - 1].append(0)\n", " \n", " return result" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def job(data):\n", " \"\"\"\n", " Apply kernel of each image\n", " \"\"\"\n", " results = []\n", " for image in data:\n", " results.append(kernel(image))\n", " return results" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%matplotlib notebook \n", "import matplotlib.pyplot as plt\n", "\n", "def plot(data):\n", " nimages = len(data)\n", " \n", " if nimages > 1:\n", " fig, axes = plt.subplots(nimages, 1)\n", " for image, ax in zip(data, axes):\n", " ax.imshow(image)\n", " else:\n", " plt.figure()\n", " plt.imshow(data[0])\n", " \n", " plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ 
"files = [\"data/test.pgm\"] #,\n", " #\"data/test32.pgm\",\n", " #\"data/brain_604.ascii.pgm\",\n", " #\"data/apollonian_gasket.ascii.pgm\",\n", " #\"data/dla.ascii.pgm\",\n", " #]\n", " \n", "if __name__ == \"__main__\":\n", " print(\"Reading Files\")\n", " data = init(files)\n", " %timeit init(files)\n", "\n", " print(\"Computing\")\n", " result = job(data)\n", " %timeit job(data)\n", "\n", " plot(data)\n", " plot(result)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<!--REMOVE BUTTON FOR GITHUB\n", "<button data-toggle=\"collapse\" data-target=\"#optimfull\">Solution</button>\n", "<div id=\"optimfull\" class=\"collapse\">-->\n", "\n", "### Solution\n", "\n", "#### Cell 1:\n", "```\n", "%load_ext Cython\n", "%set_env CFLAGS=\"-Ofast -march=native -fvect-cost-model=cheap -fopenmp -Wno-unused-variable -Wno-cpp -Wno-maybe-uninitialized\"\n", "```\n", "\n", "#### Cell 2:\n", "```cython\n", "%%cython --link-args=-fopenmp\n", "# cython: language_level=3\n", "# cython: initializedcheck=False\n", "# cython: binding=True\n", "# cython: nonecheck=False\n", "# cython: boundscheck=False\n", "# cython: wraparound=False\n", "# distutils: extra_link_args = -fopenmp\n", "\n", "import numpy as np\n", "cimport numpy as np\n", "cimport cython\n", "from cython.parallel cimport parallel, prange\n", "\n", "def ckernel(const double[:,::1] &data, const long nt):\n", " cdef long n = data.shape[0]\n", " cdef long m = data.shape[1]\n", " \n", " cdef double[:,::1] res = np.zeros([n, m], dtype=np.double)\n", "\n", " cdef double value\n", " cdef long i, j, s, t\n", "\n", " with nogil, parallel(num_threads=nt):\n", " for i in prange(1, n - 1):\n", " for j in range(1, m - 1):\n", " value = 0\n", " for s in range(-1, 2):\n", " for t in range(-1, 2):\n", " value += data[i + s, j + t]\n", " res[i, j] += value / 9\n", " return np.asarray(res)\n", "```\n", "\n", "#### Cell 3 :\n", "```python\n", "%matplotlib notebook \n", "\n", "import numpy as np\n", "import matplotlib.pyplot as 
plt\n", "from threading import Thread, RLock\n", "from copy import deepcopy\n", "import os\n", "\n", "nprocs = os.cpu_count()\n", "\n", "result = []\n", "data = []\n", "\n", "current = 0\n", "\n", "verrou = RLock()\n", "\n", "class ThreadJob(Thread):\n", " def run(self):\n", " global current, verrou\n", " \"\"\"Code à exécuter pendant l'exécution du thread.\"\"\"\n", " while current < len(data):\n", " \n", " with verrou:\n", " position = current\n", " current += 1\n", " \n", " kernel(position)\n", "\n", "def get_description(file):\n", " \"\"\"\n", " Read the header part of the file\n", " \n", " if file is an opened file:\n", " go back the the begining of the file\n", " read the header\n", " leave the file at the end of header (start of values)\n", " else:\n", " call itself with the file openened\n", " (this should be never called)\n", " \"\"\"\n", " if isinstance(file, str):\n", " with open(file,'r', encoding=\"utf-8\") as opened_file:\n", " return get_description(opened_file)\n", "\n", " # return to begining\n", " file.seek(0)\n", " nline = 0\n", " description = {}\n", " while nline < 3:\n", " line = file.readline()\n", " if line[0] == '#':\n", " continue\n", " nline += 1\n", " if nline == 1:\n", " description['format']=line.strip()\n", " elif nline == 2:\n", " description['dimension']=int(line.split()[1]), int(line.split()[0])\n", " elif nline == 3:\n", " description['deep']=int(line.strip())\n", " return description\n", " \n", "def read_values(file, description):\n", " \"\"\"\n", " Read all the values directly\n", " The file must be already opened\n", " The values are stored in a numpy array\n", " \"\"\"\n", " # pre-allocate the array\n", " nx, ny = description['dimension']\n", " values = np.empty((nx * ny))\n", " \n", " i = 0\n", " for line in file:\n", " if line[0] == '#':\n", " continue\n", " vals = line.split()\n", " nvals = len(vals)\n", " values[i:i + nvals] = vals\n", " i += nvals\n", " return values.reshape((nx, ny))\n", "\n", "def 
read_file(filename):\n", " \"\"\"\n", " Read an image in the PGM format\n", " \n", " Open the file *once*\n", " \"\"\"\n", " # open the file once\n", " with open(filename, 'r', encoding=\"utf-8\") as file:\n", "\n", " # read the header part\n", " description = get_description(file)\n", "\n", " # read the values\n", " values = read_values(file, description)\n", " return values\n", "\n", "def kernel(i):\n", " \"\"\"\n", " Apply compute_wtf on each pixel except boundary\n", " \"\"\"\n", " global data, result, nt_omp, nt_job\n", " if data[i].size < 1000:\n", " result[i] = ckernel(data[i], 1)\n", " else:\n", " result[i] = ckernel(data[i], nt_job)\n", "\n", "def job(data):\n", " \"\"\"\n", " Apply kernel of each image\n", " \"\"\"\n", " global current, nt_job\n", " \n", " current = 0\n", " # Création des threads\n", " threads = [ThreadJob() for i in range(nt_job)]\n", "\n", " # Lancement des threads\n", " for thread in threads:\n", " thread.start()\n", "\n", " # Attend que les threads se terminent\n", " for thread in threads:\n", " thread.join()\n", "\n", "def init(files):\n", " \"\"\"\n", " Read all files\n", " \"\"\"\n", " data = []\n", " for file in files:\n", " data.append(read_file(file))\n", " \n", " return data\n", "\n", "def plot(data):\n", " nimages = len(data)\n", " if nimages > 1:\n", " fig, axes = plt.subplots(nimages, 1)\n", " for image, ax in zip(data, axes):\n", " ax.imshow(image)\n", " else:\n", " plt.figure()\n", " plt.imshow(data[0])\n", " \n", " plt.show()\n", " \n", "\n", "files = [\"data/test.pgm\"] #,\n", " #\"data/test32.pgm\",\n", " #\"data/brain_604.ascii.pgm\",\n", " #\"data/apollonian_gasket.ascii.pgm\",\n", " #\"data/dla.ascii.pgm\",\n", " #]\n", "\n", "nt_job = min(nprocs // 2, len(files))\n", "nt_omp = nprocs - nt_job\n", "\n", "if __name__ == \"__main__\":\n", " \n", " print(\"Reading Files\")\n", " data = init(files)\n", " %timeit init(files)\n", " \n", " print(\"Computing\")\n", " #sort data: biggest images first for better equilibrium in 
parallel\n", " data = sorted(data, key=np.size, reverse=True)\n", "\n", " #prepare result array\n", " result = deepcopy(data)\n", "\n", " job(data)\n", " %timeit job(data)\n", " \n", " plot(data)\n", " plot(result)\n", "```" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 1 }