{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " Loading BokehJS ...\n", "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "(function(global) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " var force = \"1\";\n", "\n", " if (typeof (window._bokeh_onload_callbacks) === \"undefined\" || force !== \"\") {\n", " window._bokeh_onload_callbacks = [];\n", " window._bokeh_is_loading = undefined;\n", " }\n", "\n", "\n", " \n", " if (typeof (window._bokeh_timeout) === \"undefined\" || force !== \"\") {\n", " window._bokeh_timeout = Date.now() + 5000;\n", " window._bokeh_failed_load = false;\n", " }\n", "\n", " var NB_LOAD_WARNING = {'data': {'text/html':\n", " \"
\\n\"+\n", " \"

\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"

\\n\"+\n", " \"\\n\"+\n", " \"\\n\"+\n", " \"from bokeh.resources import INLINE\\n\"+\n", " \"output_notebook(resources=INLINE)\\n\"+\n", " \"\\n\"+\n", " \"
\"}};\n", "\n", " function display_loaded() {\n", " if (window.Bokeh !== undefined) {\n", " Bokeh.$(\"#5cc4f75d-f7ac-4641-b5c7-d516675fdd72\").text(\"BokehJS successfully loaded.\");\n", " } else if (Date.now() < window._bokeh_timeout) {\n", " setTimeout(display_loaded, 100)\n", " }\n", " }\n", "\n", " function run_callbacks() {\n", " window._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n", " delete window._bokeh_onload_callbacks\n", " console.info(\"Bokeh: all callbacks have finished\");\n", " }\n", "\n", " function load_libs(js_urls, callback) {\n", " window._bokeh_onload_callbacks.push(callback);\n", " if (window._bokeh_is_loading > 0) {\n", " console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", " return null;\n", " }\n", " if (js_urls == null || js_urls.length === 0) {\n", " run_callbacks();\n", " return null;\n", " }\n", " console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", " window._bokeh_is_loading = js_urls.length;\n", " for (var i = 0; i < js_urls.length; i++) {\n", " var url = js_urls[i];\n", " var s = document.createElement('script');\n", " s.src = url;\n", " s.async = false;\n", " s.onreadystatechange = s.onload = function() {\n", " window._bokeh_is_loading--;\n", " if (window._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: all BokehJS libraries loaded\");\n", " run_callbacks()\n", " }\n", " };\n", " s.onerror = function() {\n", " console.warn(\"failed to load library \" + url);\n", " };\n", " console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.getElementsByTagName(\"head\")[0].appendChild(s);\n", " }\n", " };var element = document.getElementById(\"5cc4f75d-f7ac-4641-b5c7-d516675fdd72\");\n", " if (element == null) {\n", " console.log(\"Bokeh: ERROR: autoload.js configured with elementid '5cc4f75d-f7ac-4641-b5c7-d516675fdd72' but no matching script tag was found. 
\")\n", " return false;\n", " }\n", "\n", " var js_urls = ['https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.js', 'https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.js'];\n", "\n", " var inline_js = [\n", " function(Bokeh) {\n", " Bokeh.set_log_level(\"info\");\n", " },\n", " \n", " function(Bokeh) {\n", " \n", " Bokeh.$(\"#5cc4f75d-f7ac-4641-b5c7-d516675fdd72\").text(\"BokehJS is loading...\");\n", " },\n", " function(Bokeh) {\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n", " }\n", " ];\n", "\n", " function run_inline_js() {\n", " \n", " if ((window.Bokeh !== undefined) || (force === \"1\")) {\n", " for (var i = 0; i < inline_js.length; i++) {\n", " inline_js[i](window.Bokeh);\n", " }if (force === \"1\") {\n", " display_loaded();\n", " }} else if (Date.now() < window._bokeh_timeout) {\n", " setTimeout(run_inline_js, 100);\n", " } else if (!window._bokeh_failed_load) {\n", " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", " window._bokeh_failed_load = true;\n", " } else if (!force) {\n", " var cell = $(\"#5cc4f75d-f7ac-4641-b5c7-d516675fdd72\").parents('.cell').data().cell;\n", " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", " }\n", "\n", " }\n", "\n", " if (window._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", " run_inline_js();\n", " } else {\n", " load_libs(js_urls, function() {\n", " console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n", " run_inline_js();\n", " });\n", " }\n", "}(this));" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from bokeh.io 
import output_notebook\n", "output_notebook()\n", "from bokeh.plotting import figure, output_file, show\n", "from bokeh.layouts import gridplot\n", "import numpy as np\n", "from freud import parallel, box, density\n", "from ipywidgets import IntProgress\n", "from IPython.display import display\n", "parallel.setNumThreads(4)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def default_bokeh(p):\n", "    \"\"\"Apply the tutorial's shared title, label and tick styling to the figure `p`.\"\"\"\n", "    heading = p.title\n", "    heading.text_font_size = \"18pt\"\n", "    heading.align = \"center\"\n", "\n", "    # the x and y axes receive exactly the same settings\n", "    for axis in (p.xaxis, p.yaxis):\n", "        axis.axis_label_text_font_size = \"14pt\"\n", "        axis.major_label_text_font_size = \"12pt\"\n", "        # tick lengths: 10 (major) / 5 (minor) inward, none outward\n", "        axis.major_tick_in = 10\n", "        axis.major_tick_out = 0\n", "        axis.minor_tick_in = 5\n", "        axis.minor_tick_out = 0" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Data copying vs. Data from pointers\n", "\n", "If you have never programmed in C++ before (or even if you have), you may not be familiar with pointers. While it's beyond the scope of this tutorial to cover what pointers are and how they are used, this tutorial will briefly cover some of the issues you may have in Freud which involve pointers. 
If you are interested in pointers, or if you plan on developing freud, the [C++ tutorial on pointers](http://www.cplusplus.com/doc/tutorial/pointers/) is a good place to start.\n", "\n", "## Python level example (not really pointers, but it gets the point across)\n", "\n", "Before running the following code, guess which value `b[0]` will have:\n", "* 1\n", "* 2" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2\n" ] } ], "source": [ "a = [1]\n", "b = a\n", "a[0] = 2\n", "print(b[0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Variables by reference\n", "\n", "In Python, this is an example of assignment by reference. `b` is not created as its own list. Rather, `b` refers to the same list object as `a`, so changing the list through `a` also changes what `b` sees. In C++ you can do the same thing (create/pass variables by reference), but you can also do it by pointer, which holds the memory address of the value. While there is a difference between the two, the end result is the same: by changing the value of `a` you change the value of `b`. There are many pros to doing things this way, including lower memory usage and faster performance, but it's easy to inadvertently overwrite data.\n", "\n", "# Creating NumPy arrays by pointer\n", "\n", "In freud, we decided to create our NumPy arrays by passing a pointer from C++. This was the fastest, most efficient way to do this, but it can result in data being changed or overwritten, so take care when performing calculations, and create copies where necessary.\n", "\n", "## \"Overwriting\" data\n", "\n", "The example below \"overwrites\" data stored in `r_avg` and `y_avg`. Read the code and see the output." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# create the RDF object\n", "rdf = density.RDF(rmax=10.0, dr=0.1)\n", "# load the example data (only box and position data are needed for the RDF)\n", "data_path = \"ex_data/phi065\"\n", "box_data = np.load(\"{}/box_data.npy\".format(data_path))\n", "pos_data = np.load(\"{}/pos_data.npy\".format(data_path))\n", "n_frames = pos_data.shape[0]\n", "\n", "# for all frames except the first (your syntax will vary based on your reader)\n", "# the loop ends at frame n_frames - 1, so that is where the bar must be full\n", "myProgressBar = IntProgress(min=1, max=max(1, n_frames - 1))\n", "display(myProgressBar)\n", "for frame in range(1, n_frames):\n", "    myProgressBar.value = frame\n", "    # read box, position data\n", "    l_box = box_data[frame]\n", "    l_pos = pos_data[frame]\n", "    # create the freud box object\n", "    fbox = box.Box(Lx=l_box[\"Lx\"], Ly=l_box[\"Ly\"], is2D=True)\n", "    # add this frame to the running RDF\n", "    rdf.accumulate(fbox, l_pos, l_pos)\n", "\n", "# NOTE: the two arrays below are intentionally NOT copied; this cell\n", "# demonstrates how the later call to compute() \"overwrites\" them\n", "# get the center of the histogram bins\n", "r_avg = rdf.getR()\n", "# get the value of the histogram bins\n", "y_avg = rdf.getRDF()\n", "\n", "# do the same thing, but only for the last frame\n", "# read box, position data\n", "l_box = box_data[-1]\n", "l_pos = pos_data[-1]\n", "# create the freud box object\n", "fbox = box.Box(Lx=l_box[\"Lx\"], Ly=l_box[\"Ly\"], is2D=True)\n", "# compute; reset is not necessary, called automatically\n", "rdf.compute(fbox, l_pos, l_pos)\n", "# get the center of the histogram bins\n", "r = rdf.getR()\n", "# get the value of the histogram bins\n", "y = rdf.getRDF()\n", "\n", "# create bokeh plot: markers + lines on the left, lines only on the right\n", "p0 = figure(title=\"RDF\", x_axis_label='r', y_axis_label='g(r)')\n", "p0.circle(r, y, legend=\"Compute\")\n", "p0.line(r, y, legend=\"Compute\", line_width=2)\n", "p0.square(r_avg, y_avg, legend=\"Accumulate\", fill_color=None, line_color=\"red\")\n", "p0.line(r_avg, y_avg, legend=\"Accumulate\", line_dash=[4,4], line_width=2, line_color=\"red\")\n", "\n", "default_bokeh(p0)\n", "\n", "p1 = figure(title=\"RDF\", 
x_axis_label='r', y_axis_label='g(r)')\n", "p1.line(r, y, legend=\"Compute\", line_width=2)\n", "p1.line(r_avg, y_avg, legend=\"Accumulate\", line_width=2, line_color=\"red\")\n", "\n", "default_bokeh(p1)\n", "\n", "grid = gridplot([p0, p1], ncols=2, plot_width=400, plot_height=400)\n", "\n", "show(grid)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### What happened?\n", "\n", "`y_avg` is created from the pointer to the data at the C++ level. When `compute` is called, the values in memory are changed, but the pointer remains the same, so `y` and `y_avg` hold the same data.\n", "\n", "## Avoid overwriting\n", "\n", "Use `numpy.copy()` to avoid this issue: `y_avg = np.copy(rdf.getRDF())`" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# create the RDF object\n", "rdf = density.RDF(rmax=10.0, dr=0.1)\n", "# load the example data (only box and position data are needed for the RDF)\n", "data_path = \"ex_data/phi065\"\n", "box_data = np.load(\"{}/box_data.npy\".format(data_path))\n", "pos_data = np.load(\"{}/pos_data.npy\".format(data_path))\n", "n_frames = pos_data.shape[0]\n", "\n", "# `rdf` was just constructed above, so no explicit reset call is made before accumulating\n", "# for all frames except the first (your syntax will vary based on your reader)\n", "# the loop ends at frame n_frames - 1, so that is where the bar must be full\n", "myProgressBar = IntProgress(min=1, max=max(1, n_frames - 1))\n", "display(myProgressBar)\n", "for frame in range(1, n_frames):\n", "    myProgressBar.value = frame\n", "    # read box, position data\n", "    l_box = box_data[frame]\n", "    l_pos = pos_data[frame]\n", "    # create the freud box object\n", "    fbox = box.Box(Lx=l_box[\"Lx\"], Ly=l_box[\"Ly\"], is2D=True)\n", "    # add this frame to the running RDF\n", "    rdf.accumulate(fbox, l_pos, l_pos)\n", "\n", "# copy the arrays so the later call to compute() cannot change them\n", "# get the center of the histogram bins\n", "r_avg = np.copy(rdf.getR())\n", "# get the value of the histogram bins\n", "y_avg = np.copy(rdf.getRDF())\n", "\n", "# do the same thing, but only for the last frame\n", "# read box, position data\n", "l_box = box_data[-1]\n", "l_pos = pos_data[-1]\n", "# create the freud box object\n", "fbox = box.Box(Lx=l_box[\"Lx\"], Ly=l_box[\"Ly\"], is2D=True)\n", "# compute\n", "rdf.compute(fbox, l_pos, l_pos)\n", "# get the center of the histogram bins\n", "r = rdf.getR()\n", "# get the value of the histogram bins\n", "y = rdf.getRDF()\n", "\n", "# create bokeh plot: markers + lines on the left, lines only on the right\n", "p0 = figure(title=\"RDF\", x_axis_label='r', y_axis_label='g(r)')\n", "p0.circle(r, y, legend=\"Compute\")\n", "p0.line(r, y, legend=\"Compute\", line_width=2)\n", "p0.square(r_avg, y_avg, legend=\"Accumulate\", fill_color=None, line_color=\"red\")\n", "p0.line(r_avg, y_avg, legend=\"Accumulate\", line_dash=[4,4], line_width=2, line_color=\"red\")\n", "\n", "default_bokeh(p0)\n", "\n", "p1 = figure(title=\"RDF\", x_axis_label='r', y_axis_label='g(r)')\n", "p1.line(r, y, legend=\"Compute\", line_width=2)\n", 
"p1.line(r_avg, y_avg, legend=\"Accumulate\", line_width=2, line_color=\"red\")\n", "\n", "default_bokeh(p1)\n", "\n", "grid = gridplot([p0, p1], ncols=2, plot_width=400, plot_height=400)\n", "\n", "show(grid)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "## Garbage collection and segmentation faults\n", "\n", "While unlikely, it is possible for freud objects to pass out of scope and be garbage collected, leaving the NumPy array's pointer pointing to other data, which can result in a segmentation fault. This is a very rare occurrence that can be avoided by using copies.\n", "\n", "# Summary:\n", "\n", "1. Freud returns NumPy arrays created from pointers to its C++ data instead of returning independent copies\n", "    * Done for performance\n", "2. Certain workflows can result in \"overwriting\" your data\n", "    * `np.copy()` can be used to avoid this scenario\n", "3. It is possible for freud objects to pass out of scope, and for the resulting NumPy arrays to have their data garbage collected, causing segmentation faults. Again, `np.copy()` can be used to avoid this." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" } }, "nbformat": 4, "nbformat_minor": 0 }