{ "cells": [ { "cell_type": "code", "execution_count": 1, "source": [ "from servicex import ServiceXDataset\r\n", "from coffea.processor.servicex import DataSource, Analysis\r\n", "from coffea.processor.servicex import DaskExecutor\r\n", "from func_adl_servicex import ServiceXSourceXAOD \r\n", "\r\n", "import matplotlib.pyplot as plt\r\n", "\r\n", "from coffea import hist, processor\r\n", "from IPython.display import display, update_display, HTML" ], "outputs": [], "metadata": {} }, { "cell_type": "code", "execution_count": 2, "source": [ "dids = ['mc15_13TeV:mc15_13TeV.361106.PowhegPythia8EvtGen_AZNLOCTEQ6L1_Zee.merge.DAOD_STDM3.e3601_s2576_s2132_r6630_r6264_p2363_tid05630052_00']\r\n", "datasets = [\r\n", " ServiceXDataset(did, backend_name='atlas_xaod')\r\n", " for did in dids\r\n", "]" ], "outputs": [], "metadata": {} }, { "cell_type": "code", "execution_count": 3, "source": [ "leptons_per_event_query = ServiceXSourceXAOD('dummy_dataset') \\\r\n", " .Select(lambda e: e.Electrons(\"Electrons\")) \\\r\n", " .Select(lambda eles: eles.Where(lambda e: e.pt()/1000.0 > 30.0)) \\\r\n", " .Select(lambda eles: eles.Where(lambda e: abs(e.eta()) < 2.5)) \\\r\n", " .Where(lambda eles: len(eles) == 2) \\\r\n", " .Select(lambda ls: {\r\n", " 'electrons_pt': ls.Select(lambda e: e.pt()/1000.0),\r\n", " 'electrons_eta': ls.Select(lambda e: e.eta()),\r\n", " 'electrons_phi': ls.Select(lambda e: e.phi()),\r\n", " 'electrons_mass': ls.Select(lambda e: e.m()/1000.0),\r\n", " 'electrons_charge': ls.Select(lambda e: e.charge())\r\n", " })" ], "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Found backend type matching \"xaod\". Matching by type is depreciated. Please switch to using the \"name\" keyword in your servicex.yaml file.\n", "Found backend type matching \"xaod\". Matching by type is depreciated. 
Please switch to using the \"name\" keyword in your servicex.yaml file.\n" ] } ], "metadata": {} }, { "cell_type": "code", "execution_count": 4, "source": [ "datasource = DataSource(query=leptons_per_event_query, metadata={'dataset-nickname': 'Zee'}, datasets=datasets)" ], "outputs": [], "metadata": {} }, { "cell_type": "code", "execution_count": 5, "source": [ "class Z_EEAnalysis(Analysis):\r\n", " @staticmethod\r\n", " def process(events):\r\n", " import awkward as ak\r\n", " from collections import defaultdict\r\n", "\r\n", " sumw = defaultdict(float)\r\n", " mass_hist = hist.Hist(\r\n", " \"Events\",\r\n", " hist.Cat(\"dataset\", \"Dataset\"),\r\n", " hist.Bin(\"mass\", \"$Z_{ee}$ [GeV]\", 60, 60, 120),\r\n", " )\r\n", "\r\n", " dataset = events.metadata['dataset-nickname']\r\n", "\r\n", " electrons = events.electrons\r\n", "\r\n", " # Form the invar mass, plot.\r\n", " cut = (ak.num(electrons) == 2)\r\n", " diele = electrons[cut][:, 0] + electrons[cut][:, 1]\r\n", "\r\n", " sumw[dataset] += len(events)\r\n", " mass_hist.fill(\r\n", " dataset=dataset,\r\n", " mass=diele.mass,\r\n", " )\r\n", " \r\n", " return {\r\n", " \"sumw\": sumw,\r\n", " \"mass\": mass_hist\r\n", " }" ], "outputs": [], "metadata": {} }, { "cell_type": "markdown", "source": [ "We create the analysis and executor. The `DaskExecutor` can be created in two ways:\n", "\n", "- `DaskExecutor()` which creates a local cluster. All data will be pulled down to the local machine via an `uproot.open`. This can be painful depending on what your connection looks like.\n", "- `DaskExecutor(client_addr=\"node.name.edu:8786\")` which will attach to a remote `dask` cluster. This is particularly powerful if the `dask` cluster is located close to the `servicex` installation." 
], "metadata": {} }, { "cell_type": "code", "execution_count": 6, "source": [ "analysis = Z_EEAnalysis()\r\n", "#executor = DaskExecutor(client_addr=\"localhost:8786\")\r\n", "executor = DaskExecutor()" ], "outputs": [], "metadata": {} }, { "cell_type": "markdown", "source": [ "This next method makes an updating plot, as the data appears. If you didn't want the fancy updating plot, you could do `await executor.execute(analysis, datasource)`, and you'd end up with the coffea dict when that cell completed." ], "metadata": {} }, { "cell_type": "code", "execution_count": 7, "source": [ "%matplotlib inline\r\n", "async def plot_stream(accumulator_stream):\r\n", " global first\r\n", " fig, axes = plt.subplots()\r\n", " first = True\r\n", "\r\n", "\r\n", " count = 0\r\n", " async for coffea_info in accumulator_stream:\r\n", " print(coffea_info)\r\n", " hist.plot1d(coffea_info['mass'], ax=axes)\r\n", "\r\n", " count += 1\r\n", " plt.text(0.95, 0.8, f'Chunks of data: {count}', horizontalalignment='right', transform=axes.transAxes)\r\n", "\r\n", " # Either display it or update a previous version of the plot\r\n", " if first:\r\n", " display(fig, display_id='mass_update')\r\n", " first = False\r\n", " else:\r\n", " update_display(fig, display_id='mass_update')\r\n", " return coffea_info\r\n", "\r\n", "await plot_stream(executor.execute(analysis, datasource))\r\n", "plt.close() # Prevents another copy of the plot showing up in the notebook" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{'sumw': defaultdict(, {'Zee': 24000.0}), 'mass': }\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/svg+xml": "\r\n\r\n\r\n \r\n \r\n \r\n \r\n 2021-08-16T22:42:43.430700\r\n image/svg+xml\r\n \r\n \r\n Matplotlib v3.4.2, https://matplotlib.org/\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", "image/png": "" }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "{'sumw': defaultdict(, {'Zee': 74000.0}), 'mass': }\n", "{'sumw': defaultdict(, {'Zee': 124000.0}), 'mass': }\n", "{'sumw': defaultdict(, {'Zee': 249000.0}), 'mass': }\n", "{'sumw': defaultdict(, {'Zee': 399000.0}), 'mass': }\n", "{'sumw': defaultdict(, {'Zee': 549000.0}), 'mass': }\n", "{'sumw': defaultdict(, {'Zee': 699000.0}), 'mass': }\n", "{'sumw': defaultdict(, {'Zee': 849000.0}), 'mass': }\n", "{'sumw': defaultdict(, {'Zee': 998800.0}), 'mass': }\n", "{'sumw': defaultdict(, {'Zee': 1148800.0}), 'mass': }\n", "{'sumw': defaultdict(, {'Zee': 1298800.0}), 'mass': }\n", "{'sumw': defaultdict(, {'Zee': 1343800.0}), 'mass': }\n", "{'sumw': defaultdict(, {'Zee': 1493800.0}), 'mass': }\n", "{'sumw': defaultdict(, {'Zee': 1543800.0}), 'mass': }\n", "{'sumw': defaultdict(, {'Zee': 1693800.0}), 'mass': }\n", "{'sumw': defaultdict(, {'Zee': 1843800.0}), 'mass': 
}\n", "{'sumw': defaultdict(, {'Zee': 1993800.0}), 'mass': }\n" ] } ], "metadata": {} } ], "metadata": { "kernelspec": { "name": "python3", "display_name": "Python 3.9.6 64-bit ('.venv': venv)" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.6" }, "pycharm": { "stem_cell": { "cell_type": "raw", "metadata": { "collapsed": false }, "source": [] } }, "interpreter": { "hash": "0857beeb8bd56f57ec31dfb6d49aa425259567fda5d0227b9061b424f42da337" } }, "nbformat": 4, "nbformat_minor": 4 }