# This program histograms, per event, the sum of the pT of all jets that have
# pT > 30 GeV and are separated by delta-R > 0.4 from every lepton (muon or
# electron) with pT > 10 GeV.
class Processor(processor.ProcessorABC):
    """Coffea processor that fills a histogram of the per-event summed pT of isolated jets.

    A jet is kept when its pT exceeds ``jet_pt_min`` and its ``delta_r`` to every
    selected muon and every selected electron (pT above ``lepton_pt_min``) exceeds
    ``min_delta_r``. The defaults reproduce the original hard-coded selection
    (30 GeV, 10 GeV, 0.4), so ``Processor()`` behaves exactly as before.

    Output accumulator keys:
        'Jet_pt'  -- ``hist.Hist`` with a 'dataset' category axis and a 'Jet_pt' bin axis.
        'cutflow' -- integer counters recorded after each selection step.
    """

    def __init__(self, jet_pt_min=30, lepton_pt_min=10, min_delta_r=0.4):
        """Store the selection thresholds and build the empty accumulator.

        Parameters:
            jet_pt_min: lower pT threshold applied to jets.
            lepton_pt_min: lower pT threshold applied to muons and electrons.
            min_delta_r: minimum ``delta_r`` required between a jet and each selected lepton.
        """
        self._jet_pt_min = jet_pt_min
        self._lepton_pt_min = lepton_pt_min
        self._min_delta_r = min_delta_r

        dataset_axis = hist.Cat("dataset", "")
        # This axis holds the summed jet pT (it was previously bound to a local named `muon_axis`).
        jet_pt_axis = hist.Bin("Jet_pt", "Jet_pt [GeV]", 100, 15, 200)

        self._accumulator = processor.dict_accumulator({
            'Jet_pt': hist.Hist("Counts", dataset_axis, jet_pt_axis),
            'cutflow': processor.defaultdict_accumulator(int)
        })

    @property
    def accumulator(self):
        """Return the template accumulator built in ``__init__``."""
        return self._accumulator

    def process(self, events):
        """Apply the jet/lepton selection to one chunk of events and fill the outputs.

        Parameters:
            events: chunk of events exposing ``Muon``, ``Electron`` and ``Jet``
                collections and a ``metadata["dataset"]`` entry.

        Returns:
            A fresh accumulator (``self.accumulator.identity()``) with the
            'Jet_pt' histogram and the 'cutflow' counters filled for this chunk.
        """
        output = self.accumulator.identity()
        cutflow = output['cutflow']

        dataset = events.metadata["dataset"]

        muons = events.Muon
        electrons = events.Electron
        jets = events.Jet

        cutflow['all events'] += jets.size
        cutflow['all jets'] += jets.counts.sum()

        # Render the thresholds for the cutflow keys; with the default cuts this
        # yields exactly 'jets with pt > 30', 'muons with pt > 10', etc.
        jet_cut_label = '{:g}'.format(self._jet_pt_min)
        lepton_cut_label = '{:g}'.format(self._lepton_pt_min)

        # Select jets above the jet pT threshold.
        min_jetpt = (jets.pt > self._jet_pt_min)
        cutflow['jets with pt > ' + jet_cut_label] += min_jetpt.sum().sum()

        # Select leptons above the lepton pT threshold.
        min_muonpt = (muons.pt > self._lepton_pt_min)
        cutflow['muons with pt > ' + lepton_cut_label] += min_muonpt.sum().sum()
        min_electronpt = (electrons.pt > self._lepton_pt_min)
        cutflow['electrons with pt > ' + lepton_cut_label] += min_electronpt.sum().sum()

        # Mask jets and leptons with their minimum-pT requirements.
        goodjets = jets[min_jetpt]
        goodmuons = muons[min_muonpt]
        goodelectrons = electrons[min_electronpt]

        # Build all (jet, lepton) pairs within each event. nested=True is used so
        # that the pairs belonging to one jet stay grouped in their own sublist,
        # which lets the result be reduced back to one value per jet below.
        jet_muon_pairs = goodjets.cross(goodmuons, nested=True)
        jet_electron_pairs = goodjets.cross(goodelectrons, nested=True)

        # Require delta_r > min_delta_r for each pair, then reduce with .all() so
        # a jet passes only if it is far enough from EVERY selected lepton of that type.
        # NOTE(review): a jet in an event with no selected leptons presumably passes
        # (all() over an empty sublist) -- confirm against the awkward-array version in use.
        good_jm_pairs = (jet_muon_pairs.i0.delta_r(jet_muon_pairs.i1) > self._min_delta_r).all()
        good_je_pairs = (jet_electron_pairs.i0.delta_r(jet_electron_pairs.i1) > self._min_delta_r).all()

        # Combined mask: the jet is isolated from muons AND from electrons.
        # Computed once here instead of being rebuilt for every use.
        isolated = good_jm_pairs & good_je_pairs

        cutflow['jet-muon pairs'] += good_jm_pairs.sum().sum()
        cutflow['jet-electron pairs'] += good_je_pairs.sum().sum()
        cutflow['jet-lepton pairs'] += isolated.sum().sum()

        # Apply the combined mask once, then sum the surviving jet pT within each event.
        isolated_jets = goodjets[isolated]
        sumjets = isolated_jets.pt.sum()
        cutflow['final jets'] += isolated_jets.counts.sum()
        output['Jet_pt'].fill(dataset=dataset, Jet_pt=sumjets.flatten())

        return output

    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged."""
        return accumulator
# Cell 3 -- submit the processor to the Dask scheduler and collect the result.
from dask.distributed import Client
from coffea_casa import CoffeaCasaCluster

# Mapping of dataset name -> list of input ROOT files.
fileset = {
    'SingleMu': [
        "root://eospublic.cern.ch//eos/root-eos/benchmark/Run2012B_SingleMu.root",
    ],
}

# Connect to the scheduler at this address.
client = Client("tls://localhost:8786")

# Run Processor over the "Events" tree of every file in `fileset` with the Dask executor.
output = processor.run_uproot_job(
    fileset=fileset,
    treename="Events",
    processor_instance=Processor(),
    executor=processor.dask_executor,
    executor_args={'client': client, 'nano': True},
    chunksize=250000,
)

# Cell 4 -- draw the 'Jet_pt' histogram, overlaying one curve per dataset.
hist.plot1d(
    output['Jet_pt'],
    overlay='dataset',
    fill_opts={'edgecolor': (0, 0, 0, 0.3), 'alpha': 0.8},
)

# Cell 5 -- print each cutflow counter as "<name> <count>", one per line.
for key, value in output['cutflow'].items():
    print(key, value)