{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dimuon spectrum\n",
    "\n",
    "This code is a columnar adaptation of [a ROOT tutorial](https://root.cern.ch/doc/master/df102__NanoAODDimuonAnalysis_8py.html) showcasing the awkward array toolset, and utilizing Coffea [histograms](https://coffeateam.github.io/coffea/modules/coffea.hist.html).\n",
    "This also shows the analysis object syntax implemented by Coffea [JaggedCandidateArray](https://coffeateam.github.io/coffea/api/coffea.analysis_objects.JaggedCandidateMethods.html), and the usage of custom [accumulators](https://coffeateam.github.io/coffea/api/coffea.processor.AccumulatorABC.html) other than histograms.  Further, it introduces the [processor](https://coffeateam.github.io/coffea/api/coffea.processor.ProcessorABC.html) concept and the first level of scale-out, namely multicore local processing."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run this cell if you do not have coffea installed (e.g. on SWAN with LCG 96Python3 stack)\n",
    "!pip install --user --upgrade coffea"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "%matplotlib inline\n",
    "from coffea import hist\n",
    "from coffea.analysis_objects import JaggedCandidateArray\n",
    "import coffea.processor as processor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Look at ProcessorABC documentation to see the expected methods and what they are supposed to do\n",
    "class DimuonProcessor(processor.ProcessorABC):\n",
    "    def __init__(self):\n",
    "        dataset_axis = hist.Cat(\"dataset\", \"Primary dataset\")\n",
    "        mass_axis = hist.Bin(\"mass\", r\"$m_{\\mu\\mu}$ [GeV]\", 30000, 0.25, 300)\n",
    "        \n",
    "        self._accumulator = processor.dict_accumulator({\n",
    "            'mass': hist.Hist(\"Counts\", dataset_axis, mass_axis),\n",
    "            'cutflow': processor.defaultdict_accumulator(int),\n",
    "        })\n",
    "    \n",
    "    @property\n",
    "    def accumulator(self):\n",
    "        return self._accumulator\n",
    "    \n",
    "    def process(self, df):\n",
    "        output = self.accumulator.identity()\n",
    "        \n",
    "        dataset = df['dataset']\n",
    "        muons = JaggedCandidateArray.candidatesfromcounts(\n",
    "            df['nMuon'],\n",
    "            pt=df['Muon_pt'].content,\n",
    "            eta=df['Muon_eta'].content,\n",
    "            phi=df['Muon_phi'].content,\n",
    "            mass=df['Muon_mass'].content,\n",
    "            charge=df['Muon_charge'].content,\n",
    "            )\n",
    "        \n",
    "        output['cutflow']['all events'] += muons.size\n",
    "        \n",
    "        twomuons = (muons.counts == 2)\n",
    "        output['cutflow']['two muons'] += twomuons.sum()\n",
    "        \n",
    "        opposite_charge = twomuons & (muons['charge'].prod() == -1)\n",
    "        output['cutflow']['opposite charge'] += opposite_charge.sum()\n",
    "        \n",
    "        dimuons = muons[opposite_charge].distincts()\n",
    "        output['mass'].fill(dataset=dataset, mass=dimuons.mass.flatten())\n",
    "        \n",
    "        return output\n",
    "\n",
    "    def postprocess(self, accumulator):\n",
    "        return accumulator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tstart = time.time()    \n",
    "\n",
    "fileset = {\n",
    "    'DoubleMuon': [\n",
    "        'root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/Run2012B_DoubleMuParked.root',\n",
    "        'root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/Run2012C_DoubleMuParked.root',\n",
    "    ]\n",
    "}\n",
    "\n",
    "output = processor.run_uproot_job(fileset,\n",
    "                                  treename='Events',\n",
    "                                  processor_instance=DimuonProcessor(),\n",
    "                                  executor=processor.futures_executor,\n",
    "                                  executor_args={'workers': 4},\n",
    "                                 )\n",
    "\n",
    "elapsed = time.time() - tstart\n",
    "print(output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ax = hist.plot1d(output['mass'], overlay='dataset')\n",
    "ax.set_xscale('log')\n",
    "ax.set_yscale('log')\n",
    "ax.set_ylim(0.1, 1e6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Events/s:\", output['cutflow']['all events']/elapsed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}