{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "
\n", "
\n", " \n", "
\n", "
\n", " \n", "
\n", "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import ROOT\n", "from ROOT import TFile, TMVA, TCut" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Enable JS visualization" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%jsmva on" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Dataset infos" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "infname = \"files/tmva_class_example.root\"\n", "dataset = \"files/tmva_class_example\"\n", "treeNameSig = \"TreeS\"\n", "treeNameBkg = \"TreeB\"\n", "outfname = \"files/TMVA.root\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Declare Factory and DataLoader" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "outputFile = TFile( outfname, 'RECREATE' )\n", "\n", "TMVA.Tools.Instance();" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "factory = TMVA.Factory(JobName=\"TMVAClassification\", TargetFile=outputFile,\n", " V=False, Color=False, DrawProgressBar=True, Transformations=[\"I\", \"D\", \"P\", \"G\",\"D\"],\n", " AnalysisType=\"Classification\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "loader = TMVA.DataLoader(dataset)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Adding variables to DataLoader" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "loader.AddVariable( \"myvar1 := var1+var2\", 'F' )\n", "loader.AddVariable( \"myvar2 := var1-var2\", \"Expression 2\", 'F' )\n", "loader.AddVariable( \"var3\", \"Variable 3\", 
'F' )\n", "loader.AddVariable( \"var4\", \"Variable 4\", 'F' )\n", "\n", "loader.AddSpectator( \"spec1:=var1*2\", \"Spectator 1\", 'F' )\n", "loader.AddSpectator( \"spec2:=var1*3\", \"Spectator 2\", 'F' )" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false }, "source": [ "## Download the dataset from the CERN server if it is not available locally" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "# gSystem.AccessPathName returns a non-zero value when the path is NOT accessible\n", "if ROOT.gSystem.AccessPathName( \"./\"+infname ) != 0:\n", "    # '&&' aborts the chain on the first failure, so wget only ever runs inside files/\n", "    status = ROOT.gSystem.Exec( \"mkdir -p files && cd files && wget https://root.cern.ch/\" + infname )\n", "    if status != 0:\n", "        raise RuntimeError(\"Could not download \" + infname + \" (exit status %d)\" % status)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setting up dataset from Trees" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ "# Named inputFile (not input) so the Python builtin input() is not shadowed\n", "inputFile = TFile.Open( infname )\n", "\n", "# Get the signal and background trees for training\n", "signal = inputFile.Get( treeNameSig )\n", "background = inputFile.Get( treeNameBkg )\n", "\n", "# Global weights passed along with the signal / background tree\n", "signalWeight = 1.0\n", "backgroundWeight = 1.0\n", "\n", "# Empty cut strings for signal and background\n", "mycuts = TCut(\"\")\n", "mycutb = TCut(\"\")\n", "\n", "loader.AddSignalTree(signal, signalWeight)\n", "loader.AddBackgroundTree(background, backgroundWeight)\n", "# NOTE(review): the trees and weights are also stored as plain attributes on the loader,\n", "# presumably for the JsMVA drawing helpers - confirm before removing\n", "loader.fSignalWeight = signalWeight\n", "loader.fBackgroundWeight = backgroundWeight\n", "loader.fTreeS = signal\n", "loader.fTreeB = background\n", "\n", "loader.PrepareTrainingAndTestTree(SigCut=mycuts, BkgCut=mycutb,\n", " nTrain_Signal=1000, nTrain_Background=1000, nTest_Signal=2000, nTest_Background=2000,\n", " SplitMode=\"Random\", NormMode=\"NumEvents\", V=False);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Visualizing input variables" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [], 
"source": [ "loader.DrawInputVariable(\"myvar1\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### We can also visualize transformations on input variables" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [], "source": [ "# Transformation codes noted by the original author: I;N;D;P;U;G,D\n", "loader.DrawInputVariable(\"myvar1\", processTrfs=[\"D\", \"N\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Correlation matrix of input variables" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [], "source": [ "loader.DrawCorrelationMatrix(\"Signal\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Interactive DNN booking\n", "\n", "`BookDNN` is JsMVA's graphical alternative to booking a deep neural network in code." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "factory.BookDNN(loader)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Booking methods" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [], "source": [ "factory.BookMethod( DataLoader=loader, Method=TMVA.Types.kSVM, MethodTitle=\"SVM\", \n", " Gamma=0.25, Tol=0.001, VarTransform=\"Norm\" )\n", "\n", "factory.BookMethod( loader,TMVA.Types.kMLP, \"MLP\", \n", " H=False, V=False, NeuronType=\"tanh\", VarTransform=\"N\", NCycles=600, HiddenLayers=\"N+5\",\n", " TestRate=5, UseRegulator=False )\n", "\n", "factory.BookMethod( loader,TMVA.Types.kLD, \"LD\", \n", " H=False, V=False, VarTransform=\"None\", CreateMVAPdfs=True, PDFInterpolMVAPdf=\"Spline2\",\n", " NbinsMVAPdf=50, NsmoothMVAPdf=10 )\n", "\n", "factory.BookMethod( loader,TMVA.Types.kLikelihood,\"Likelihood\",\"NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10\",\n", " NSmooth=1, NAvEvtPerBin=50, H=True, V=False,TransformOutput=True,PDFInterpol=\"Spline2\")\n", "\n", "factory.BookMethod( loader, TMVA.Types.kBDT, \"BDT\",\n", " H=False, 
V=False, NTrees=850, MinNodeSize=\"2.5%\", MaxDepth=3, BoostType=\"AdaBoost\", AdaBoostBeta=0.5,\n", " UseBaggedBoost=True, BaggedSampleFraction=0.5, SeparationType=\"GiniIndex\", nCuts=20 )\n", "\n", "trainingStrategy = [{\n", " \"LearningRate\": 1e-1,\n", " \"Momentum\": 0.0,\n", " \"Repetitions\": 1,\n", " \"ConvergenceSteps\": 100,\n", " \"BatchSize\": 20,\n", " \"TestRepetitions\": 1,\n", " \"WeightDecay\": 0.001,\n", " \"Regularization\": \"NONE\",\n", " \"DropConfig\": \"0.0+0.5+0.5+0.5\",\n", " \"DropRepetitions\": 1,\n", " \"Multithreading\": True\n", " \n", " }, {\n", " \"LearningRate\": 1e-2,\n", " \"Momentum\": 0.5,\n", " \"Repetitions\": 1,\n", " \"ConvergenceSteps\": 100,\n", " \"BatchSize\": 30,\n", " \"TestRepetitions\": 1,\n", " \"WeightDecay\": 0.001,\n", " \"Regularization\": \"L2\",\n", " \"DropConfig\": \"0.0+0.1+0.1+0.1\",\n", " \"DropRepetitions\": 1,\n", " \"Multithreading\": True\n", " \n", " }, {\n", " \"LearningRate\": 1e-2,\n", " \"Momentum\": 0.3,\n", " \"Repetitions\": 1,\n", " \"ConvergenceSteps\": 100,\n", " \"BatchSize\": 40,\n", " \"TestRepetitions\": 1,\n", " \"WeightDecay\": 0.001,\n", " \"Regularization\": \"L2\",\n", " \"Multithreading\": True\n", " \n", " },{\n", " \"LearningRate\": 1e-3,\n", " \"Momentum\": 0.1,\n", " \"Repetitions\": 1,\n", " \"ConvergenceSteps\": 100,\n", " \"BatchSize\": 70,\n", " \"TestRepetitions\": 1,\n", " \"WeightDecay\": 0.001,\n", " \"Regularization\": \"NONE\",\n", " \"Multithreading\": True\n", " \n", "}]\n", "\n", "factory.BookMethod(DataLoader=loader, Method=TMVA.Types.kDNN, MethodTitle=\"DNN\", \n", " H = False, V=False, VarTransform=\"Normalize\", ErrorStrategy=\"CROSSENTROPY\",\n", " Layout=[\"TANH|100\", \"TANH|50\", \"TANH|10\", \"LINEAR\"], TrainingStrategy=trainingStrategy, Architecture=\"CPU\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Train Methods" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": false }, 
"outputs": [], "source": [ "factory.TrainAllMethods()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Testing the methods" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [], "source": [ "factory.TestAllMethods()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Evaluate the methods" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [], "source": [ "factory.EvaluateAllMethods()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Classifier Output Distributions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "factory.DrawOutputDistribution(dataset, \"MLP\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Classifier Probability Distributions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "factory.DrawProbabilityDistribution(dataset, \"LD\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## ROC curve" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [], "source": [ "factory.DrawROCCurve(dataset)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Classifier Cut Efficiencies" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "factory.DrawCutEfficiencies(dataset, \"MLP\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Draw Neural Network" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* Mouse over a node or a weight to focus on it\n", "* Zooming and click-and-drag panning are supported\n", "* Double-click to reset the view" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "factory.DrawNeuralNetwork(dataset, 
\"MLP\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Draw Deep Neural Network" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [], "source": [ "factory.DrawNeuralNetwork(dataset, \"DNN\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Draw Decision Tree" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* Mouse over (node, weight): shows the decision path\n", "* Zooming and click-and-drag panning are supported\n", "* Double-click to reset the view\n", "* Click on a node:\n", "    * hides its subtree; a node whose children are hidden gets a green border\n", "    * rescales the view: bigger nodes, bigger text\n", "    * click again to show the subtree" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "factory.DrawDecisionTree(dataset, \"BDT\") #11" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## DNN weights heat map" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "factory.DrawDNNWeights(dataset, \"DNN\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Close the factory's output file" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "outputFile.Close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.11" } }, "nbformat": 4, "nbformat_minor": 0 }