<div>
    <div style="float:left;">
        <img src="http://oproject.org/tiki-download_file.php?fileId=8&display&x=450&y=128" width="50%" />
    </div>
    <div style="float:left;">
        <img src="https://raw.githubusercontent.com/qati/GSOC16/master/img/tmva.gif" width="50%"/>
    </div>
</div>

<hr style="border-top-width: 4px; border-top-color: #34609b;">

<!--<script src="JsRoot/scripts/JSRootCore.js?jq2d&onload=JsRootLoadedCall" type="text/javascript"></script>-->

In [None]:
import ROOT
from ROOT import TFile, TMVA, TCut

## Enable JS visualization

In [None]:
# IPython line magic; per the heading above it switches the JS visualization on.
# NOTE(review): presumably registered when ROOT is imported and needed by the
# Draw* calls further down — confirm.
%jsmva on

# Dataset information

In [None]:
# Input ROOT file; a later cell fetches it from root.cern.ch when it is missing locally.
infname = "files/tmva_class_example.root"

# Name given to the TMVA DataLoader and passed to the factory's Draw* calls.
dataset = "files/tmva_class_example"

# Names of the signal and background trees read from the input file.
treeNameSig, treeNameBkg = "TreeS", "TreeB"

# File the TMVA factory writes to; it is opened with 'RECREATE'.
outfname = "files/TMVA.root"

## Declare Factory and DataLoader

In [None]:
# Make sure the directory of the output file exists before opening it, so the
# notebook also runs from a fresh checkout without a `files/` directory.
# The return value is ignored on purpose: an already existing directory is not an error here.
ROOT.gSystem.mkdir( ROOT.gSystem.DirName( outfname ), True )

# Target file handed to the TMVA factory in the next cell ('RECREATE' mode).
outputFile = TFile( outfname, 'RECREATE' )
if not outputFile or outputFile.IsZombie():
    # Fail here rather than later, when the factory tries to use the file.
    raise IOError( "Could not create output file: " + outfname )

# Trailing ';' keeps the notebook from echoing the returned object.
TMVA.Tools.Instance();

In [None]:
# Factory configured for classification, with `outputFile` as its target file.
# Options are given as Python keywords, one per line.
factory = TMVA.Factory(
    JobName="TMVAClassification",
    TargetFile=outputFile,
    V=False,
    Color=False,
    DrawProgressBar=True,
    Transformations=["I", "D", "P", "G","D"],
    AnalysisType="Classification",
)

In [None]:
# Data loader named after `dataset`; variables and trees are attached to it in the cells below.
loader = TMVA.DataLoader(dataset)

## Adding variables to DataLoader

In [None]:
# Input variables used for training.
# AddVariable has two forms: (expression, type) and (expression, title, unit, type).
# In the second form the unit must be passed explicitly; with only three strings
# the 'F' would be taken as the unit and show up as a bogus "[F]" in axis labels.
loader.AddVariable( "myvar1 := var1+var2", 'F' )
loader.AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' )
loader.AddVariable( "var3",                "Variable 3",   "", 'F' )
loader.AddVariable( "var4",                "Variable 4",   "", 'F' )

# Spectators are registered as (expression, title, unit); AddSpectator takes no
# type argument, so only an empty unit is passed here.
loader.AddSpectator( "spec1:=var1*2",  "Spectator 1",  "" )
loader.AddSpectator( "spec2:=var1*3",  "Spectator 2",  "" )

## If the dataset is not available locally, download it from the CERN server

In [None]:
# Fetch the example dataset only when it is not available locally.
# Note the inverted convention: AccessPathName is falsy when the path IS accessible.
if ROOT.gSystem.AccessPathName( "./"+infname ) != 0:
    targetDir = ROOT.gSystem.DirName( infname )
    # Create the target directory first. With the previous "cd files; wget ..." a
    # missing directory made wget save the file into the current directory instead.
    ROOT.gSystem.mkdir( targetDir, True )
    # -P stores the download inside targetDir under the remote file name,
    # which yields exactly `infname` because the URL path equals `infname`.
    status = ROOT.gSystem.Exec( "wget -P " + targetDir + " https://root.cern.ch/" + infname )
    if status != 0:
        # Stop here instead of failing later with a less obvious error when the file is opened.
        raise RuntimeError( "Download of https://root.cern.ch/" + infname
                            + " failed (exit status " + str(status) + ")" )

## Setting up dataset from Trees

In [None]:
# Open the input file. It is bound to `inputFile` so that the Python builtin
# `input` is not shadowed for the rest of the notebook.
inputFile = TFile.Open( infname )
if not inputFile or inputFile.IsZombie():
    raise IOError( "Could not open input file: " + infname )

# Get the signal and background trees for training
signal      = inputFile.Get( treeNameSig )
background  = inputFile.Get( treeNameBkg )
if not signal or not background:
    # Get() hands back a null object for a missing key; catch that here.
    raise RuntimeError( "Tree '" + treeNameSig + "' and/or '" + treeNameBkg
                        + "' not found in " + infname )

# Global event weights, applied per tree
signalWeight     = 1.0
backgroundWeight = 1.0

# Empty cuts: no selection is applied to signal or background events
mycuts = TCut("")
mycutb = TCut("")

loader.AddSignalTree(signal, signalWeight)
loader.AddBackgroundTree(background, backgroundWeight)

# NOTE(review): these set extra Python-side attributes on the loader object;
# presumably the JsMVA drawing helpers read them — confirm before removing.
loader.fSignalWeight = signalWeight
loader.fBackgroundWeight = backgroundWeight
loader.fTreeS = signal
loader.fTreeB = background

# Trailing ';' suppresses the cell's output.
loader.PrepareTrainingAndTestTree(SigCut=mycuts, BkgCut=mycutb,
            nTrain_Signal=1000, nTrain_Background=1000, nTest_Signal=2000, nTest_Background=2000,
                                  SplitMode="Random", NormMode="NumEvents", V=False);

## Visualizing input variables

In [None]:
# "myvar1" is the variable registered with loader.AddVariable as "myvar1 := var1+var2".
loader.DrawInputVariable("myvar1")

### We can also visualize transformations on input variables

In [None]:
# Same variable as before, with the transformations "D" and "N" requested via processTrfs.
# Transformation codes noted by the original author: I;N;D;P;U;G,D
loader.DrawInputVariable("myvar1", processTrfs=["D", "N"])

## Correlation matrix of input variables

In [None]:
# Drawn for "Signal".
# NOTE(review): presumably "Background" is the other accepted value — confirm.
loader.DrawCorrelationMatrix("Signal")

In [None]:
# NOTE(review): this cell has no heading and runs before the "Booking methods" section.
# A method titled "DNN" is also booked explicitly with factory.BookMethod further down;
# what BookDNN itself sets up is not visible in this notebook's code, so confirm that
# both are intended and that the result does not depend on manual interaction.
factory.BookDNN(loader)

## Booking methods

In [None]:
# Each call books one method on `factory` for the data held by `loader`.
# The third argument (or MethodTitle) is the name the Draw* cells refer to later.

# Support vector machine
factory.BookMethod(
    DataLoader=loader,
    Method=TMVA.Types.kSVM,
    MethodTitle="SVM",
    Gamma=0.25,
    Tol=0.001,
    VarTransform="Norm",
)

# Multilayer perceptron
factory.BookMethod(
    loader,
    TMVA.Types.kMLP,
    "MLP",
    H=False,
    V=False,
    NeuronType="tanh",
    VarTransform="N",
    NCycles=600,
    HiddenLayers="N+5",
    TestRate=5,
    UseRegulator=False,
)

# Linear discriminant; CreateMVAPdfs is switched on for this method
factory.BookMethod(
    loader,
    TMVA.Types.kLD,
    "LD",
    H=False,
    V=False,
    VarTransform="None",
    CreateMVAPdfs=True,
    PDFInterpolMVAPdf="Spline2",
    NbinsMVAPdf=50,
    NsmoothMVAPdf=10,
)

# Likelihood: the indexed options are given as a plain option string because
# names such as NSmoothSig[0] cannot be written as Python keywords.
factory.BookMethod(
    loader,
    TMVA.Types.kLikelihood,
    "Likelihood",
    "NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10",
    NSmooth=1,
    NAvEvtPerBin=50,
    H=True,
    V=False,
    TransformOutput=True,
    PDFInterpol="Spline2",
)

# Boosted decision trees
factory.BookMethod(
    loader,
    TMVA.Types.kBDT,
    "BDT",
    H=False,
    V=False,
    NTrees=850,
    MinNodeSize="2.5%",
    MaxDepth=3,
    BoostType="AdaBoost",
    AdaBoostBeta=0.5,
    UseBaggedBoost=True,
    BaggedSampleFraction=0.5,
    SeparationType="GiniIndex",
    nCuts=20,
)

# Training phases for the DNN booked right below, one dict per phase, in order.
# Only the first two phases carry drop-out settings (DropConfig / DropRepetitions).
trainingStrategy = [
    dict(
        LearningRate=1e-1,
        Momentum=0.0,
        Repetitions=1,
        ConvergenceSteps=100,
        BatchSize=20,
        TestRepetitions=1,
        WeightDecay=0.001,
        Regularization="NONE",
        DropConfig="0.0+0.5+0.5+0.5",
        DropRepetitions=1,
        Multithreading=True,
    ),
    dict(
        LearningRate=1e-2,
        Momentum=0.5,
        Repetitions=1,
        ConvergenceSteps=100,
        BatchSize=30,
        TestRepetitions=1,
        WeightDecay=0.001,
        Regularization="L2",
        DropConfig="0.0+0.1+0.1+0.1",
        DropRepetitions=1,
        Multithreading=True,
    ),
    dict(
        LearningRate=1e-2,
        Momentum=0.3,
        Repetitions=1,
        ConvergenceSteps=100,
        BatchSize=40,
        TestRepetitions=1,
        WeightDecay=0.001,
        Regularization="L2",
        Multithreading=True,
    ),
    dict(
        LearningRate=1e-3,
        Momentum=0.1,
        Repetitions=1,
        ConvergenceSteps=100,
        BatchSize=70,
        TestRepetitions=1,
        WeightDecay=0.001,
        Regularization="NONE",
        Multithreading=True,
    ),
]

# Deep neural network booked under the title "DNN".
# Layout lists the layers in order; TrainingStrategy takes the list of phase dicts.
factory.BookMethod(
    DataLoader=loader,
    Method=TMVA.Types.kDNN,
    MethodTitle="DNN",
    H=False,
    V=False,
    VarTransform="Normalize",
    ErrorStrategy="CROSSENTROPY",
    Layout=["TANH|100", "TANH|50", "TANH|10", "LINEAR"],
    TrainingStrategy=trainingStrategy,
    Architecture="CPU",
)

# Train Methods

In [None]:
# Acts on `factory`, on which SVM, MLP, LD, Likelihood, BDT and DNN were booked above.
factory.TrainAllMethods()

## Testing the methods

In [None]:
# Called after TrainAllMethods() on the same factory.
# NOTE(review): presumably uses the test events split off by PrepareTrainingAndTestTree — confirm.
factory.TestAllMethods()

## Evaluate the methods

In [None]:
# Called after TestAllMethods(); the Draw* cells below all come after this step.
# NOTE(review): presumably those plots rely on results produced here — confirm.
factory.EvaluateAllMethods()

## Classifier Output Distributions

In [None]:
# `dataset` is the DataLoader name; "MLP" is the title under which the MLP was booked.
factory.DrawOutputDistribution(dataset, "MLP")

## Classifier Probability Distributions

In [None]:
# "LD" is the method booked with CreateMVAPdfs=True.
# NOTE(review): presumably that option is what makes this plot available — confirm.
factory.DrawProbabilityDistribution(dataset, "LD")

## ROC curve

In [None]:
# Takes only the DataLoader name, no method title.
# NOTE(review): presumably draws one curve per booked method — confirm.
factory.DrawROCCurve(dataset)

## Classifier Cut Efficiencies

In [None]:
# `dataset` is the DataLoader name; "MLP" is the title under which the MLP was booked.
factory.DrawCutEfficiencies(dataset, "MLP")

## Draw Neural Network

* Mouseover (node, weight): focusing
* Zooming and grab and move supported
* Reset: double click

In [None]:
# "MLP" is the method booked above with HiddenLayers="N+5".
factory.DrawNeuralNetwork(dataset, "MLP")

## Draw Deep Neural Network

In [None]:
# "DNN" is the method booked above with Layout=["TANH|100", "TANH|50", "TANH|10", "LINEAR"].
factory.DrawNeuralNetwork(dataset, "DNN")

## Draw Decision Tree

* Mouseover (node, weight): showing decision path
* Zooming and grab and move supported
* Reset: double click
* Click on a node:
    * hides its subtree; a node whose children are hidden gets a green border
    * rescales the view: bigger nodes, bigger text
    * click again to show the subtree

In [None]:
# "BDT" is the title under which the boosted decision trees were booked (NTrees=850).
# NOTE(review): the original trailing comment read just "11"; its meaning is not evident
# from this notebook (possibly a tree index) — confirm or remove.
factory.DrawDecisionTree(dataset, "BDT") #11

## DNN weights heat map

In [None]:
# `dataset` is the DataLoader name; "DNN" is the title under which the deep network was booked.
factory.DrawDNNWeights(dataset, "DNN")

## Close the factory's output file

In [None]:
# Closes the TFile that was opened with 'RECREATE' near the top and handed to the factory as TargetFile.
outputFile.Close()