// New code to try and access the log-likelihood using the createNLL function // in RooFit which is packaged with all the pdfs // Negative Log Likelihood // Ian Connelly // 6th May 2012 // I will also take care not to have memeory leaks when defining lots of pointers #include "RooTrace.h" #include #include #include #include "TFile.h" #include "TString.h" #include "TH1D.h" #include "RooAbsReal.h" #include "RooRealVar.h" #include "RooKeysPdf.h" #include "RooHistPdf.h" #include "RooPoisson.h" #include "RooDataSet.h" #include "RooWorkspace.h" #include "RooMinimizer.h" #include "RooMoment.h" #include "TMath.h" #include using std::cerr; using std::cout; using std::endl; using std::map; using namespace RooFit; // Intended usage : ./prog.exe where val1 maps the "signal" and val 2 maps the "background" to be used in the test statistic. int main (int argv, char ** argc){ RooMsgService::instance().deleteStream(1); //Memory tracing //RooTrace::active(1); // Get handle on the inputs // Have error catching on the inputs if (argv != 3){ cerr << "Wrong number of arguments. Include two parameters (sig then bkg) from command line please." << endl; return 99;} int sigRef = atoi(argc[1]); int bkgRef = atoi(argc[2]); if (sigRef > 4 || bkgRef > 4 || sigRef < 0 || bkgRef < 0){ cerr << "Invalid paramaters. Arguments go from 0 to 4." << endl; return 99;} //Files and histograms TFile* f_results = new TFile("results_.root","RECREATE"); TH1D* h_tSig = new TH1D("h_tSig","Test stat under signal generated data - ln(L(x|s)/L(x|b))",100,-10,10); TH1D* h_tBkg = new TH1D("h_tBkg","Test stat under background generated data - ln(L(x|s)/L(x|b))",100,-10,10); TH1D* h_tSigCheck = new TH1D("h_tSigCheck","Checking t -sig- distribution with just xi keys pdf",100,-10,10); TH1D* h_tBkgCheck = new TH1D("h_tBkgCheck","Checking t -bkg- distribution with just xi keys pdf",100,-10,10); //Initialise mappings map fileRef; fileRef[0]= "sig_pyth"; fileRef[1]= "sig_herw"; fileRef[2]= "bkg_zjj0"; fileRef[3]= "bkg_zjj1"; fileRef[4]= "bkg_tt"; TString sigFile = fileRef.find(sigRef)->second; TString bkgFile = fileRef.find(bkgRef)->second; //Get the relevant workspace root file TString relativePath = "/scratch3/connelly/testarea/analysis/kernalestimation/output/v2/"; TString sigfileName = "workspace"+sigFile; TString bkgfileName = "workspace"+bkgFile; TFile* f_sig = new TFile(relativePath+sigfileName+".root","OPEN"); TFile* f_bkg = new TFile(relativePath+bkgfileName+".root","OPEN"); cout << "===> Using signal file : " << sigFile << endl; cout << "===> Using backgd file : " << bkgFile << endl; // Pdf names : ___ // : pdfkeys, histpdf // : xi, hpm // : P1,P2,P3 // Loop for data generation type sig(x) or bkg(x) for(int i = 0; i < 2; i++){ //Initialise the pdfs to be used here //Then assign them inside the if statement //Then delete them at the end of this for loop if pointers are used // cout << "====>Begin initialisaion of generation pdfs." << endl; RooWorkspace* wspace; RooRealVar* xi; RooRealVar* hpm; RooRealVar* nHad; RooPoisson* pdfGenPoissonP1; RooPoisson* pdfGenPoissonP2; RooPoisson* pdfGenPoissonP3; RooHistPdf* pdfGen2DP1; RooHistPdf* pdfGen2DP2; RooHistPdf* pdfGen2DP3; RooRealVar* meanVarP1; RooRealVar* meanVarP2; RooRealVar* meanVarP3; RooKeysPdf* pdfGenXiP1; RooKeysPdf* pdfGenXiP2; RooKeysPdf* pdfGenXiP3; // ---- Setting up the pdfs for generation only here ------- // ---- Will also need to set up the pdfs for the ratio ---- //i == 0 : signal generation if (i == 0){ cout << "====> Generation " << i << endl; wspace = (RooWorkspace*)f_sig->Get("w"); xi = (RooRealVar*)wspace->var("xi"); hpm = (RooRealVar*)wspace->var("hadronPlaneMomentum"); nHad = (RooRealVar*)wspace->var("nHadrons"); //Use this to get the mean value - delete at the end of the if statement after mean is accessed and RooPoisson is allocated double meanP1 = wspace->data("data_"+sigFile+"_varExcP1")->mean(*nHad); //! double meanP2 = wspace->data("data_"+sigFile+"_varExcP2")->mean(*nHad); //! double meanP3 = wspace->data("data_"+sigFile+"_varExcP3")->mean(*nHad); //! cout << meanP1 << " " << meanP2 << " " << meanP3 << endl; meanVarP1 = new RooRealVar("meanP1","meanP1",meanP1); meanVarP2 = new RooRealVar("meanP2","meanP2",meanP2); meanVarP3 = new RooRealVar("meanP3","meanP3",meanP3); pdfGenPoissonP1 = new RooPoisson("pdfGenPoissonP1","Poisson modelling hadron number generation - P1", *nHad, *meanVarP1); pdfGenPoissonP2 = new RooPoisson("pdfGenPoissonP2","Poisson modelling hadron number generation - P2", *nHad, *meanVarP2); pdfGenPoissonP3 = new RooPoisson("pdfGenPoissonP3","Poisson modelling hadron number generation - P3", *nHad, *meanVarP3); cout << "====> Generating Poisson pdfs are initialised." << endl; // Get the 1D keys xi pdfs pdfGenXiP1 = (RooKeysPdf*)wspace->pdf("pdfkeys_"+sigFile+"_xi_P1"); pdfGenXiP2 = (RooKeysPdf*)wspace->pdf("pdfkeys_"+sigFile+"_xi_P2"); pdfGenXiP3 = (RooKeysPdf*)wspace->pdf("pdfkeys_"+sigFile+"_xi_P3"); } //i == 1 : background generation if(i == 1) { // cout << "====> Generation " << i << endl; wspace = (RooWorkspace*)f_bkg->Get("w"); xi = (RooRealVar*)wspace->var("xi"); hpm = (RooRealVar*)wspace->var("hadronPlaneMomentum"); nHad = (RooRealVar*)wspace->var("nHadrons"); //Use this to get the mean value - delete at the end of the if statement after mean is accessed and RooPoisson is allocated double meanP1 = wspace->data("data_"+bkgFile+"_varExcP1")->mean(*nHad); //! double meanP2 = wspace->data("data_"+bkgFile+"_varExcP2")->mean(*nHad); //! double meanP3 = wspace->data("data_"+bkgFile+"_varExcP3")->mean(*nHad); //! meanVarP1 = new RooRealVar("meanP1","meanP1",meanP1); meanVarP2 = new RooRealVar("meanP2","meanP2",meanP2); meanVarP3 = new RooRealVar("meanP3","meanP3",meanP3); pdfGenPoissonP1 = new RooPoisson("pdfGenPoissonP1","Poisson modelling hadron number generation - P1", *nHad, *meanVarP1); pdfGenPoissonP2 = new RooPoisson("pdfGenPoissonP2","Poisson modelling hadron number generation - P2", *nHad, *meanVarP2); pdfGenPoissonP3 = new RooPoisson("pdfGenPoissonP3","Poisson modelling hadron number generation - P3", *nHad, *meanVarP3); // cout << "====> Generating Poisson pdfs are initialised." << endl; // Get the 1D keys xi pdfs pdfGenXiP1 = (RooKeysPdf*)wspace->pdf("pdfkeys_"+bkgFile+"_xi_P1"); pdfGenXiP2 = (RooKeysPdf*)wspace->pdf("pdfkeys_"+bkgFile+"_xi_P2"); pdfGenXiP3 = (RooKeysPdf*)wspace->pdf("pdfkeys_"+bkgFile+"_xi_P3"); } // cout << "====> Initialisation stage for generation pdfs has ended." << endl; //----------------End of generation pdf initialisation---------------------------// //----------------Start of other pdf initialisation---------------------------// // cout << "====> Begin initialisation of comparison pdfs for likelihood." << endl; RooWorkspace* wspaceOther; RooPoisson* pdfOtherPoissonP1; RooPoisson* pdfOtherPoissonP2; RooPoisson* pdfOtherPoissonP3; RooHistPdf* pdfOther2DP1; RooHistPdf* pdfOther2DP2; RooHistPdf* pdfOther2DP3; RooRealVar* meanOtherVarP1; RooRealVar* meanOtherVarP2; RooRealVar* meanOtherVarP3; RooKeysPdf* pdfOtherXiP1; RooKeysPdf* pdfOtherXiP2; RooKeysPdf* pdfOtherXiP3; // i == 0 : background comparison if( i == 0 ){ // cout << "====> Comparison " << i << endl; wspaceOther = (RooWorkspace*)f_bkg->Get("w"); //Use this to get the mean value - delete at the end of the if statement after mean is accessed and RooPoisson is allocated double meanP1 = wspaceOther->data("data_"+bkgFile+"_varExcP1")->mean(*nHad); //! double meanP2 = wspaceOther->data("data_"+bkgFile+"_varExcP2")->mean(*nHad); //! double meanP3 = wspaceOther->data("data_"+bkgFile+"_varExcP3")->mean(*nHad); //! cout << meanP1 << " " << meanP2 << " " << meanP3 << endl; meanOtherVarP1 = new RooRealVar("meanOtherP1","meanOtherP1",meanP1); meanOtherVarP2 = new RooRealVar("meanOtherP2","meanOtherP2",meanP2); meanOtherVarP3 = new RooRealVar("meanOtherP3","meanOtherP3",meanP3); pdfOtherPoissonP1 = new RooPoisson("pdfOtherPoissonP1","Poisson modelling hadron number for comparison - P1", *nHad, *meanOtherVarP1); pdfOtherPoissonP2 = new RooPoisson("pdfOtherPoissonP2","Poisson modelling hadron number for comparison - P2", *nHad, *meanOtherVarP2); pdfOtherPoissonP3 = new RooPoisson("pdfOtherPoissonP3","Poisson modelling hadron number for comparison - P3", *nHad, *meanOtherVarP3); // cout << "====> Comparison Poisson pdfs are initialised." << endl; // Get the 1D keys xi pdfs pdfOtherXiP1 = (RooKeysPdf*)wspaceOther->pdf("pdfkeys_"+bkgFile+"_xi_P1"); pdfOtherXiP2 = (RooKeysPdf*)wspaceOther->pdf("pdfkeys_"+bkgFile+"_xi_P2"); pdfOtherXiP3 = (RooKeysPdf*)wspaceOther->pdf("pdfkeys_"+bkgFile+"_xi_P3"); } // i == 1 : signal comparison if ( i == 1 ) { // cout << "====> Comparison " << i << endl; wspaceOther = (RooWorkspace*)f_sig->Get("w"); //Use this to get the mean value - delete at the end of the if statement after mean is accessed and RooPoisson is allocated double meanP1 = wspaceOther->data("data_"+sigFile+"_varExcP1")->mean(*nHad); //! double meanP2 = wspaceOther->data("data_"+sigFile+"_varExcP2")->mean(*nHad); //! double meanP3 = wspaceOther->data("data_"+sigFile+"_varExcP3")->mean(*nHad); //! meanOtherVarP1 = new RooRealVar("meanOtherP1","meanOtherP1",meanP1); meanOtherVarP2 = new RooRealVar("meanOtherP2","meanOtherP2",meanP2); meanOtherVarP3 = new RooRealVar("meanOtherP3","meanOtherP3",meanP3); pdfOtherPoissonP1 = new RooPoisson("pdfOtherPoissonP1","Poisson modelling hadron number for comparison - P1", *nHad, *meanOtherVarP1); pdfOtherPoissonP2 = new RooPoisson("pdfOtherPoissonP2","Poisson modelling hadron number for comparison - P2", *nHad, *meanOtherVarP2); pdfOtherPoissonP3 = new RooPoisson("pdfOtherPoissonP3","Poisson modelling hadron number for comparison - P3", *nHad, *meanOtherVarP3); // cout << "====> Comparison Poisson pdfs are initialised." << endl; // Get the 1D keys xi pdfs pdfOtherXiP1 = (RooKeysPdf*)wspaceOther->pdf("pdfkeys_"+sigFile+"_xi_P1"); pdfOtherXiP2 = (RooKeysPdf*)wspaceOther->pdf("pdfkeys_"+sigFile+"_xi_P2"); pdfOtherXiP3 = (RooKeysPdf*)wspaceOther->pdf("pdfkeys_"+sigFile+"_xi_P3"); } // cout << "====> Initialisation stage for comparison pdfs has ended." << endl; //----------------End of other pdf initialisation---------------------------// // Now all the pdfs are initiliased as required. // I need to now generate the toy data // Loop number of toy events // Generate poisson values for number of hadrons -> RooDataSet // Generate values of xi, hpm for each plane accordingly -> RooDataSet // Use ->createNLL to investigate the log-likelihood using the datasets just createed // Do this for the generating pdfs // Do this for the comparison pdfs (other) // Use hist2dgenerate for a template of using this function //Print method to see what is loaded cout <<"====> Loaded objects are as follows. " << endl; xi->Print(); hpm->Print(); nHad->Print(); pdfGenPoissonP1->Print(); pdfOtherPoissonP1->Print(); //Declare non-pointer numbers outside th for loop as I think maybe it generates them on the stack double nllValPoissonP1 = 0; double nllValPoissonP2 = 0; double nllValPoissonP3 = 0; double nllVal2DP1 = 0; double nllVal2DP2 = 0; double nllVal2DP3 = 0; double nllValOtherPoissonP1 = 0; double nllValOtherPoissonP2 = 0; double nllValOtherPoissonP3 = 0; double nllValOther2DP1 = 0; double nllValOther2DP2 = 0; double nllValOther2DP3 = 0; double nllValXiP1 = 0; double nllValXiP2 = 0; double nllValXiP3 = 0; double nllValOtherXiP1 = 0; double nllValOtherXiP2 = 0; double nllValOtherXiP3 = 0; double nllGenSum = 0; double nllOtherSum = 0; double nllNewGenSum = 0; double nllNewOtherSum = 0; double nllRatio = 0; int b_REUSE = 0; //Need to define outside the for loop //-Doing this to try and solve the memory leak issue //-See advise here http://root.cern.ch/phpBB3/viewtopic.php?f=15&t=14113 //-Looks like might have been fixed in the latest latest release (ie newer than my mac version) //-Definately newer than the linappserv version RooAbsReal* nllPoissonP1; RooAbsReal* nllPoissonP2; RooAbsReal* nllPoissonP3; RooAbsReal* nll2DP1; RooAbsReal* nll2DP2; RooAbsReal* nll2DP3; RooAbsReal* nllXiP1; RooAbsReal* nllXiP2; RooAbsReal* nllXiP3; RooAbsReal* nllOtherPoissonP1; RooAbsReal* nllOtherPoissonP2; RooAbsReal* nllOtherPoissonP3; RooAbsReal* nllOther2DP1; RooAbsReal* nllOther2DP2; RooAbsReal* nllOther2DP3; RooAbsReal* nllOtherXiP1; RooAbsReal* nllOtherXiP2; RooAbsReal* nllOtherXiP3; int numToyGen = 4000; //CAN NO LONGER USE THIS DATASET TO EVALUATE THE NLL AS IT HAS MULTIPLE EVENTS IN IT NOW //WILL EVALUATE PROBABILITY USING TMATH::POISSON() RooDataSet* dataGenHadP1 = (RooDataSet*)pdfGenPoissonP1->generate(RooArgSet(*nHad),numToyGen); RooDataSet* dataGenHadP2 = (RooDataSet*)pdfGenPoissonP2->generate(RooArgSet(*nHad),numToyGen); RooDataSet* dataGenHadP3 = (RooDataSet*)pdfGenPoissonP3->generate(RooArgSet(*nHad),numToyGen); //Just generate one data set rather than loads incase this helps speed for (int j = 0; j < numToyGen; j++){ // Generate number of hadrons in plane RooArgSet* setHadP1 = (RooArgSet*)dataGenHadP1->get(j); nHad = (RooRealVar*)setHadP1->find(nHad->GetName()); int genHadP1 = static_cast(nHad->getVal()); RooArgSet* setHadP2 = (RooArgSet*)dataGenHadP2->get(j); nHad = (RooRealVar*)setHadP2->find(nHad->GetName()); int genHadP2 = static_cast(nHad->getVal()); RooArgSet* setHadP3 = (RooArgSet*)dataGenHadP3->get(j); nHad = (RooRealVar*)setHadP3->find(nHad->GetName()); int genHadP3 = static_cast(nHad->getVal()); cout << "====> Generated number of hadrons : " << genHadP1 << " - " << genHadP2 << " - " << genHadP3 << endl; //----Generate xi data only RooDataSet* dataGenXiP1 = (RooDataSet*)pdfGenXiP1->generate(*xi,genHadP1); //!Might be intereting to check the range of the generated variables? RooDataSet* dataGenXiP2 = (RooDataSet*)pdfGenXiP2->generate(*xi,genHadP2); RooDataSet* dataGenXiP3 = (RooDataSet*)pdfGenXiP3->generate(*xi,genHadP3); if(b_REUSE == 0){ // -------Calculate NLLs // Generator pdfs nllXiP1 = pdfGenXiP1->createNLL(*dataGenXiP1); nllXiP2 = pdfGenXiP2->createNLL(*dataGenXiP2); nllXiP3 = pdfGenXiP3->createNLL(*dataGenXiP3); //Comparison pdfs nllOtherXiP1 = pdfOtherXiP1->createNLL(*dataGenXiP1); nllOtherXiP2 = pdfOtherXiP2->createNLL(*dataGenXiP2); nllOtherXiP3 = pdfOtherXiP3->createNLL(*dataGenXiP3); b_REUSE = 1; // for the remaining iterations } //Reuse the NLL - don't delete pointer //On the advice of Lorenzo, to fix the bug with 5.32+ on the RooPoisson evaluation // which I want to use to take advantage of memeory leak fixes // I need to either recreate the NLL each iteration, or use kTRUE in setDATA to // to clone the dataset. //This is what I am doing for now. if(b_REUSE == 1){ nllXiP1->setData(*dataGenXiP1, kTRUE); nllXiP2->setData(*dataGenXiP2, kTRUE); nllXiP3->setData(*dataGenXiP3, kTRUE); nllOtherXiP1->setData(*dataGenXiP1, kTRUE); nllOtherXiP2->setData(*dataGenXiP2, kTRUE); nllOtherXiP3->setData(*dataGenXiP3, kTRUE); } //Values of NLL //COUT to check that this does return the fixed const mean value used in generation // cout << meanVarP1->getVal() << " " << meanVarP2->getVal() << " " << meanVarP3->getVal() << endl; // cout << meanOtherVarP1->getVal() << " " << meanOtherVarP2->getVal() << " " << meanOtherVarP3->getVal() << endl; //Calculate the neg log likelihood by hand nllValPoissonP1 = -log(TMath::Poisson(genHadP1,meanVarP1->getVal())); nllValPoissonP2 = -log(TMath::Poisson(genHadP2,meanVarP2->getVal())); nllValPoissonP3 = -log(TMath::Poisson(genHadP3,meanVarP3->getVal())); nllValOtherPoissonP1 = -log(TMath::Poisson(genHadP1, meanOtherVarP1->getVal())); nllValOtherPoissonP2 = -log(TMath::Poisson(genHadP2, meanOtherVarP2->getVal())); nllValOtherPoissonP3 = -log(TMath::Poisson(genHadP3, meanOtherVarP3->getVal())); nllValXiP1 = nllXiP1->getVal(); nllValXiP2 = nllXiP2->getVal(); nllValXiP3 = nllXiP3->getVal(); nllValOtherXiP1 = nllOtherXiP1->getVal(); nllValOtherXiP2 = nllOtherXiP2->getVal(); nllValOtherXiP3 = nllOtherXiP3->getVal(); cout << "====> Generated NLLs (-lnL) : " << nllValPoissonP1 << " - " << nllValPoissonP2 << " - " << nllValPoissonP3 << " | " << nllVal2DP1 << " - " << nllVal2DP2 << " - " << nllVal2DP3 << endl; cout << "====> Comparison NLLs (-lnL) : " << nllValOtherPoissonP1 << " - " << nllValOtherPoissonP2 << " - " << nllValOtherPoissonP3 << " | " << nllValOther2DP1 << " - " << nllValOther2DP2 << " - " << nllValOther2DP3 << endl; // Calculate the log-likelihood ratio // NOTE need to use i == 0/1 to flip the ratio around I think nllGenSum = nllValPoissonP1+nllValPoissonP2+nllValPoissonP3; nllOtherSum = nllValOtherPoissonP1+nllValOtherPoissonP2+nllValOtherPoissonP3; nllNewGenSum = nllValPoissonP1+nllValPoissonP2+nllValPoissonP3+nllValXiP1+nllValXiP2+nllValXiP3; nllNewOtherSum = nllValOtherPoissonP1+nllValOtherPoissonP2+nllValOtherPoissonP3+nllValOtherXiP1+nllValOtherXiP2+nllValOtherXiP3; //As the ratio should always be taken the same way (ie L(x|s)/L(x|b), need to flip ratio when gen is bkg // ln[L(s)/L(b)] = ln(L(x|s)) - ln(L(x|b)) = -nllGenSum - (-nllOtherSum) as createNLL is -ln(L) if (i == 0){nllRatio = nllOtherSum - nllGenSum;h_tSig->Fill(nllRatio);} if (i == 1){nllRatio = nllGenSum - nllOtherSum;h_tBkg->Fill(nllRatio);} cout << "====> Full ln[L(x|s)/L(x|b)] : " << nllRatio << endl; nllRatio = 0; if(i==0){nllRatio = nllNewOtherSum - nllNewGenSum; h_tSigCheck->Fill(nllRatio); } if(i==1){nllRatio = nllNewGenSum - nllNewOtherSum; h_tBkgCheck->Fill(nllRatio); } cout << "====> nHad and Xi ln[L(x|s)/L(x|b)] : " << nllRatio << endl; cout << nllValXiP1+nllValXiP2+nllValXiP3 << "--" << nllValOtherXiP1+nllValOtherXiP2+nllValOtherXiP3 << endl; // Plot value into histogram within if statement for sig/bkg data // Rinse and repeat //Delete remainig pointers (if possible) // delete setHadP1, setHadP2, setHadP3;//These can be deleted though // delete dataGenXiHpmP1, dataGenXiHpmP2, dataGenXiHpmP3; //These can be deleted (at least before I use them for anything) // delete dataGenHadP1, dataGenHadP2, dataGenHadP3;//Deleting the RooDataSet causes segfault???Not now? } //---Deletion of pointers to prevent memory leaks delete pdfGenPoissonP1, pdfGenPoissonP2, pdfGenPoissonP3; delete pdfOtherPoissonP1, pdfOtherPoissonP2, pdfOtherPoissonP3; delete xi, hpm; /* delete wspace; //SegFault if I delete the pointer for workspace??? delete wspaceOther; */ } f_bkg->Close(); f_sig->Close(); f_results->Write(); f_results->Close(); //RooTrace::dump(); return 0; }