13.2.17. Macro “dataserverPCAExample.C”
13.2.17.1. Objective
The goal of this macro is to show how to handle a PCA analysis. It is not much discussed here, as a large description of both methods and concepts is done in Combining these aspects: performing PCA.
13.2.17.2. Macro Uranie
// Read the database
TDataServer * tdsPCA = new TDataServer("tdsPCA", "my TDS");
tdsPCA->fileDataRead("Notes.dat");
// Create the PCA object precising the variables of interest
TPCA * tpca = new TPCA(tdsPCA, "Maths:Physics:French:Latin:Music");
tpca->compute();
bool graphical=true; // do graphs
bool dumponscreen=true; //or dumping results
bool showcoordinate=false; // show the coordinate of the points while dumping results
if(graphical) {
// Draw all point in PCA planes
TCanvas *cPCA = new TCanvas("cpca", "PCA",800,800);
TPad *apad1 = new TPad("apad1","apad1",0, 0.03, 1, 1); apad1->Draw(); apad1->cd();
apad1->Divide(2,2);
apad1->cd(1); tpca->drawPCA(1,2,"Pupil");
apad1->cd(3); tpca->drawPCA(1,3,"Pupil");
apad1->cd(4); tpca->drawPCA(2,3,"Pupil");
// Draw all variable weight in PC definition
TCanvas *cLoading = new TCanvas("cLoading", "Loading Plot",800,800);
TPad *apad2 = new TPad("apad2","apad2",0, 0.03, 1, 1); apad2->Draw(); apad2->cd();
apad2->Divide(2,2);
apad2->cd(1); tpca->drawLoading(1,2);
apad2->cd(3); tpca->drawLoading(1,3);
apad2->cd(4); tpca->drawLoading(2,3);
// Draw the eigen values in different normalisation
TCanvas *c = new TCanvas("cEigenValues", "Eigen Values Plot",1100,500);
TPad *apad3 = new TPad("apad3","apad3",0, 0.03, 1, 1);
apad3->Draw();
apad3->cd();
apad3->Divide(3,1);
TNtupleD *ntd = tpca->getResultNtupleD();
apad3->cd(1); ntd->Draw("eigen:i","","lp");
apad3->cd(2); ntd->Draw("eigen_pct:i","","lp"); gPad->SetGrid();
apad3->cd(3); ntd->Draw("sum_eigen_pct:i","","lp"); gPad->SetGrid();
}
if(dumponscreen)
{
int nPCused=5; // 3 to see only the meaningful ones
TString PCname="", Cosname="", Contrname="", Variable="Pupil";
for (unsigned int iatt=1; iatt<=nPCused; iatt++)
{
//list of PC: PC_1:PC_2:PC_3:PC_4:PC_5
PCname+=Form("PC_%d",iatt)+((iatt!=nPCused)?TString(":"):TString(""));
//list of quality coeff: cosca_1:cosca_2:cosca_3:cosca_4:cosca_5
Cosname+=Form("cosca_%d",iatt)+((iatt!=nPCused)?TString(":"):TString(""));
//list of contribution: contr_1:contr_2:contr_3:contr_4:contr_5
Contrname+=Form("contr_%d",iatt)+((iatt!=nPCused)?TString(":"):TString(""));
}
cout<<endl<<"========= EigenValues ====================="<<endl;
tpca->getResultNtupleD()->Scan("*");
if(showcoordinate)
{
cout<<endl<<"========= EigenVectors ====================="<<endl;
tpca->_matEigenVectors.Print();
cout<<endl<<"========= New Coordinates ====================="<<endl;
tdsPCA->scan( (Variable+":"+PCname).Data() );
}
TDSNtupleD *varRes = tpca->getVariableResultNtupleD();
cout<<endl<<"=============== Looking at variables: Quality of representation ====================="<<endl;
varRes->Scan(("Variable:"+Cosname).Data());
cout<<endl<<"==================== Looking at variables: Contribution to axis ====================="<<endl;
varRes->Scan(("Variable:"+Contrname).Data());
cout<<endl<<"================== Looking at events: Quality of representation ===================="<<endl;
tdsPCA->scan((Variable+":"+Cosname).Data());
cout<<endl<<"===================== Looking at events: Contribution to axis ======================="<<endl;
tdsPCA->scan((Variable+":"+Contrname).Data());
}
This first part of this macro is described in Combining these aspects: performing PCA. We will focus here on the second
part, where all the numerical results are dumped on screen. These results are stored in the dataserver
for the points and in a dedicated ntuple for the variable that can be retrieved by calling the method
getVariableResultNtupleD. In both cases, the results can be split into two kinds:
the quality of the representation: it is called “cosca_X” as it is a squared cosinus of the projection of the source under study (point or subject) on the X-th PC.
the contribution to axis: it is called “contr_X” as it is the contribution of the source under study (point or subject) to the definition of the X-th PC.
We start by defining the list of variables that one might want to display
int nPCused=5; // 3 to see only the meaningful ones
TString PCname="", Cosname="", Contrname="", Variable="Pupil";
for (unsigned int iatt=1; iatt<=nPCused; iatt++)
{
//list of PC: PC_1:PC_2:PC_3:PC_4:PC_5
PCname+=Form("PC_%d",iatt)+((iatt!=nPCused)?TString(":"):TString(""));
//list of quality coeff: cosca_1:cosca_2:cosca_3:cosca_4:cosca_5
Cosname+=Form("cosca_%d",iatt)+((iatt!=nPCused)?TString(":"):TString(""));
//list of contribution: contr_1:contr_2:contr_3:contr_4:contr_5
Contrname+=Form("contr_%d",iatt)+((iatt!=nPCused)?TString(":"):TString(""));
}
From there, once the variable ntuple is retrieved, one can dump both the quality and contribution coefficients for the variable, here the subjects (it leads to the second and third block in the output shown in Console).
TDSNtupleD *varRes = tpca->getVariableResultNtupleD();
cout<<endl<<"=============== Looking at variables: Quality of representation ====================="<<endl;
varRes->Scan(("Variable:"+Cosname).Data());
cout<<endl<<"==================== Looking at variables: Contribution to axis ====================="<<endl;
varRes->Scan(("Variable:"+Contrname).Data());
Finally, one can do the same for the data points, with the same Scan method (which lead to the
fourth and fifth block in the output shown in Console).
cout<<endl<<"================== Looking at events: Quality of representation ===================="<<endl;
tdsPCA->scan((Variable+":"+Cosname).Data());
cout<<endl<<"===================== Looking at events: Contribution to axis ======================="<<endl;
tdsPCA->scan((Variable+":"+Contrname).Data());
13.2.17.3. Console
========= EigenValues =====================
************************************************************
* Row * i.i * eigen.eig * eigen_pct * sum_eigen *
************************************************************
* 0 * 1 * 2.8618175 * 57.236350 * 57.236350 *
* 1 * 2 * 1.1506811 * 23.013622 * 80.249973 *
* 2 * 3 * 0.9831407 * 19.662814 * 99.912787 *
* 3 * 4 * 0.0039371 * 0.0787424 * 99.991530 *
* 4 * 5 * 0.0004234 * 0.0084696 * 100 *
************************************************************
=============== Looking at variables: Quality of representation =====================
************************************************************************************
* Row * Variable * cosca_1 * cosca_2 * cosca_3 * cosca_4 * cosca_5 *
************************************************************************************
* 0 * Maths * 0.649485 * 0.32645 * 0.0235449 * 0.0003614 * 0.0001582 *
* 1 * Physics * 0.804627 * 0.185581 * 0.0086212 * 0.0010515 * 0.0001193 *
* 2 * French * 0.574697 * 0.373378 * 0.0509446 * 0.0008976 * 8.252e-05 *
* 3 * Latin * 0.828562 * 0.157999 * 0.0117556 * 0.0016205 * 6.335e-05 *
* 4 * Music * 0.0044470 * 0.107272 * 0.888274 * 5.976e-06 * 8.299e-08 *
************************************************************************************
==================== Looking at variables: Contribution to axis =====================
************************************************************************************
* Row * Variable * contr_1 * contr_2 * contr_3 * contr_4 * contr_5 *
************************************************************************************
* 0 * Maths * 0.226948 * 0.283702 * 0.0239486 * 0.0917987 * 0.373602 *
* 1 * Physics * 0.281159 * 0.16128 * 0.0087691 * 0.267082 * 0.28171 *
* 2 * French * 0.200815 * 0.324485 * 0.0518182 * 0.228004 * 0.194878 *
* 3 * Latin * 0.289523 * 0.137309 * 0.0119572 * 0.411598 * 0.149613 *
* 4 * Music * 0.0015539 * 0.0932252 * 0.903507 * 0.0015180 * 0.0001959 *
************************************************************************************
================== Looking at events: Quality of representation ====================
************************************************************************************
* Row * Pupil * cosca_1 * cosca_2 * cosca_3 * cosca_4 * cosca_5 *
************************************************************************************
* 0 * Jean * 0.8854534 * 0.0522119 * 0.0619429 * 0.0002655 * 0.0001260 *
* 1 * Aline * 0.7920409 * 0.0542262 * 0.1530381 * 0.0006354 * 5.916e-05 *
* 2 * Annie * 0.4784294 * 0.4813342 * 0.0384099 * 0.0018007 * 2.560e-05 *
* 3 * Monique * 0.8785990 * 0.0024790 * 0.1180158 * 0.0009035 * 2.557e-06 *
* 4 * Didier * 0.8515216 * 0.1382946 * 0.0079754 * 0.0021718 * 3.640e-05 *
* 5 * Andre * 0.2465355 * 0.3961581 * 0.3567663 * 9.471e-05 * 0.0004451 *
* 6 * Pierre * 0.0263090 * 0.7670958 * 0.2060832 * 0.0004625 * 4.925e-05 *
* 7 * Brigitte * 0.1876629 * 0.5897686 * 0.2211409 * 0.0013390 * 8.836e-05 *
* 8 * Evelyne * 0.0583185 * 0.3457931 * 0.5953786 * 0.0004600 * 4.957e-05 *
************************************************************************************
===================== Looking at events: Contribution to axis =======================
************************************************************************************
* Row * Pupil * contr_1 * contr_2 * contr_3 * contr_4 * contr_5 *
************************************************************************************
* 0 * Jean * 0.3012931 * 0.0441856 * 0.0613538 * 0.0656820 * 0.2897335 *
* 1 * Aline * 0.0618830 * 0.0105370 * 0.0348056 * 0.0360894 * 0.0312404 *
* 2 * Annie * 0.0401366 * 0.1004284 * 0.0093797 * 0.1098075 * 0.0145176 *
* 3 * Monique * 0.3784615 * 0.0026558 * 0.1479781 * 0.2828995 * 0.0074454 *
* 4 * Didier * 0.1484067 * 0.0599446 * 0.0040461 * 0.2751439 * 0.0428726 *
* 5 * Andre * 0.0348742 * 0.1393737 * 0.1469047 * 0.0097384 * 0.4255425 *
* 6 * Pierre * 0.0041001 * 0.2973223 * 0.0934888 * 0.0524003 * 0.0518702 *
* 7 * Brigitte * 0.0157710 * 0.1232678 * 0.0540974 * 0.0817989 * 0.0501849 *
* 8 * Evelyne * 0.0150734 * 0.2222843 * 0.4479453 * 0.0864396 * 0.0865925 *
************************************************************************************