# -*- coding: utf-8 -*-
import pathlib
import shutil

import numpy as np
import pandas as pd
import yaml

from ..compute_theory import make_predictions
from ..covmat import covmat_from_systematics
from ..loader import Loader, load_datasets
from ..log import logging

_logger = logging.getLogger(__name__)
class Projection:
    def __init__(
        self,
        commondata_path,
        theory_path,
        dataset_names,
        projections_path,
        coefficients,
        order,
        use_quad,
        use_theory_covmat,
        rot_to_fit_basis,
        fred_tot,
        fred_sys,
        use_t0,
    ):
        self.commondata_path = commondata_path
        self.theory_path = theory_path
        self.dataset_names = dataset_names
        self.projections_path = projections_path
        self.coefficients = coefficients
        self.order = order
        self.use_quad = use_quad
        self.use_theory_covmat = use_theory_covmat
        self.rot_to_fit_basis = rot_to_fit_basis
        self.fred_tot = fred_tot
        self.fred_sys = fred_sys
        self.use_t0 = use_t0

        self.datasets = load_datasets(
            self.commondata_path,
            self.dataset_names,
            self.coefficients,
            self.order,
            self.use_quad,
            self.use_theory_covmat,
            self.use_t0,
            False,
            theory_path=self.theory_path,
        )
        if self.coefficients:
            _logger.info(
                "Some coefficients are specified in the runcard: EFT corrections will be used for the central values"
            )
    @classmethod
    def from_config(cls, projection_card):
        """
        Builds a Projection instance from a projection runcard.

        Parameters
        ----------
        projection_card: pathlib.Path
            path to the projection runcard

        Returns
        -------
        Projection
            the configured Projection instance
        """
        with open(projection_card, encoding="utf-8") as f:
            projection_config = yaml.safe_load(f)

        commondata_path = pathlib.Path(projection_config["commondata_path"]).absolute()
        theory_path = pathlib.Path(projection_config["theory_path"]).absolute()
        projections_path = pathlib.Path(
            projection_config["projections_path"]
        ).absolute()

        dataset_names = projection_config["datasets"]
        coefficients = projection_config.get("coefficients", [])
        order = projection_config.get("order", "LO")
        use_quad = projection_config.get("use_quad", False)
        use_theory_covmat = projection_config.get("use_theory_covmat", True)
        rot_to_fit_basis = projection_config.get("rot_to_fit_basis", None)
        fred_tot = projection_config.get("fred_tot", 1)
        fred_sys = projection_config.get("fred_sys", 1)
        use_t0 = projection_config.get("use_t0", False)

        return cls(
            commondata_path,
            theory_path,
            dataset_names,
            projections_path,
            coefficients,
            order,
            use_quad,
            use_theory_covmat,
            rot_to_fit_basis,
            fred_tot,
            fred_sys,
            use_t0,
        )
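
    # A minimal projection runcard consumed by ``from_config`` could look like
    # the sketch below; the paths and the dataset name are illustrative, not
    # shipped defaults:
    #
    #   commondata_path: /path/to/commondata
    #   theory_path: /path/to/theory
    #   projections_path: /path/to/projections
    #   datasets:
    #     - EXP_dataset_13TeV        # hypothetical dataset name
    #   coefficients: {}             # empty: central values stay pure SM
    #   order: LO
    #   use_quad: false
    #   use_theory_covmat: true
    #   rot_to_fit_basis: null
    #   fred_tot: 1                  # total-error reduction factor
    #   fred_sys: 1                  # systematics reduction factor
    #   use_t0: false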
    def compute_cv_projection(self):
        """
        Computes the new central value under the EFT hypothesis; this reduces
        to the SM prediction when all coefficients are zero.

        Returns
        -------
        cv : numpy.ndarray
            SM + EFT theory predictions
        """
        cv = self.datasets.SMTheory

        if self.coefficients:
            coefficient_values = []
            for coeff in self.datasets.OperatorsNames:
                coefficient_values.append(self.coefficients[coeff]["value"])
            cv = make_predictions(
                self.datasets, coefficient_values, self.use_quad, False
            )
        return cv
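
    # ``self.coefficients`` is expected to map operator names to entries with
    # a "value" key, e.g. (hypothetical operator name and value):
    #
    #   coefficients:
    #     OtG: {value: 0.1}
    #
    # so that the EFT shift can be evaluated on top of the SM prediction.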
    def rescale_sys(self, sys, fred_sys):
        """
        Projects the systematic uncertainties by reducing them with the
        factor fred_sys.

        Parameters
        ----------
        sys: pandas.DataFrame
            systematic uncertainties
        fred_sys: float
            systematics reduction factor

        Returns
        -------
        pandas.DataFrame
            projected (rescaled) systematic uncertainties
        """
        # check whether the systematics are artificial (i.e. no breakdown of
        # separate systematic sources), characterised by a square matrix with
        # negative entries
        is_square = sys.shape[0] == sys.shape[1]
        is_artificial = is_square and np.any(sys < 0)
        if is_artificial:
            # reconstruct the covariance matrix and keep only its diagonal
            cov_tot = sys @ sys.T
            sys_diag = np.sqrt(np.diagonal(cov_tot))
            # rescale the systematics and rebuild the DataFrame
            sys_rescaled = np.diag(sys_diag * fred_sys)
            return pd.DataFrame(sys_rescaled, index=sys.index, columns=sys.columns)
        return sys * fred_sys
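
    # For artificial systematics only the diagonal of the reconstructed
    # covariance matrix C = S S^T can be kept: with no breakdown into separate
    # sources the bin-by-bin correlations cannot be rescaled consistently, so
    # each bin retains sqrt(C_ii) * fred_sys as an uncorrelated uncertainty.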
    @staticmethod
    def rescale_stat(stat, lumi_old, lumi_new):
        """
        Projects the statistical uncertainties from lumi_old to lumi_new.

        Parameters
        ----------
        stat: numpy.ndarray
            old statistical uncertainties
        lumi_old: float
            old luminosity
        lumi_new: float
            new luminosity

        Returns
        -------
        numpy.ndarray
            updated statistical uncertainties after projection
        """
        fred_stat = np.sqrt(lumi_old / lumi_new)
        return stat * fred_stat
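
    # Statistical uncertainties scale like 1/sqrt(N), with the event count N
    # proportional to the integrated luminosity, hence the
    # sqrt(lumi_old / lumi_new) factor: projecting, say, from 139 fb^-1 to
    # 3000 fb^-1 shrinks them by a factor of roughly 4.6.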
    def build_projection(self, lumi_new, closure):
        """
        Constructs the projected runcard by updating the central values and
        the statistical and systematic uncertainties.

        Parameters
        ----------
        lumi_new: float
            new luminosity; the statistical uncertainties are rescaled accordingly
        closure: bool
            set to True for a Level-1 (L1) closure test: no rescaling is
            applied, only the central values are fluctuated according to the
            original uncertainties
        """
        # compute central values under projection
        cv = self.compute_cv_projection()

        cnt = 0
        for dataset_idx, num_data in enumerate(self.datasets.NdataExp):
            dataset_name = self.datasets.ExpNames[dataset_idx]
            path_to_dataset = self.commondata_path / f"{dataset_name}.yaml"
            _logger.info(f"Building projection for: {dataset_name}")

            try:
                with open(path_to_dataset, encoding="utf-8") as f:
                    data_dict = yaml.safe_load(f)
            except FileNotFoundError:
                _logger.info(f"Dataset {dataset_name} not in the database")
                continue

            idxs = slice(cnt, cnt + num_data)
            central_values = np.array(data_dict["data_central"])
            cv_theory = cv[idxs]

            # ratio of the SM prediction to the experimental central value
            ratio_sm_exp = cv_theory / central_values
            # set negative ratios to one
            ratio_sm_exp[ratio_sm_exp < 0] = 1

            # rescale the statistical uncertainty to the SM
            stat = np.asarray(data_dict["statistical_error"]) * np.sqrt(ratio_sm_exp)

            # load systematics
            num_sys = data_dict["num_sys"]
            sys_add = np.array(data_dict["systematics"])
            if num_sys != 0:
                type_sys = np.array(data_dict["sys_type"])
                name_sys = data_dict["sys_names"]

                # express systematics as percentage values of the central values
                sys_mult = sys_add / central_values * 1e2

                # identify ADD and MULT systematics, and replace the MULT ones
                # with the corresponding value computed from the theory central
                # value; required for the implementation of the t0 prescription
                indx_add = np.where(type_sys == "ADD")[0]
                indx_mult = np.where(type_sys == "MULT")[0]
                sys_t0 = np.zeros((num_sys, num_data))
                sys_t0[indx_add] = sys_add[indx_add].reshape(sys_t0[indx_add].shape)
                sys_t0[indx_mult] = (
                    sys_mult[indx_mult].reshape(sys_t0[indx_mult].shape)
                    * cv_theory
                    * 1e-2
                )

                # limit case with a single systematic
                if num_sys == 1:
                    name_sys = [name_sys]

            # limit case with no systematics
            else:
                name_sys = ["UNCORR"]
                sys_t0 = np.zeros((num_sys + 1, num_data))

            # these are absolute systematics
            sys = pd.DataFrame(data=sys_t0.T, columns=name_sys)
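
            # ``sys`` now has one row per data point and one column per
            # systematic source; the column names encode the correlation
            # treatment (e.g. "UNCORR" for fully uncorrelated sources).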
            th_covmat = self.datasets.ThCovMat[idxs, idxs]

            if not closure:
                # if all stats are zero, we only have access to the total
                # error, which we rescale by fred_tot
                no_stats = not np.any(stat)
                if no_stats:
                    fred = self.fred_tot
                    sys_red = self.rescale_sys(sys, fred)
                    stat_red = stat
                # if stat and sys are provided separately
                else:
                    fred = self.fred_sys
                    lumi_old = self.datasets.Luminosity[dataset_idx]
                    stat_red = self.rescale_stat(stat, lumi_old, lumi_new)
                    sys_red = self.rescale_sys(sys, fred)

                if num_data > 1:
                    data_dict["systematics"] = sys_red.T.values.tolist()
                else:
                    data_dict["systematics"] = sys_red.T.values.flatten().tolist()
                if data_dict["sys_type"] is not None:
                    data_dict["sys_type"] = ["ADD"] * num_sys if num_sys > 1 else "ADD"
                data_dict["statistical_error"] = stat_red.tolist()

                # build the covariance matrix for the projection from the
                # rescaled uncertainties
                newcov = covmat_from_systematics([stat_red], [sys_red])
            else:  # closure test
                # store absolute uncertainties and convert all multiplicative
                # uncertainties to additive ones
                if num_data > 1:
                    data_dict["systematics"] = sys.T.values.tolist()
                else:
                    data_dict["systematics"] = sys.T.values.flatten().tolist()
                if data_dict["sys_type"] is not None:
                    data_dict["sys_type"] = ["ADD"] * num_sys if num_sys > 1 else "ADD"
                data_dict["statistical_error"] = stat.tolist()

                newcov = covmat_from_systematics([stat], [sys])

            if self.use_theory_covmat:
                newcov += th_covmat
            # add L1 noise to the central values
            cv_projection = np.random.multivariate_normal(cv[idxs], newcov)

            # replace the central values with the updated ones
            if len(cv_projection) > 1:
                data_dict["data_central"] = cv_projection.tolist()
            else:
                data_dict["data_central"] = float(cv_projection[0])

            projection_folder = self.projections_path
            projection_folder.mkdir(exist_ok=True)
            if projection_folder != self.commondata_path:
                if not closure:
                    with open(
                        f"{projection_folder}/{dataset_name}_proj.yaml", "w"
                    ) as file:
                        yaml.dump(data_dict, file, sort_keys=False)
                else:
                    with open(f"{projection_folder}/{dataset_name}.yaml", "w") as file:
                        yaml.dump(data_dict, file, sort_keys=False)
            else:
                # note: the local ``sys`` DataFrame shadows the (never
                # imported) ``sys`` module, so raise SystemExit directly
                # instead of calling sys.exit()
                raise SystemExit(
                    "Choose a projection folder different from the commondata folder to avoid overwriting results"
                )
            # copy the corresponding theory predictions with _proj appended
            # to the filename
            if not closure:
                shutil.copy(
                    self.theory_path / f"{dataset_name}.json",
                    self.theory_path / f"{dataset_name}_proj.json",
                )

            cnt += num_data
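

# A minimal driver for this class, assuming the package import path below
# (hypothetical; adjust to wherever this module lives in the package):
#
#   import pathlib
#   from smefit.projections import Projection
#
#   projection = Projection.from_config(pathlib.Path("runcards/projection.yaml"))
#   # project to 3000 fb^-1 of HL-LHC luminosity (illustrative value)
#   projection.build_projection(lumi_new=3000, closure=False)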