Source code for ml4eft.core.th_predictions

"""Module to compute theory predictions in the SMEFT"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import os
import ml4eft.analyse.analyse as analyse
from ml4eft.core.truth import vh_prod, tt_prod
from collections import defaultdict


[docs]class TheoryPred: """ SMEFT theory calculator """
[docs] def __init__(self, path_to_theory_pred, bins=None): """ TheoryPred Constructor Parameters ---------- path_to_theory_pred: dict Nested dictionary that contains the paths to the events in the SM and the EFT. It must be of the form {'sm': ``<path_to_sm_data>``, 'lin': {'c1': ``<path_to_c1_data>``, 'c2': ``<path_to_c2_data>`` }, 'quad': {'c1_c1': ``<path_to_c1_c1_data>``, 'c1_c2': ``<path_to_c1_c1_data>``, 'c2_c2': ``<path_to_c1_c1_data>``}} bins: dict, optional Dictionary that specifies the binning per kinematic (keys) Examples -------- We consider :math:`pp \\rightarrow t\\bar{t} \\rightarrow \\ell^+\\ell^-\\nu_\\ell\\bar{\\nu}_\\ell b\\bar{b}` and compute SMEFT theory predictions for :math:`c_{tG}` and :math:`c_{tu}^{(8)}` in the bins :math:`m_{tt}\in[1450, 1500, 1600, 1700, 2000, \infty)` GeV Specify the binning (represent infinity by a large number) and initialise a TheoryPred object: >>> bins = [1450, 1500, 1600, 1700, 2000, 10000] >>> th_predictor = TheoryPred(path_to_theory_pred, kinematic="sqrts_hat", bins=bins) SMEFT predictions can now be computed by >>> th_predictor.build_theory_pred_df() >>> th_predictor.th_dict {'sm': array([0.00579859, 0.00844186, 0.00556653, 0.00799101, 0.00446887]), 'lin': {'ctGRe': array([-0.00176097, -0.00260213, -0.00173878, -0.00256058, -0.00140833]), 'ctu8': array([0.00039829, 0.0006311 , 0.00045633, 0.00075374, 0.0005058 ])}, 'quad': {'ctu8_ctu8': array([0.00033827, 0.0005906 , 0.00048487, 0.00100111, 0.00123616]), 'ctGRe_ctGRe': array([0.00090503, 0.00139715, 0.00099753, 0.00164754, 0.0013285 ]), 'ctGRe_ctu8': array([-6.63046602e-05, -1.10171981e-04, -8.56486609e-05, -1.57056872e-04,-1.38448708e-04])}} """ self.path_to_theory_pred = path_to_theory_pred self.bins = bins self.c_names = [] self.th_dict = defaultdict(dict) self.build_theory_pred_df() self.c_names_unique = self.get_c_names_unique()
[docs] def build_theory_pred_df(self): """ Builds a SMEFT theory prediction dictionary """ for order, dict_fo in self.path_to_theory_pred.items(): if order == 'sm': xsec_sm = self.compute_th_pred(path_to_events=dict_fo) self.th_dict[order] = xsec_sm else: for c_name, path_to_events in dict_fo.items(): xsec_eft = self.compute_th_pred(path_to_events) # read EFT value at which the events have been generated with open(os.path.join(path_to_events, 'info.txt')) as f: c_value = np.array([int(c) for c in f.read().split()]) if order == 'lin': self.th_dict[order][c_name] = (xsec_eft - self.th_dict['sm']) / c_value[0] elif order == 'quad': if 0 in c_value: c_value = np.sum(c_value) ** 2 else: c_value = np.prod(c_value) self.th_dict[order][c_name] = (xsec_eft - self.th_dict['sm']) / c_value if c_name not in self.c_names: self.c_names.append(c_name)
[docs] def get_c_names_unique(self): """ Returns a unique list of EFT parameters in which each parameter only appears once Returns ------- array_like ```(N, ) ndarray``` array of unique EFT parameters present in `path_to_theory_pred` """ c_names = [] if len(self.c_names) > 0: for c_name in self.c_names: if '_' in c_name: c_names.extend(c_name.split('_', 1)) else: c_names.append(c_name) c_names = np.array(c_names) return np.unique(c_names)
[docs] def compute_th_pred(self, path_to_events): """ Computes cross-section in the SMEFT for a given binning if specified in `bins`. Otherwise it returns an average of the total cross section averaged over the available replicas. Parameters ---------- path_to_events: str Path to events, e.g. ``tt_llvlvlbb/tt_ctGRe`` Returns ------- array_like Cross section per bin averaged over the available replicas """ # path to events events_paths = analyse.Analyse.get_event_paths(path_to_events) # store the xsec per bin for all the replicas xsec_collected = [] for path in events_paths: events, tot_xsec = analyse.Analyse.load_events(event_path=path) if self.bins is None: xsec_i = tot_xsec else: n_tot = len(events) if len(self.bins) == 1: kin, = self.bins.keys() x = events[kin].values n_i, _, = np.histogram(x, self.bins[kin]) elif len(self.bins) == 2: kin_1, kin_2 = self.bins.keys() x = events[kin_1].values y = events[kin_2].values n_i, _, _ = np.histogram2d(x, y, bins=(self.bins[kin_1], self.bins[kin_2])) xsec_i = (n_i / n_tot) * tot_xsec xsec_collected.append(xsec_i) xsec_collected = np.array(xsec_collected) # average over the replicas return np.mean(xsec_collected, axis=0)
[docs] def compute_diff_coefficients(self, optimizer): """ Computes unbinned SMEFT theory predictions for the analytical models, i.e. :math:`t\\bar{t}` or :math:`ZH` Parameters ---------- optimizer: :class:`ml4eft.limits.optimize_ns.Optimize` Optimizer object Returns ------- dict unbinned SMEFT predictions """ events = optimizer.observed_data features = optimizer.th_features n_features = len(features) if optimizer.process == 'ZH': dsigma_dx_sm = np.array( [vh_prod.dsigma_dmvh_dy(row['y'], row['m_zh'], 0, 0, lin=True, quad=False) for index, row in observed_data.iterrows()]) dsigma_dx_c1 = np.array( [vh_prod.dsigma_dmvh_dy(row['y'], row['m_zh'], 10, 0, lin=True, quad=False) for index, row in observed_data.iterrows()]) dsigma_dx_c1_lin_coef = (dsigma_dx_c1 - dsigma_dx_sm) / 10 dsigma_dx_c2 = np.array( [vh_prod.dsigma_dmvh_dy(row['y'], row['m_zh'], 0, 10, lin=True, quad=False) for index, row in observed_data.iterrows()]) dsigma_dx_c2_lin_coef = (dsigma_dx_c2 - dsigma_dx_sm) / 10 dsigma_dx_eft = {'lin': np.stack((dsigma_dx_c1_lin_coef, dsigma_dx_c2_lin_coef))} elif optimizer.process == 'ttparton': if n_features == 1: dsigma_dx = tt_prod.dsigma_dmtt if n_features == 2: dsigma_dx = tt_prod.dsigma_dmtt_dy if n_features == 3: dsigma_dx = tt_prod.dsigma_dmtt_dy_dpt dsigma_dx_sm = np.array([dsigma_dx(*row[features]) for _, row in events.iterrows()]) dsigma_dx_c1 = np.array([dsigma_dx(*row[features], [-10, 0], "lin") for _, row in events.iterrows()]) dsigma_dx_c2 = np.array( [dsigma_dx(*row[features], [0, 10], "lin") for _, row in events.iterrows()]) dsigma_dx_c1_quad = np.array( [dsigma_dx(*row[features], [-10, 0], "quad") for _, row in events.iterrows()]) dsigma_dx_c2_quad = np.array( [dsigma_dx(*row[features], [0, 10], "quad") for _, row in events.iterrows()]) dsigma_dx_c1_lin_coef = (dsigma_dx_c1 - dsigma_dx_sm) / (-10) dsigma_dx_c2_lin_coef = (dsigma_dx_c2 - dsigma_dx_sm) / 10 dsigma_dx_c1_quad_coef = (dsigma_dx_c1_quad - dsigma_dx_sm) / 100 dsigma_dx_c2_quad_coef = (dsigma_dx_c2_quad - dsigma_dx_sm) / 100 dsigma_dx = {'sm': dsigma_dx_sm, 'lin': {'ctGRe': dsigma_dx_c1_lin_coef, 'ctu8': dsigma_dx_c2_lin_coef}, 'quad': {'ctGRe_ctGRe': dsigma_dx_c1_quad_coef, 'ctu8_ctu8': dsigma_dx_c2_quad_coef}} return dsigma_dx