Source code for smefit.chi2

# -*- coding: utf-8 -*-
"""Module for the computation of chi-squared values."""
import json

import jax.numpy as jnp
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as opt
from rich.progress import track

from . import compute_theory as pr
from .coefficients import CoefficientManager
from .loader import DataTuple, load_datasets
from .log import logging

_logger = logging.getLogger(__name__)


[docs] def compute_chi2( dataset, coefficients_values, use_quad, use_multiplicative_prescription, use_replica=False, rgemat=None, ): r""" Compute the :math:`\chi^2`. Parameters ---------- dataset : DataTuple dataset tuple coefficients_values : numpy.ndarray |EFT| coefficients values use_multiplicative_prescription: bool if True add the |EFT| contribution as a key factor use_quad: bool if True include also |HO| corrections rgemat: numpy.ndarray solution matrix of the RGE Returns ------- chi2_total : float :math:`\chi^2` value """ # compute theory prediction for each point in the dataset theory_predictions = pr.make_predictions( dataset, coefficients_values, use_quad, use_multiplicative_prescription, rgemat ) # compute experimental central values - theory if use_replica: diff = dataset.Replica - theory_predictions else: diff = dataset.Commondata - theory_predictions invcovmat = dataset.InvCovMat # note @ is slower when running with mpiexec return jnp.einsum("i,ij,j->", diff, invcovmat, diff)
[docs] class Scanner: r"""Class to compute and plot the idividual :math:`\chi^2` scan. Parameters ---------- run_card : dict run card dictionary n_replica : int number of replica to use """ def __init__(self, run_card, n_replica): self.n_replica = n_replica self.use_quad = run_card["use_quad"] self.result_path = f"{run_card['result_path']}/{run_card['result_ID']}" self.use_multiplicative_prescription = run_card.get( "use_multiplicative_prescription", False ) self.datasets = load_datasets( run_card["data_path"], run_card["datasets"], run_card["coefficients"], run_card["order"], run_card["use_quad"], run_card["use_theory_covmat"], False, self.use_multiplicative_prescription, run_card.get("theory_path", None), run_card.get("rot_to_fit_basis", None), run_card.get("uv_couplings", False), run_card.get("external_chi2", False), ) # set all the coefficients to 0 self.coefficients = CoefficientManager.from_dict(run_card["coefficients"]) self.coefficients.set_free_parameters( np.full(self.coefficients.free_parameters.shape[0], 0) ) # build empty dict to store results self.chi2_dict = {} for name, row in self.coefficients.free_parameters.iterrows(): self.chi2_dict[name] = {} self.chi2_dict[name]["x"] = np.linspace(row.minimum, row.maximum, 100)
[docs] def regularized_chi2_func(self, coeff, xs, use_replica): r"""Individual :math:`\chi^2` wrappper over series of values. Parameters ---------- coeff: `smefit.coefficient.Coefficient` coefficient to switch on. xs: numpy.array coeffient values. use_replica: bool if True compute the :math:`\chi^2` on |MC| replicas. Returns: -------- individual reduced :math:`\chi^2` for each x value. """ chi2_list = [] coeff.is_free = True for x in xs: coeff.value = x self.coefficients.set_constraints() chi2_list.append( compute_chi2( self.datasets, self.coefficients.value, self.use_quad, self.use_multiplicative_prescription, use_replica, ) ) return np.array(chi2_list) / self.datasets.Commondata.size
[docs] def compute_bounds(self): r"""Compute individual bounds solving. ..math:: \chi^2`- 2 = 0 """ # chi^2 - 3.841 def chi2_func(xs): return self.regularized_chi2_func(coeff, xs, False) - 3.841 # find the bound for each coefficient bounds = {} x0_interval = [-1000, 1000] for coeff in self.coefficients: if coeff.name not in self.chi2_dict: continue coeff.is_free = True roots = opt.newton( chi2_func, x0_interval, maxiter=400, ) # test roots are not the same try: np.testing.assert_allclose(roots[0] - roots[1], 0, atol=1e-5) raise ValueError( f"single bound found for {coeff.name}: {roots[0]} in range {x0_interval}." ) except AssertionError: # test roots are sorted try: np.testing.assert_allclose(roots, np.sort(roots)) except AssertionError: raise ValueError( f"Bound found for {coeff.name}: {roots} are not sorted." ) # save bounds and update the x ranges bounds[coeff.name] = roots.tolist() self.chi2_dict[coeff.name]["x"] = np.linspace(roots[0], roots[1], 100) coeff.is_free = False coeff.value = 0.0 _logger.info(f"chi^2 bounds for {coeff.name}: {roots}") with open(f"{self.result_path}/chi2_bounds.json", "w", encoding="utf-8") as f: json.dump(bounds, f)
[docs] def compute_scan(self): r"""Compute the individual :math:`\chi^2` scan for each replica and coefficient.""" # loop on replicas for rep in track( range(self.n_replica + 1), description="[green]Computing chi2 for each replica...", ): use_replica = rep != 0 if use_replica: self.datasets = DataTuple( self.datasets.Commondata, self.datasets.SMTheory, self.datasets.OperatorsNames, self.datasets.LinearCorrections, self.datasets.QuadraticCorrections, self.datasets.ExpNames, self.datasets.NdataExp, self.datasets.InvCovMat, np.random.multivariate_normal( self.datasets.Commondata, np.linalg.inv(self.datasets.InvCovMat), ), ) # loop on coefficients for coeff in self.coefficients: if coeff.name not in self.chi2_dict: continue self.chi2_dict[coeff.name][rep] = self.regularized_chi2_func( coeff, self.chi2_dict[coeff.name]["x"], use_replica ) coeff.value = 0.0 coeff.is_free = False
[docs] def plot_scan(self): r"""Plot and save the :math:`\chi^2` scan for each coefficient.""" # loop on coefficients for c, tab in self.chi2_dict.items(): _logger.info(f"Plotting scan for {c}") plt.figure() for rep in range(self.n_replica + 1): chi2_min = np.array(tab[rep]).min() if rep == 0: plt.plot(tab["x"], tab[rep] - chi2_min) else: plt.plot( tab["x"], tab[rep] - chi2_min, alpha=0.2, color="lightskyblue" ) plt.ylabel(r"$\chi^2 - \chi^2_{min}$") plt.hlines( 0, tab["x"].min(), tab["x"].max(), ls="dotted", color="black", lw=0.5 ) plt.title(f"{c}") plt.tight_layout() plt.savefig(f"{self.result_path}/chi2_scan_{c}.png") plt.savefig(f"{self.result_path}/chi2_scan_{c}.pdf")