# Source code for smefit.optimize.analytic

# -*- coding: utf-8 -*-
"""Solve the linear plolem to get the best analytic bounds."""
import numpy as np
from rich.style import Style
from rich.table import Table

from smefit.rge import load_rge_matrix

from .. import chi2, log
from ..analyze.pca import impose_constrain
from ..coefficients import CoefficientManager
from ..loader import load_datasets
from . import Optimizer

_logger = log.logging.getLogger(__name__)


def is_semi_pos_def(x):
    """Check if a matrix is positive-semidefinite.

    Parameters
    ----------
    x : np.ndarray
        square matrix to test

    Returns
    -------
    bool
        True if all eigenvalues of ``x`` are non-negative.
    """
    # NOTE(review): for a symmetric input (e.g. a covariance matrix)
    # `np.linalg.eigvalsh` would be numerically more robust, but `eigvals`
    # is kept to preserve behavior on possibly non-symmetric matrices.
    return np.all(np.linalg.eigvals(x) >= 0)
class ALOptimizer(Optimizer):
    """Optimizer specification for the linear analytic solution.

    Parameters
    ----------
    loaded_datasets : `smefit.loader.DataTuple`
        dataset tuple
    coefficients : `smefit.coefficients.CoefficientManager`
        instance of `CoefficientManager` with all the relevant coefficients to fit
    result_path : pathlib.Path
        path to result folder
    result_ID : str
        result name
    single_parameter_fits : bool
        True for individual scan fits
    n_samples : int
        number of replicas to sample
    """

    def __init__(
        self,
        loaded_datasets,
        coefficients,
        result_path,
        result_ID,
        single_parameter_fits,
        n_samples,
    ):
        super().__init__(
            results_path=f"{result_path}/{result_ID}",
            loaded_datasets=loaded_datasets,
            coefficients=coefficients,
            # disable quadratic corrections here: the analytic solution
            # only exists for the linear problem
            use_quad=False,
            single_parameter_fits=single_parameter_fits,
            # this option does not make any difference here
            use_multiplicative_prescription=False,
        )
        self.n_samples = n_samples

    @classmethod
    def from_dict(cls, config):
        """Create object from theory dictionary.

        Parameters
        ----------
        config : dict
            configuration dictionary

        Returns
        -------
        cls : Optimizer
            created object

        Raises
        ------
        ValueError
            if quadratic corrections are requested, since the analytic
            solution is only available for the linear problem.
        """
        if config["use_quad"]:
            raise ValueError(
                "Analytic solution with quadratic corrections is not available."
            )

        rge_dict = config.get("rge", None)
        operators_to_keep = config["coefficients"]
        cutoff_scale = config.get("cutoff_scale", None)
        if rge_dict is not None and config.get("datasets") is not None:
            _logger.info("Loading RGE matrix")
            # the RGE run can enlarge the operator basis
            rgemat, operators_to_keep = load_rge_matrix(
                rge_dict,
                list(operators_to_keep.keys()),
                config["datasets"],
                config.get("theory_path", None),
                cutoff_scale,
            )
            _logger.info("The operators generated by the RGE are: ")
            _logger.info(list(operators_to_keep.keys()))
        else:
            rgemat = None

        loaded_datasets = load_datasets(
            config["data_path"],
            config["datasets"],
            operators_to_keep,
            False,
            config["use_theory_covmat"],
            config["use_t0"],
            False,
            config.get("default_order", "LO"),
            config.get("theory_path", None),
            config.get("rot_to_fit_basis", None),
            config.get("uv_couplings", False),
            rgemat=rgemat,
            # reuse the value fetched above instead of re-reading the config
            cutoff_scale=cutoff_scale,
        )

        coefficients = CoefficientManager.from_dict(config["coefficients"])
        single_parameter_fits = config.get("single_parameter_fits", False)
        return cls(
            loaded_datasets,
            coefficients,
            config["result_path"],
            config["result_ID"],
            single_parameter_fits,
            config["n_samples"],
        )

    def log_result(self, coeff_best, coeff_covmat):
        """Log a table with the best-fit values and 1-sigma errors.

        Parameters
        ----------
        coeff_best : np.ndarray
            best-fit values of the free parameters
        coeff_covmat : np.ndarray
            covariance matrix of the free parameters
        """
        table = Table(style=Style(color="white"), title_style="bold cyan", title=None)
        table.add_column("Parameter", style="bold red", no_wrap=True)
        table.add_column("Best value")
        table.add_column("Error")
        for par, val, var in zip(
            self.free_parameters.index, coeff_best, np.diag(coeff_covmat)
        ):
            table.add_row(f"{par}", f"{val:.3f}", f"{np.sqrt(var):.3f}")
        log.console.print(table)

    def run_sampling(self):
        """Run sampling according to the analytic solution.

        Computes the Gaussian posterior (mean and covariance) of the linear
        problem, stores fit metrics (best-fit point, maximum log-likelihood,
        logZ), and either samples replicas from the posterior
        (``n_samples > 0``) or appends the reduced chi2 to ``chi2.dat``.

        Raises
        ------
        ValueError
            if the coefficient covariance is not positive-semidefinite,
            signalling flat directions.
        """
        fit_result = {}
        # update linear corrections in case of imposed constraints
        new_LinearCorrections = impose_constrain(
            self.loaded_datasets, self.coefficients
        )

        # compute mean and covariance of the Gaussian posterior
        _logger.info("Computing Analytic solution ...")
        fisher = (
            new_LinearCorrections
            @ self.loaded_datasets.InvCovMat
            @ new_LinearCorrections.T
        )
        diff_sm = self.loaded_datasets.Commondata - self.loaded_datasets.SMTheory
        coeff_covmat = np.linalg.inv(fisher)
        # check that there are no flat directions
        if not is_semi_pos_def(coeff_covmat):
            raise ValueError(
                """Coefficient covariance is not symmetric positive-semidefinite,
                There might be flat directions to comment out."""
            )
        coeff_best = (
            coeff_covmat
            @ new_LinearCorrections
            @ self.loaded_datasets.InvCovMat
            @ diff_sm
        )

        # Compute some metrics of the fit result.
        # Get names of coefficients, including the constrained ones.
        # NOTE(review): assumes self.coefficients.value is ordered to match
        # sorted(self.coefficients.name) — confirm against CoefficientManager.
        coeffs_name = sorted(self.coefficients.name)
        # set the best fit point and propagate the constraints
        self.coefficients.set_free_parameters(coeff_best)
        self.coefficients.set_constraints()
        fit_result["best_fit_point"] = dict(zip(coeffs_name, self.coefficients.value))

        # compute max log likelihood at the best-fit point
        chi2_tot = chi2.compute_chi2(
            self.loaded_datasets,
            self.coefficients.value,
            self.use_quad,
            self.use_multiplicative_prescription,
        )
        max_logl = float(-0.5 * chi2_tot)
        fit_result["max_loglikelihood"] = max_logl

        gaussian_integral = np.log(np.sqrt(np.linalg.det(2 * np.pi * coeff_covmat)))
        # NOTE: current formula for logz is not including the prior penalty
        fit_result["logz"] = gaussian_integral + max_logl
        _logger.warning("The logZ computation is not including the prior penalty.")

        # generate samples in case n_samples > 0
        if self.n_samples > 0:
            self.log_result(coeff_best, coeff_covmat)
            # sample from the Gaussian posterior
            _logger.info("Sampling solutions ...")
            fit_result["samples"] = np.random.multivariate_normal(
                coeff_best, coeff_covmat, size=(self.n_samples,)
            )
            self.save(fit_result)
        else:
            # record only chi2 if no samples are requested; the coefficients
            # are already set to the best-fit point and chi2_tot was computed
            # above, so there is no need to redo either.
            chi2_red = chi2_tot / self.loaded_datasets.Commondata.size
            with open(self.results_path / "chi2.dat", "a") as f:
                f.write(f"{chi2_red} \n")

    def save(self, result):
        """Save samples to json inside a dictionary: {coeff: [replica values]}.

        Saving also some basic information about the fit.

        Parameters
        ----------
        result : dict
            fit result dictionary; ``result["samples"]`` holds the raw
            samples with shape (n_samples, n_free_param) and is replaced
            by the per-coefficient posterior mapping before dumping.
        """
        posterior_samples = {c: [] for c in self.coefficients.name}
        # propagate the constraints to each replica
        for sample in result["samples"]:
            self.coefficients.set_free_parameters(sample)
            self.coefficients.set_constraints()
            for c in self.coefficients.name:
                posterior_samples[c].append(self.coefficients[c].value)

        result["samples"] = posterior_samples
        # save fit result
        self.dump_fit_result(self.results_path / "fit_results.json", result)