Source code for smefit.covmat

# -*- coding: utf-8 -*-
"""
Module containing computation of covariance matrix.
Based on
https://github.com/NNPDF/nnpdf/tree/master/validphys2/src/validphys/covmats_utils.py
https://github.com/NNPDF/nnpdf/tree/master/validphys2/src/validphys/covmats.py

"""

import numpy as np
import pandas as pd
import scipy.linalg as la

INTRA_DATASET_SYS_NAME = ("UNCORR", "CORR", "THEORYUNCORR", "THEORYCORR")


[docs] def construct_covmat(stat_errors: np.array, sys_errors: pd.DataFrame): """ Basic function to construct a covariance matrix (covmat), given the statistical error and a dataframe of systematics. Errors with name UNCORR or THEORYUNCORR are added in quadrature with the statistical error to the diagonal of the covmat. Other systematics are treated as correlated; their covmat contribution is found by multiplying them by their transpose. Parameters ---------- stat_errors: numpy.ndarray a 1-D array of statistical uncertainties sys_errors: pandas.DataFrame a dataframe with shape (N_data * N_sys) and systematic name as the column headers. The uncertainties should be in the same units as the data. Returns ------- cov_mat: numpy.ndarray Covariance matrix Notes ----- This function doesn't contain any logic to ignore certain contributions to the covmat, if you wanted to not include a particular systematic/set of systematics i.e all uncertainties with MULT errors, then filter those out of ``sys_errors`` before passing that to this function. """ diagonal = stat_errors**2 is_uncorr = sys_errors.columns.isin(("UNCORR", "THEORYUNCORR")) diagonal += (sys_errors.loc[:, is_uncorr].to_numpy() ** 2).sum(axis=1) corr_sys_mat = sys_errors.loc[:, ~is_uncorr].to_numpy() return np.diag(diagonal) + corr_sys_mat @ corr_sys_mat.T
[docs] def covmat_from_systematics(stat_errors: list, sys_errors: list): """Given two lists containing the statistic and systematic errors, construct the full covariance matrix. This is similar to :py:meth:`construct_covmat` except that special corr systematics are concatenated across all datasets before being multiplied by their transpose to give off block-diagonal contributions. The other systematics contribute to the block diagonal in the same way as :py:meth:`construct_covmat`. Parameters ---------- stat_errors : list[(stat_errors: np.array)] list of stat_errors for each dataset. sys_errors : list[(sys_errors: pd.DataFrame)] list of sys_errors for each dataset. Returns ------- cov_mat : np.array Numpy array which is N_dat x N_dat (where N_dat is the number of data points) containing uncertainty and correlation information. """ special_corrs = [] block_diags = [] for dataset_stat_errors, dataset_sys_errors in zip(stat_errors, sys_errors): # separate out the special uncertainties which can be correlated across # datasets is_intra_dataset_error = dataset_sys_errors.columns.isin(INTRA_DATASET_SYS_NAME) block_diags.append( construct_covmat( dataset_stat_errors, dataset_sys_errors.loc[:, is_intra_dataset_error] ) ) special_corrs.append(dataset_sys_errors.loc[:, ~is_intra_dataset_error]) # concat systematics across datasets special_sys = pd.concat(special_corrs, axis=0, sort=False) # non-overlapping systematics are set to NaN by concat, fill with 0 instead. special_sys.fillna(0, inplace=True) diag = la.block_diag(*block_diags) covmat = diag + special_sys.to_numpy() @ special_sys.to_numpy().T return covmat