"""
Module for the computation of chi-squared values
"""
import numpy as np
from . import compute_theory as pr
from . import load_data as ld
# TODO: remove this dependency
# from NNPDF import CommonData
[docs]def compute_chi2(config, dataset, coeffs, labels):
""" Compute the components for the chi2
Here we also perform the cross-validation splitting at the level of the residuals,
so as to prevent singular covariances matrices.
A mask is applied to each experiment and if the dataset has only 1 datapoint
it is placed in the training set
Returns the theory - exp vector and the inverse cov mat * (theory - exp)
"""
corrected_theory = pr.make_predictions(config, dataset, coeffs, labels)
dat = dataset.Commondata
# If we are plotting chi2 we just want the experimental ones
diff = dat + dataset.Noise - corrected_theory
# The chi2 computation
covmat_inv = np.linalg.inv(dataset.CovMat)
# Multiply cov^-1 * diff but don't sum
covmatdiff = np.einsum("ij,j->i", covmat_inv, diff)
# Now we want to mask the diff and the cov mat experiment-by-experiment
if config["cv"]:
covmat_diff_mask = []
diff_mask = []
for mask in [dataset.TrainingMask, dataset.ValidationMask]:
cov_mask = dataset.CovMat[:, mask]
cov_mask = cov_mask[mask, :]
cov_inv_mask = np.linalg.inv(cov_mask)
# Get data, noise, and theory for split data
split_cd = np.take(dat, mask)
masked_noise = np.take(dataset.Noise, mask)
theory_mask = np.take(corrected_theory, mask)
# Compute theory - data for split data
diff = split_cd + masked_noise - theory_mask
# Take just the components we want for each of the sets
diff_mask.append(diff)
covmat_diff_mask.append(np.einsum("ij,j->i", cov_inv_mask, diff))
# Return the tr and val masked arrays
return diff_mask[0], diff_mask[1], covmat_diff_mask[0], covmat_diff_mask[1]
# If cross-val is off, return everything with dummy lists for the validation sets
return diff, [], covmatdiff, []
[docs]def compute_total_chi2(config, datasets, coefficients, labels):
"""Function to compute total central chi2 for all datasets
assuming no cross-correlations.
Returns the chi2 for the tr/val sets (or total chi2 if cross-validation is off)
"""
tr_diff_total = []
val_diff_total = []
tr_covmatdiff_total = []
val_covmatdiff_total = []
Ndat = []
# Read in the results from the chi2 function
trdiff, valdiff, trcovmatdiff, valcovmatdiff = compute_chi2(
config, datasets, coefficients, labels
)
tr_diff_total.extend(trdiff)
val_diff_total.extend(valdiff)
tr_covmatdiff_total.extend(trcovmatdiff)
val_covmatdiff_total.extend(valcovmatdiff)
Ndat.append(len(datasets.Commondata))
# If no cross-val just return the total chi2 (which is in the training set)
# and total number of points
if config["cv"]:
tr_chi2_total = np.einsum("j,j->", tr_diff_total, tr_covmatdiff_total)
val_chi2_total = np.einsum("j,j->", val_diff_total, val_covmatdiff_total)
# Return the tr and val chi2 and number of points in each
return (
[tr_chi2_total, val_chi2_total],
[len(tr_diff_total), len(val_diff_total)],
)
chi2_total = np.einsum("j,j->", tr_diff_total, tr_covmatdiff_total)
return chi2_total, np.sum(Ndat)