Source code for smefit.core.chisquared

"""
Module for the computation of chi-squared values
"""
import numpy as np
from . import compute_theory as pr
from . import load_data as ld

# TODO: remove this dependency
# from NNPDF import CommonData


[docs]def compute_chi2(config, dataset, coeffs, labels): """ Compute the components for the chi2 Here we also perform the cross-validation splitting at the level of the residuals, so as to prevent singular covariances matrices. A mask is applied to each experiment and if the dataset has only 1 datapoint it is placed in the training set Returns the theory - exp vector and the inverse cov mat * (theory - exp) """ corrected_theory = pr.make_predictions(config, dataset, coeffs, labels) dat = dataset.Commondata # If we are plotting chi2 we just want the experimental ones diff = dat + dataset.Noise - corrected_theory # The chi2 computation covmat_inv = np.linalg.inv(dataset.CovMat) # Multiply cov^-1 * diff but don't sum covmatdiff = np.einsum("ij,j->i", covmat_inv, diff) # Now we want to mask the diff and the cov mat experiment-by-experiment if config["cv"]: covmat_diff_mask = [] diff_mask = [] for mask in [dataset.TrainingMask, dataset.ValidationMask]: cov_mask = dataset.CovMat[:, mask] cov_mask = cov_mask[mask, :] cov_inv_mask = np.linalg.inv(cov_mask) # Get data, noise, and theory for split data split_cd = np.take(dat, mask) masked_noise = np.take(dataset.Noise, mask) theory_mask = np.take(corrected_theory, mask) # Compute theory - data for split data diff = split_cd + masked_noise - theory_mask # Take just the components we want for each of the sets diff_mask.append(diff) covmat_diff_mask.append(np.einsum("ij,j->i", cov_inv_mask, diff)) # Return the tr and val masked arrays return diff_mask[0], diff_mask[1], covmat_diff_mask[0], covmat_diff_mask[1] # If cross-val is off, return everything with dummy lists for the validation sets return diff, [], covmatdiff, []
[docs]def compute_total_chi2(config, datasets, coefficients, labels): """Function to compute total central chi2 for all datasets assuming no cross-correlations. Returns the chi2 for the tr/val sets (or total chi2 if cross-validation is off) """ tr_diff_total = [] val_diff_total = [] tr_covmatdiff_total = [] val_covmatdiff_total = [] Ndat = [] # Read in the results from the chi2 function trdiff, valdiff, trcovmatdiff, valcovmatdiff = compute_chi2( config, datasets, coefficients, labels ) tr_diff_total.extend(trdiff) val_diff_total.extend(valdiff) tr_covmatdiff_total.extend(trcovmatdiff) val_covmatdiff_total.extend(valcovmatdiff) Ndat.append(len(datasets.Commondata)) # If no cross-val just return the total chi2 (which is in the training set) # and total number of points if config["cv"]: tr_chi2_total = np.einsum("j,j->", tr_diff_total, tr_covmatdiff_total) val_chi2_total = np.einsum("j,j->", val_diff_total, val_covmatdiff_total) # Return the tr and val chi2 and number of points in each return ( [tr_chi2_total, val_chi2_total], [len(tr_diff_total), len(val_diff_total)], ) chi2_total = np.einsum("j,j->", tr_diff_total, tr_covmatdiff_total) return chi2_total, np.sum(Ndat)