# -*- coding: utf-8 -*-
from os import listdir
import subprocess
import copy
import yaml
import numpy as np
import matplotlib
import matplotlib.pyplot as py
from matplotlib import rc
from matplotlib import colors as mcolors
from mpl_toolkits.axes_grid1 import make_axes_locatable
import pandas as pd
from collections.abc import Iterable
from progress.bar import Bar
from ..optimize.optimize import OPTIMIZE
from ..core import chisquared as chi2
from ..core import compute_theory as pr
from .tools import confidence_ellipse, latex_packages
# global matplotlib settings
matplotlib.use('PDF')
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
rc('text', usetex=True)
class ANALYZE(object): # pylint: disable=anomalous-backslash-in-string
"""
This class produces a fit report based on
the result ID names given in the command-line execution.
Disclaimer by JJE as of 01/03/21: this code is not written well.
It's a product of cowboy coding, but it gets the job done (in
most cases). Improvement is needed (e.g. unifying notation,
aggregating repeated calculations, etc.)
"""
# TODO: implement what is suggested in the description
# splitting the code into different submodules
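# Illustrative usage sketch (the root path, fit ID and report name below
# are hypothetical, not taken from an actual run):
#
#   report = ANALYZE('/path/to/smefit', ['210121_NS_GLOBAL_NLO_HO'], 'report')
#   report.write_summary()
#   report.write_chi2_table()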
def __init__(self, root_path, fits, outputname):
self.outputname = outputname
self.fits = fits
self.fit_labels={}
self.path = root_path
cnt=1
for k in self.fits:
lbl_list=k.split('_')
# Fit labels for plots
if "check" in self.outputname:
if "260121-NS" in lbl_list[0]:
lbl = r"${\rm final\ 2\ %s \ %s }$"%(lbl_list[-2], lbl_list[-1])
else:
lbl = r"${\rm final\ 3\ %s \ %s }$"%(lbl_list[-2], lbl_list[-1])
elif 'cpQ3_NLO_NHO' in self.outputname:
if "cpQ3fixed" in lbl_list:
lbl = r"${\rm cpQ3 \ fixed,\ Linear,\ %s \ EFT }$"%(lbl_list[-2])
else:
lbl = r"${\rm cpQ3 \ fitted,\ Linear,\ %s \ EFT }$"%(lbl_list[-2])
elif 'cpQ3_NLO_HO' in self.outputname:
if "cpQ3fixed" in lbl_list:
lbl = r"${\rm cpQ3 \ fixed,\ Quadratic,\ %s \ EFT }$"%(lbl_list[-2])
else:
lbl = r"${\rm cpQ3 \ fitted,\ Quadratic,\ %s \ EFT }$"%(lbl_list[-2])
elif 'noEWPO_NLO_NHO' in self.outputname:
if "noEWPO" in lbl_list:
lbl = r"${\rm w/o \ EWPO,\ Linear,\ %s \ EFT }$"%(lbl_list[-2])
else:
lbl = r"${\rm with\ EWPO,\ Linear,\ %s \ EFT }$"%(lbl_list[-2])
elif 'noEWPO_NLO_HO' in self.outputname:
if "noEWPO" in lbl_list:
lbl = r"${\rm w/o \ EWPO,\ Quadratic,\ %s \ EFT }$"%(lbl_list[-2])
else:
lbl = r"${\rm with\ EWPO,\ Quadratic,\ %s \ EFT }$"%(lbl_list[-2])
elif 'NSvsMCFit' in self.outputname:
if 'NS' in lbl_list[0]:
lbl=r"${\rm EFT\ NS}$"
elif 'MCFit' in lbl_list[0]:
lbl=r"${\rm EFT\ MCfit}$"
elif 'TOP19' in self.outputname:
if 'GLOBAL' in lbl_list:
lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
elif 'TOP19' in lbl_list:
lbl = r"${\rm Top-only\ (Data\ '19),\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
else:
lbl = r"${\rm Top-only\ (Data\ '21),\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
elif 'TOPphilic' in self.outputname:
if 'GLOBAL' in lbl_list:
lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
else:
lbl = r"${\rm Top\ Philic,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
elif 'TOP' in self.outputname:
if 'GLOBAL' in lbl_list:
lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
else:
lbl = r"${\rm Top-only,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
elif 'HIGGS' in self.outputname:
if 'GLOBAL' in lbl_list:
lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
else:
lbl = r"${\rm Higgs-only,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
elif 'noVV' in self.outputname:
if 'GLOBAL' in lbl_list:
lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
else:
lbl = r"${\rm TOP+Higgs,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
elif 'goodSM' in self.outputname:
if 'GLOBAL' in lbl_list:
lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
else:
lbl = r"${\rm Top+Higgs+VV\ (good\ SM),\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
elif 'noHighE' in self.outputname:
if 'GLOBAL' in lbl_list:
lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
else:
lbl = r"${\rm Top+Higgs+VV\ (no\ high-E),\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
elif 'AC' in self.outputname:
if 'AC' in lbl_list:
lbl = r"${\rm with\ Top \ AC\ %s \ %s }$"%(lbl_list[-2], lbl_list[-1])
else:
lbl = r"${\rm w/out\ Top \ AC\ %s \ %s }$"%(lbl_list[-2], lbl_list[-1])
elif 'individual' in self.outputname:
if 'SNS' in lbl_list:
lbl = r"${\rm Individual}$"
elif 'NHO' in lbl_list:
lbl = r"${\rm Top+Higgs+VV,\ Linear\ %s \ EFT }$"%(lbl_list[-2])
else:
lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
elif 'nott2D' in self.outputname:
if 'nott2D' in lbl_list:
lbl = r"${\rm w/out\ Top \ MttYtt\ %s \ %s }$"%(lbl_list[-2], lbl_list[-1])
else:
lbl = r"${\rm with\ Top \ MttYtt\ %s \ %s }$"%(lbl_list[-2], lbl_list[-1])
else:
if 'NHO' in lbl_list:
lbl = r"${\rm Top+Higgs+VV,\ Linear\ %s \ EFT }$"%(lbl_list[-2])
else:
lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2])
self.fit_labels[k] = lbl
cnt+=1
self.load_results()
def load_results(self):
"""
Loads the fit results and computes the theoretical predictions
for each data point that was included.
Single NS (SNS) fits are treated separately; only the parameters
are loaded in this case.
"""
# Load coefficients and labels
self.coeffs = {}
self.labels = {}
for k in self.fits:
if k not in self.coeffs:
self.coeffs[k]=[]
if k not in listdir(f'{self.path}/results'):
raise NotADirectoryError(f'{k} not found in results directory. Exiting...')
outputdir = f'{self.path}/results/{k}'
if 'SNS_' in k:
cnt=0
self.labels[k] = [fname for fname in sorted(listdir(outputdir)) if fname.startswith('O')]
for f in sorted(listdir(outputdir)):
if not f.startswith('O'):
continue
self.coeffs[k].append([])
for filename in listdir(outputdir+'/%s'%f):
if not filename.startswith('SMEFT_coeffs_') or filename.endswith('_0.txt'):
continue
file = open(outputdir+'/%s/%s'%(f,filename),'r')
data = file.readlines()
lbls = list(data[0].split())
idx = np.where(np.array(lbls)==f)[0][0]
self.coeffs[k][cnt].append(float(data[1].split()[idx]))
file.close()
cnt+=1
else:
for f in listdir(outputdir):
if not f.startswith('SMEFT_coeffs_') or f.endswith('_0.txt'):
continue
file = open(outputdir+'/%s'%f,'r')
data = file.readlines()
self.labels[k] = data[0].split()
self.coeffs[k].append(np.array([float(x) for x in data[1].split()]))
file.close()
# Load data and compute theory
self.config = {}
self.dataset = {}
self.theory = {}
self.mean_theory = {}
self.std_theory = {}
self.sm_theory = {}
self.cv_flag={}
self.boot_flag={}
self.full_coeffs={}
self.full_labels={}
for k in self.fits:
self.config[k] = {}
# Load results YAML file
yaml_file = f'{self.path}/results/{k}/{k}.yaml'
with open(yaml_file) as f:
self.config[k] = yaml.safe_load(f)
self.config[k]['root_path']=f'{self.path}'
# Turn off cross-validation and replica generation
self.config[k]['rep']=0
self.cv_flag[k]=self.config[k]['cv']
self.config[k]['cv']=False
self.boot_flag[k]=self.config[k]['bootstrap']
self.config[k]['bootstrap']=False
# Set paths to theory tables
self.config[k]['corrections_path'] = self.config[k]['root_path'] + '/tables/operator_res/' + self.config[k]['order'] + '/'
self.config[k]['theory_path'] = self.config[k]['root_path'] + '/tables/theory/'
# Set t0 to central data
if not self.config[k]['CT0L']:
self.config[k]['t0']='data'
opt=OPTIMIZE(self.config[k])
opt.get_free_params()
# Get free parameter indices
# This is needed when non-fitted coefficients and their
# values are also printed in the output files
nonzero=[]
free_labels = opt.free_param_labels
for l in free_labels:
nonzero.append(self.labels[k].index(l))
nonzero=np.array(nonzero)
self.dataset[k]=opt.loaded_datasets
# dat = self.dataset[k].Commondata
# covmat_inv = np.linalg.inv(self.dataset[k].CovMat)
self.theory[k] = []
self.full_coeffs[k]=[]
self.full_labels[k]=opt.coefficients.labels
if 'SNS_' in k:
continue
# Loop over replicas/samples and compute theory
bar = Bar(r'Computing theory for each replica', max=len(self.coeffs[k]))
for j in range(len(self.coeffs[k])):
# Set free parameters in optimize class, propagate them
# to opt.coefficients.values, and set constraints
opt.free_params = self.coeffs[k][j][nonzero]
opt.propogate_params()
opt.set_constraints()
theory_rep = pr.make_predictions(self.config[k],self.dataset[k],opt.coefficients.values,opt.coefficients.labels)
# diff = dat + self.dataset[k].Noise - theory_rep # Noise is only necessary for closure test chi2
# chi2 = np.einsum('i,ij,j->',diff,covmat_inv,diff)
self.theory[k].append(theory_rep)
self.full_coeffs[k].append(copy.deepcopy(opt.coefficients.values))
bar.next()
bar.finish()
# Compute mean theory
self.theory[k] = np.array(self.theory[k])
self.mean_theory[k] = np.mean(self.theory[k],axis=0)
self.std_theory[k] = np.std(self.theory[k],axis=0)
# Compute SM Theory
self.sm_theory[k] = np.array(pr.make_predictions(self.config[k],self.dataset[k],np.zeros(len(opt.coefficients.values)),opt.coefficients.labels))
def read_dataset(self, dataset):
""" Read a dataset and its corresponding systypes """
with open(f'{self.path}/tables/exp_data/DATA_{dataset}.dat','r') as f:
datalines = f.readlines()
with open(f'{self.path}/tables/exp_data/SYSTYPE_{dataset}_DEFAULT.dat','r') as f:
systypes = f.readlines()
return datalines, systypes
def run_pdflatex(self, L, filename):
"""
Dump to file and run pdflatex
Parameters
----------
L : list(str)
latex lines
filename : str
file name
"""
L.append(r"\end{document}")
L = [l+"\n" for l in L]
latex_src = f'{self.path}/reports/{self.outputname}/{filename}.tex'
file = open(latex_src, "w")
file.writelines(L)
file.close()
subprocess.call(f"pdflatex -halt-on-error -output-directory {self.path}/reports/{self.outputname}/ {latex_src}", shell=True)
subprocess.call(f"rm {self.path}/reports/{self.outputname}/*.log {self.path}/reports/{self.outputname}/*.aux {self.path}/reports/{self.outputname}/*.out", shell=True)
def combine_plots(self, L, latex_src, pdfname, caption=False):
"""Combine plots and run pdflatex"""
for k in listdir(f'{self.path}/reports/{self.outputname}'):
if not k.startswith(pdfname):
continue
L.append(r"\begin{figure}[t]")
L.append(r"\centering")
L.append(r"\includegraphics[width=0.99\textwidth]{%s/reports/%s/%s}"%(self.path,self.outputname,k))
if caption:
L.append(r"\caption{{\rm %s}}"%self.fit_labels[k[12:-4]] )
L.append(r"\end{figure}")
self.run_pdflatex(L, latex_src )
self.move_to_meta(f"{pdfname}*")
def write_summary(self):
"""
Provides a summary of the fits included in the report: the fit settings,
fitted parameters (including any parameter constraints), and
datasets.
Uses data_references.yaml, data_groups.yaml, and coeff_groups.yaml
YAML files. The first gives the references used for hyperlinks,
and the other two organize the data and parameters into groups
(top, higgs, etc.).
"""
self.data_list = []
arxiv = {}
ndat_total={}
coeff_list = []
# Load data references YAML file
# TODO: place these files outside the package
yaml_file = f'{self.path}/smefit/analyze/data_references.yaml'
with open(yaml_file) as f:
arxiv = yaml.safe_load(f)
# Load data grouping YAML file
yaml_file = f'{self.path}/smefit/analyze/data_groups.yaml'
with open(yaml_file) as f:
self.data_groups = yaml.safe_load(f)
for k in self.fits:
ndat_total[k]=0
for i in range(len(self.dataset[k].ExpNames)):
set_name=self.dataset[k].ExpNames[i]
ndat_exp=self.dataset[k].NdataExp[i]
ndat_total[k]+=ndat_exp
if set_name not in self.data_list:
self.data_list.append(set_name)
for coeff in self.config[k]['coefficients']:
if coeff not in coeff_list:
coeff_list.append(coeff)
# Writing summary report
# ================ Summary in latex =====================
L = latex_packages()
L.append(r"\usepackage{underscore}")
L.append(r"\begin{document}")
# Write fit details
for i,k in enumerate(self.fits):
L.append(r"{\bf \underline{Fit%d:}} %s"%(i+1,self.fit_labels[k])+'\n')
L.append("Number of replicas/samples: %d\n"%(len(self.coeffs[k])))
L.append("pQCD order: %s\n"%str(self.config[k]['order']))
if self.config[k]['HOlambda']=='HO':
lbl=r'$\Lambda^{-4}$'
elif self.config[k]['HOlambda']=='NHO':
lbl=r'$\Lambda^{-2}$'
L.append("SMEFT order: %s\n"%lbl)
if self.config[k]['CT0L']:
L.append('Closure Test Level: %s\n'%str(self.config[k]['ctlevel']))
L.append('Cross validation: %s\n'%str(self.cv_flag[k]))
L.append("MC Replica Generation: %s\n"%str(self.boot_flag[k]))
if 'bounds' in self.config[k].keys():
L.append("Parameter bounds taken from: %s\n"%str(self.config[k]['bounds']))
# Write dataset table
for ig,group in enumerate(self.data_groups):
L.append('\n')
L.append(r"\begin{table}[H]")
L.append(r"\centering")
temp = r"\begin{tabular}{|l|"
for i in range(len(self.fits)):
temp += "c|"
temp+="}"
L.append(temp)
L.append(r"\hline")
temp=' Datasets & '
for i,k in enumerate(self.fits):
if i != len(self.fits)-1:
temp += r" %s &"%self.fit_labels[k]
else:
temp += r" %s \\ \hline"%self.fit_labels[k]
L.append(temp)
for isub,Dataset in enumerate(self.data_groups[group]):
if np.all([Dataset not in self.config[k]['datasets'] for k in self.config]):
continue
temp = r""
if Dataset in arxiv.keys():
temp += "\href{"+arxiv[Dataset]+"}{"+Dataset+"} & "
else:
temp += Dataset + " & "
for i,k in enumerate(self.fits):
if i != len(self.fits)-1:
if Dataset in self.config[k]['datasets']:
temp += r"\checkmark & "
else:
temp += r" & "
else:
if Dataset in self.config[k]['datasets']:
temp += r"\checkmark \\ \hline"
else:
temp += r" \\ \hline"
if ig==len(self.data_groups)-1 and isub == len(self.data_groups[group])-1 and i==len(self.fits)-1:
temp += r" \hline"
L.append(temp)
if ig==len(self.data_groups)-1:
temp = r""
temp += r"Total number of data points & "
for i, k in enumerate(self.fits):
if i != len(self.fits)-1:
temp += str(ndat_total[k])+ " & "
else:
temp += str(ndat_total[k])+ r" \\ \hline"
L.append(temp)
L.append(r"\end{tabular}")
if ig==len(self.data_groups)-1:
L.append(r"\caption{Dataset comparison.}")
L.append(r"\end{table}")
# Write coefficients table
L.append('\n')
L.append('\n')
L.append(r"\begin{table}[H]")
L.append(r"\centering")
temp = r"\begin{tabular}{|c|c|"
for i in range(len(self.fits)):
temp += "c|c|"
temp+="}"
L.append(temp)
L.append(r"\hline")
temp=' & & '
for i,k in enumerate(self.fits):
if i != len(self.fits)-1:
temp += r" \multicolumn{2}{c|}{%s} & "%self.fit_labels[k]
else:
temp += r" \multicolumn{2}{c|}{%s} \\ \hline"%self.fit_labels[k]
L.append(temp)
temp = r'Class & Coefficients &'
for i,k in enumerate(self.fits):
if i!= len(self.fits)-1:
temp += r" Fitted & Fixed &"
else:
temp += r" Fitted & Fixed \\ \hline"
L.append(temp)
#Load coefficient yaml card
yaml_file = f'{self.path}/smefit/analyze/coeff_groups.yaml'
with open(yaml_file) as f:
coeff_config = yaml.safe_load(f)
# Build lists for grouped coefficients
coeffs_organized = {}
for group in coeff_config.keys():
coeffs_organized[group]=[]
for c in coeff_config[group]:
if c=='OPff':
c='Off'
if c not in coeffs_organized[group] and np.any([c in self.config[k]['coefficients'] for k in self.fits]):
coeffs_organized[group].append(c)
for ig,group in enumerate(coeffs_organized.keys()):
if len(coeffs_organized[group])==0:
continue
L.append(r'\multirow{%d}{*}{%s}'%(len(coeffs_organized[group]),group))
for isub,coeff in enumerate(coeffs_organized[group]):
temp=''
temp+=' & '+coeff.replace('O','c')+" & "
for i,k in enumerate(self.fits):
if i != len(self.fits)-1:
if coeff in self.config[k]['coefficients']:
if self.config[k]['coefficients'][coeff]['fixed'] is False:
temp += r"\checkmark & &"
elif self.config[k]['coefficients'][coeff]['fixed'] is True:
temp += r" & = %d &"%self.config[k]['coefficients'][coeff]['value']
else:
temp+= r" & = "
if isinstance(self.config[k]['coefficients'][coeff]['value'],Iterable):
for m in range(len(self.config[k]['coefficients'][coeff]['value'])):
temp+=r"%0.2f%s "%(self.config[k]['coefficients'][coeff]['value'][m],\
self.config[k]['coefficients'][coeff]['fixed'][m])
if m!=len(self.config[k]['coefficients'][coeff]['value'])-1:
temp+=r" + "
else:
temp += r"%0.2f%s "%(self.config[k]['coefficients'][coeff]['value'],\
self.config[k]['coefficients'][coeff]['fixed'])
temp+=r"&"
else:
temp += r" & & "
elif i == len(self.fits)-1 and isub!=len(coeffs_organized[group])-1:
if coeff in self.config[k]['coefficients']:
if self.config[k]['coefficients'][coeff]['fixed'] is False:
temp += r"\checkmark & \\ \cline{2-%d}"%(2+2*len(self.fits))
elif self.config[k]['coefficients'][coeff]['fixed'] is True:
temp += r" & = %d \\ \cline{2-%d}"%(self.config[k]['coefficients'][coeff]['value'],\
2+2*len(self.fits))
else:
temp+= r" & = "
if isinstance(self.config[k]['coefficients'][coeff]['value'],Iterable):
for m in range(len(self.config[k]['coefficients'][coeff]['value'])):
temp+=r"%0.2f%s "%(self.config[k]['coefficients'][coeff]['value'][m],\
self.config[k]['coefficients'][coeff]['fixed'][m])
if m!=len(self.config[k]['coefficients'][coeff]['value'])-1:
temp+=r" + "
else:
temp += r"%0.2f%s "%(self.config[k]['coefficients'][coeff]['value'],\
self.config[k]['coefficients'][coeff]['fixed'])
temp+=r"\\ \cline{2-%d}"%(2+2*len(self.fits))
else:
temp += r" & \\ \cline{2-%d}"%(2+2*len(self.fits))
elif i==len(self.fits)-1 and isub==len(coeffs_organized[group])-1:
if coeff in self.config[k]['coefficients']:
if self.config[k]['coefficients'][coeff]['fixed'] is False:
temp += r"\checkmark & \\ \hline"
elif self.config[k]['coefficients'][coeff]['fixed'] is True:
temp += r" & = %d \\ \hline"%(self.config[k]['coefficients'][coeff]['value'])
else:
temp+= r" & = "
if isinstance(self.config[k]['coefficients'][coeff]['value'],Iterable):
for m in range(len(self.config[k]['coefficients'][coeff]['value'])):
temp+=r"%0.2f%s "%(self.config[k]['coefficients'][coeff]['value'][m],\
self.config[k]['coefficients'][coeff]['fixed'][m])
if m!=len(self.config[k]['coefficients'][coeff]['value'])-1:
temp+=r" + "
else:
temp += r"%0.2f%s "%(self.config[k]['coefficients'][coeff]['value'],\
self.config[k]['coefficients'][coeff]['fixed'])
temp += r"\\ \hline"
else:
temp += r" & \\ \hline"
L.append(temp)
L.append(r"\end{tabular}")
L.append(r"\caption{Coefficient comparison.}")
L.append(r"\end{table}")
self.run_pdflatex(L, 'summary' )
def write_chi2_table(self):
"""
Computes and writes the table of chi2 values for each
fit included in the report.
Uses data_references.yaml and data_groups.yaml YAML files. The former
gives the references used to add a hyperlink while the latter groups
the datasets in a logical way.
"""
# Load data YAML file
yaml_file = f'{self.path}/smefit/analyze/data_references.yaml'
with open(yaml_file) as f:
arxiv = yaml.safe_load(f)
# Load fisher YAML file
yaml_file = f'{self.path}/smefit/analyze/fisher_groups.yaml'
with open(yaml_file) as f:
data_groups = yaml.safe_load(f)
self.chi2 = {}
self.chi2_sm = {}
self.ndat = {}
self.chi2_total = {}
self.chi2_rep = {}
self.chi2_total_rep = {}
self.ndat_total ={}
for k in self.fits:
self.chi2[k]={}
self.chi2_sm[k]={}
self.ndat[k]={}
self.chi2_rep[k]={}
self.chi2_total[k]=0
self.chi2_total_rep[k]=[]
self.ndat_total[k]=0
dat = self.dataset[k].Commondata
diff = dat + self.dataset[k].Noise - self.mean_theory[k] # Noise is only necessary for closure test chi2
diff_sm = dat - self.sm_theory[k]
covmat_inv = np.linalg.inv(self.dataset[k].CovMat)
covmat_diff = np.einsum('ij,j->i',covmat_inv, diff)
covmat_diff_sm = np.einsum('ij,j->i',covmat_inv, diff_sm)
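# The covmat_diff vectors above are C^{-1}.diff; contracting them with diff
# below yields chi2 = diff^T C^{-1} diff. A minimal self-contained check of
# that factorization (illustrative numbers only):
#
#   d = np.array([0.1, -0.2])
#   cinv = np.linalg.inv(np.array([[1.0, 0.2], [0.2, 2.0]]))
#   assert np.isclose(np.einsum('i,i->', d, np.einsum('ij,j->i', cinv, d)),
#                     d @ cinv @ d)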
# Compute chi2 by replica
for irep in range(len(self.theory[k])):
diff_rep = dat + self.dataset[k].Noise - self.theory[k][irep]
covmat_diff_rep = np.einsum('ij,j->i',covmat_inv,diff_rep)
cnt=0
chi2_temp = 0.
for i in range(len(self.dataset[k].ExpNames)):
set_name=self.dataset[k].ExpNames[i]
ndat_exp=self.dataset[k].NdataExp[i]
if set_name not in self.chi2_rep[k]:
self.chi2_rep[k][set_name] = []
self.chi2_rep[k][set_name].append(np.einsum('i,i->',diff_rep[cnt:cnt+ndat_exp],covmat_diff_rep[cnt:cnt+ndat_exp]))
chi2_temp += np.einsum('i,i->',diff_rep[cnt:cnt+ndat_exp],covmat_diff_rep[cnt:cnt+ndat_exp])
cnt+=ndat_exp
self.chi2_total_rep[k].append(chi2_temp)
cnt=0
# Compute chi2 of the mean theory <T> per experiment
for i in range(len(self.dataset[k].ExpNames)):
set_name=self.dataset[k].ExpNames[i]
ndat_exp=self.dataset[k].NdataExp[i]
self.chi2[k][set_name] = np.einsum('i,i->',diff[cnt:cnt+ndat_exp],covmat_diff[cnt:cnt+ndat_exp])
self.chi2_sm[k][set_name] = np.einsum('i,i->',diff_sm[cnt:cnt+ndat_exp],covmat_diff_sm[cnt:cnt+ndat_exp])
self.ndat[k][set_name] = ndat_exp
self.chi2_total[k]+=self.chi2[k][set_name]
self.ndat_total[k]+=ndat_exp
cnt+=ndat_exp
print('Finished!')
Entry={}
Ndats={}
data_list=[]
# Loop over results
chi2_sm_total={}
ndat_sm_total={}
for k in self.fits:
chi2_sm_total[k]=0
ndat_sm_total[k]=0
Entry[k]={}
for group in data_groups:
for Dataset in data_groups[group]:
if Dataset not in self.chi2[k]:
continue
Entry[k][Dataset]={}
Ndat = self.ndat[k][Dataset]
Entry[k][Dataset]["Dataset"] = Dataset
Entry[k][Dataset]["Ndat"] = Ndat
Entry[k][Dataset]["Chi2"] = str(round(self.chi2[k][Dataset], 4))
Entry[k][Dataset]["Chi2/Ndat"] = str(round(self.chi2[k][Dataset]/Ndat, 4))
chi2_std = np.std(self.chi2_rep[k][Dataset],axis=0)
chi2_upper = (self.chi2[k][Dataset]+chi2_std)
chi2_lower = (self.chi2[k][Dataset]-chi2_std)
if self.chi2_sm[k][Dataset] > chi2_upper:
Entry[k][Dataset]["Color"] = "blue"
elif self.chi2_sm[k][Dataset] < chi2_lower:
Entry[k][Dataset]["Color"] = "red"
else:
Entry[k][Dataset]["Color"] = "black"
chi2_sm_total[k] += self.chi2_sm[k][Dataset]
ndat_sm_total[k] += Ndat
Ndats[Dataset] = Ndat
if Dataset not in data_list:
data_list.append(Dataset)
Entry[Dataset]={}
Entry[Dataset]["SM_Chi2"] = str(round(self.chi2_sm[k][Dataset], 4))
Entry[Dataset]["SM_Chi2/Ndat"] = str(round(self.chi2_sm[k][Dataset]/Ndat, 4))
if group not in Entry:
Entry[group]={}
Entry[group]['SM_Chi2'] = self.chi2_sm[k][Dataset]
Entry[group]['Ndat'] = Ndat
else:
Entry[group]['SM_Chi2'] += self.chi2_sm[k][Dataset]
Entry[group]['Ndat'] += Ndat
if group not in Entry[k]:
Entry[k][group]={}
Entry[k][group]['Chi2'] = self.chi2[k][Dataset]
Entry[k][group]['Ndat'] = Ndat
else:
Entry[k][group]['Chi2'] += self.chi2[k][Dataset]
Entry[k][group]['Ndat'] += Ndat
Entry[k]["total_Ndat"] = self.ndat_total[k]
Entry[k]["total_chi2"] = str(round(self.chi2_total[k], 4))
Entry[k]["total_chi2/Ndat"] = str(round(self.chi2_total[k]/self.ndat_total[k], 4))
Entry[k]['total_sm_chi2/Ndat'] = str(round(chi2_sm_total[k]/ndat_sm_total[k], 4))
for k in self.fits:
for group in data_groups:
if group in Entry[k]:
Entry[k][group]['Chi2/Ndat'] = str(round(Entry[k][group]['Chi2']/Entry[k][group]['Ndat'], 4))
if group in Entry:
Entry[group]['SM_Chi2/Ndat'] = str(round(Entry[group]['SM_Chi2']/Entry[group]['Ndat'], 4))
# Writing Chi2 table
# ================ Chi2 latex table =====================
L = latex_packages()
L.append(r"\usepackage{underscore}")
L.append(r"\begin{document}")
# Chi2 table
for ig,group in enumerate(self.data_groups):
L.append(r"\begin{table}[H]")
L.append(r"\centering")
temp = r"\begin{tabular}{|l|c|c|"
for i in range(len(self.fits)):
temp += "C{2cm}|"
temp+="}"
L.append(temp)
L.append(r"\hline")
temp = r" \multicolumn{2}{|c|}{} & SM &"
for i,k in enumerate(self.fits):
if i != len(self.fits)-1:
temp += r"Fit%d & "%(i+1)
else:
temp += r"Fit%d \\ \hline"%(i+1)
L.append(temp)
temp = r'Dataset & $N_{\rm data}$ & $\chi^2/N_{\rm data}$ &'
for i in range(len(self.fits)):
if i != len(self.fits)-1:
temp+=r'$\chi^2/N_{\rm data}$ &'
else:
temp += r'$\chi^2/N_{\rm data}$ \\ \hline'
L.append(temp)
for isub,Dataset in enumerate(self.data_groups[group]):
if np.all([Dataset not in self.config[k]['datasets'] for k in self.config]):
continue
temp = r""
temp += "\href{"+arxiv[Dataset]+"}{"+Dataset+"} & "+str(Ndats[Dataset])+" & "+Entry[Dataset]["SM_Chi2/Ndat"]+" & "
for i,k in enumerate(self.fits):
if i != len(self.fits)-1:
if Dataset in Entry[k].keys():
temp += r"{\color{"+Entry[k][Dataset]["Color"]+"} "+Entry[k][Dataset]["Chi2/Ndat"]+"} & "
else:
temp += " & "
else:
if Dataset in Entry[k].keys():
temp += r"{\color{"+Entry[k][Dataset]["Color"]+"} "+Entry[k][Dataset]["Chi2/Ndat"]+ r"} \\ \hline"
else:
temp += r" \\ \hline"
if ig==len(self.data_groups)-1 and isub == len(self.data_groups[group])-1:
temp += r" \hline"
L.append(temp)
if ig==len(self.data_groups)-1:
temp = r"Total & & & "
for i, k in enumerate(self.fits):
if i != len(self.fits)-1:
temp += Entry[k]["total_chi2/Ndat"]+" ("+Entry[k]["total_sm_chi2/Ndat"]+") & "
else:
temp += Entry[k]["total_chi2/Ndat"]+" ("+Entry[k]["total_sm_chi2/Ndat"]+")"+r" \\ \hline"
L.append(temp)
L.append(r"\end{tabular}")
if ig==len(self.data_groups)-1:
L.append(r"\caption{$\chi^2$ table. In parenthesis is the total SM $\chi^2$ for the dataset included in the fit. Blue color text represents a value that is lower than the SM $\chi^2$ by more than one standard deviation of the $\chi^2$ distribution. Similarly, red color text represents values that are higher than the SM $\chi^2$ by more than one standard deviation.}")
L.append(r"\end{table}")
# Chi2 table for grouped data
L.append('\n')
L.append('\n')
L.append(r"\begin{table}[H]")
L.append(r"\centering")
temp = r"\begin{tabular}{|l|c|c|"
for i in range(len(self.fits)):
temp += "C{2cm}|"
temp+="}"
L.append(temp)
L.append(r"\hline")
temp = r" \multicolumn{2}{|c|}{} & SM &"
for i,k in enumerate(self.fits):
if i != len(self.fits)-1:
temp += r"Fit%d & "%(i+1)
else:
temp += r"Fit%d \\ \hline"%(i+1)
L.append(temp)
temp = r'Process & $N_{\rm data}$ & $\chi^2/N_{\rm data}$ &'
for i in range(len(self.fits)):
if i != len(self.fits)-1:
temp+=r'$\chi^2/N_{\rm data}$ &'
else:
temp += r'$\chi^2/N_{\rm data}$ \\ \hline'
L.append(temp)
for ig,group in enumerate(data_groups):
if group not in Entry:
continue
temp = r""
temp += group+" & "+str(Entry[group]['Ndat'])+" & "+Entry[group]["SM_Chi2/Ndat"]+" & "
for i,k in enumerate(self.fits):
if i != len(self.fits)-1:
if group in Entry[k].keys():
temp += Entry[k][group]["Chi2/Ndat"]+" & "
else:
temp += " & "
else:
if group in Entry[k].keys():
temp += Entry[k][group]["Chi2/Ndat"]+ r" \\ \hline"
else:
temp += r" \\ \hline"
if ig == len(data_groups)-1:
temp += r" \hline"
L.append(temp)
temp = r"Total & & & "
for i, k in enumerate(self.fits):
if i != len(self.fits)-1:
temp += Entry[k]["total_chi2/Ndat"]+" ("+Entry[k]["total_sm_chi2/Ndat"]+") & "
else:
temp += Entry[k]["total_chi2/Ndat"]+" ("+Entry[k]["total_sm_chi2/Ndat"]+")"+r" \\ \hline"
L.append(temp)
L.append(r"\end{tabular}")
L.append(r"\caption{$\chi^2$ table for grouped data}")
L.append(r"\end{table}")
self.run_pdflatex(L, 'chi2' )
def write_Fisher_table(self):
"""
Computes and writes the Fisher information table, and plots heat map.
Uses coeff_groups.yaml and fisher_groups.yaml YAML files to organize
the parameters and datasets in a meaningful way.
Linear Fisher information depends only on the theoretical corrections
(kappas), while quadratic information requires fit results.
Therefore, do not trust the numbers in parentheses (quadratic
information) if a quadratic fit is not included in the report.
Parameter constraints are also taken into account. Only fitted
degrees of freedom are shown in the table.
Note: special treatment is done with cW and cB, degrees
of freedom that are included at the fit level, but not at the presentation
level. In this case, cW and cB are re-expressed as cpWB and cpD.
"""
corrections_val={}
corrections_dict={}
HOcorrections_val={}
HOcorrections_dict={}
theory_reps = {}
data_exp = {}
#Load coefficient yaml card
yaml_file = f'{self.path}/smefit/analyze/coeff_groups.yaml'
with open(yaml_file) as f:
coeff_config = yaml.safe_load(f)
# Get organized coefficient list
coeff_org = {}
coeff_org_fixed = {}
for group in coeff_config.keys():
if group not in coeff_org:
coeff_org[group]=[]
coeff_org_fixed[group]=[]
for c in coeff_config[group]:
if c=='OPff':
c='Off'
if c not in coeff_org[group] and\
np.any([c in self.config[k]['coefficients'] for k in self.fits]) and\
np.any([self.config[k]['coefficients'][c]['fixed'] is False for k in self.fits if c in self.config[k]['coefficients']]):
coeff_org[group].append(c)
# Add correction factors from fixed parameters (i.e. impose indirect constraints)
new_coeff_dict = {}
new_HOcoeff_dict = {}
for k in self.fits:
fixed_params = []
for j,c in enumerate(self.dataset[k].CorrectionsDICT):
if self.config[k]['coefficients'][c]['fixed'] is False:
continue
elif self.config[k]['coefficients'][c]['fixed'] is not True:
if isinstance(self.config[k]['coefficients'][c]['value'],Iterable):
for m,coeff in enumerate(self.config[k]['coefficients'][c]['fixed']):
idx = np.where(self.dataset[k].CorrectionsDICT==coeff)[0][0]
corr = float(self.config[k]['coefficients'][c]['value'][m])
self.dataset[k].CorrectionsVAL.T[idx] += self.dataset[k].CorrectionsVAL.T[j]*corr
else:
coeff = self.config[k]['coefficients'][c]['fixed']
idx = np.where(self.dataset[k].CorrectionsDICT==coeff)[0][0]
corr = float(self.config[k]['coefficients'][c]['value'])
self.dataset[k].CorrectionsVAL.T[idx] += self.dataset[k].CorrectionsVAL.T[j]*corr
elif self.config[k]['coefficients'][c]['fixed'] is True:
corr = float(self.config[k]['coefficients'][c]['value'])
self.dataset[k].CorrectionsVAL.T[j]*=corr
if self.config[k]['coefficients'][c]['fixed'] is not False:
self.dataset[k].CorrectionsVAL.T[j]*=0.
fixed_params.append(j)
# Replace OW and OB with OpWB and OpD
if 'OW' in self.dataset[k].CorrectionsDICT and 'OB' in self.dataset[k].CorrectionsDICT:
OW_idx = np.where(self.dataset[k].CorrectionsDICT=='OW')[0][0]
OB_idx = np.where(self.dataset[k].CorrectionsDICT=='OB')[0][0]
OW_vals = self.dataset[k].CorrectionsVAL.T[OW_idx]
OB_vals = self.dataset[k].CorrectionsVAL.T[OB_idx]
self.dataset[k].CorrectionsVAL.T[OW_idx] = -(1./.088625)*OW_vals
self.dataset[k].CorrectionsVAL.T[OB_idx] = (1./.3545)*OB_vals - (1./.088625)*(.1626)*(1./.3545)*OW_vals
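# The hard-coded factors 0.088625, 0.3545 and 0.1626 above implement the
# linear change of basis from (OW, OB) to (OpWB, OpD); their numerical
# origin is not documented here.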
new_coeff_dict[k] = np.delete(self.dataset[k].CorrectionsDICT,[j for j in fixed_params],0)
# Need to do the same for O(Lambda^-4) corrections
fixed_HOparams = []
for j,c in enumerate(self.dataset[k].HOcorrectionsDICT):
c1, c2 = c.split('*')
if self.config[k]['coefficients'][c1]['fixed'] is False and self.config[k]['coefficients'][c2]['fixed'] is False:
continue
# Both parameters fixed
elif isinstance(self.config[k]['coefficients'][c1]['fixed'],bool) is False\
and isinstance(self.config[k]['coefficients'][c2]['fixed'],bool) is False:
if isinstance(self.config[k]['coefficients'][c1]['value'],Iterable)\
and isinstance(self.config[k]['coefficients'][c2]['value'],Iterable):
for m1,coeff1 in enumerate(self.config[k]['coefficients'][c1]['fixed']):
for m2,coeff2 in enumerate(self.config[k]['coefficients'][c2]['fixed']):
if coeff1+'*'+coeff2 in self.dataset[k].HOcorrectionsDICT:
idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff1+'*'+coeff2)[0][0]
elif coeff2+'*'+coeff1 in self.dataset[k].HOcorrectionsDICT:
idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff2+'*'+coeff1)[0][0]
corr1 = float(self.config[k]['coefficients'][c1]['value'][m1])
corr2 = float(self.config[k]['coefficients'][c2]['value'][m2])
self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr1*corr2
elif isinstance(self.config[k]['coefficients'][c1]['value'],Iterable):
coeff2 = self.config[k]['coefficients'][c2]['fixed']
corr2 = float(self.config[k]['coefficients'][c2]['value'])
for m1,coeff1 in enumerate(self.config[k]['coefficients'][c1]['fixed']):
if coeff1+'*'+coeff2 in self.dataset[k].HOcorrectionsDICT:
idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff1+'*'+coeff2)[0][0]
elif coeff2+'*'+coeff1 in self.dataset[k].HOcorrectionsDICT:
idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff2+'*'+coeff1)[0][0]
corr1 = float(self.config[k]['coefficients'][c1]['value'][m1])
self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr1*corr2
elif isinstance(self.config[k]['coefficients'][c2]['value'],Iterable):
coeff1 = self.config[k]['coefficients'][c1]['fixed']
corr1 = float(self.config[k]['coefficients'][c1]['value'])
for m2,coeff2 in enumerate(self.config[k]['coefficients'][c2]['fixed']):
if coeff1+'*'+coeff2 in self.dataset[k].HOcorrectionsDICT:
idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff1+'*'+coeff2)[0][0]
elif coeff2+'*'+coeff1 in self.dataset[k].HOcorrectionsDICT:
idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff2+'*'+coeff1)[0][0]
corr2 = float(self.config[k]['coefficients'][c2]['value'][m2])
self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr1*corr2
else:
coeff1 = self.config[k]['coefficients'][c1]['fixed']
coeff2 = self.config[k]['coefficients'][c2]['fixed']
if coeff1 is False or coeff2 is False:
continue
if coeff1+'*'+coeff2 in self.dataset[k].HOcorrectionsDICT:
idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff1+'*'+coeff2)[0][0]
elif coeff2+'*'+coeff1 in self.dataset[k].HOcorrectionsDICT:
idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff2+'*'+coeff1)[0][0]
corr1 = float(self.config[k]['coefficients'][c1]['value'])
corr2 = float(self.config[k]['coefficients'][c2]['value'])
self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr1*corr2
fixed_HOparams.append(j)
# One parameter fixed, the other free
else:
if isinstance(self.config[k]['coefficients'][c1]['fixed'],bool) is False:
if isinstance(self.config[k]['coefficients'][c1]['value'],Iterable):
for m,coeff in enumerate(self.config[k]['coefficients'][c1]['fixed']):
corr = float(self.config[k]['coefficients'][c1]['value'][m])
if coeff+'*'+c2 in self.dataset[k].HOcorrectionsDICT:
idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff+'*'+c2)[0][0]
self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr
else:
self.dataset[k].HOcorrectionsDICT[j] = self.dataset[k].HOcorrectionsDICT[j].replace(c1+'*'+c2,coeff+'*'+c2)
self.dataset[k].HOcorrectionsVAL.T[j]*=corr
else:
coeff = self.config[k]['coefficients'][c1]['fixed']
corr = float(self.config[k]['coefficients'][c1]['value'])
if coeff+'*'+c2 in self.dataset[k].HOcorrectionsDICT:
idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff+'*'+c2)[0][0]
self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr
else:
self.dataset[k].HOcorrectionsDICT[j] = self.dataset[k].HOcorrectionsDICT[j].replace(c1+'*'+c2,coeff+'*'+c2)
self.dataset[k].HOcorrectionsVAL.T[j]*=corr
if isinstance(self.config[k]['coefficients'][c2]['fixed'],bool) is False:
if isinstance(self.config[k]['coefficients'][c2]['value'],Iterable):
for m,coeff in enumerate(self.config[k]['coefficients'][c2]['fixed']):
corr = float(self.config[k]['coefficients'][c2]['value'][m])
if c1+'*'+coeff in self.dataset[k].HOcorrectionsDICT:
idx = np.where(self.dataset[k].HOcorrectionsDICT==c1+'*'+coeff)[0][0]
self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr
else:
self.dataset[k].HOcorrectionsDICT[j] = self.dataset[k].HOcorrectionsDICT[j].replace(c1+'*'+c2,c1+'*'+coeff)
self.dataset[k].HOcorrectionsVAL.T[j]*=corr
else:
coeff = self.config[k]['coefficients'][c2]['fixed']
corr = float(self.config[k]['coefficients'][c2]['value'])
if c1+'*'+coeff in self.dataset[k].HOcorrectionsDICT:
idx = np.where(self.dataset[k].HOcorrectionsDICT==c1+'*'+coeff)[0][0]
self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr
else:
self.dataset[k].HOcorrectionsDICT[j] = self.dataset[k].HOcorrectionsDICT[j].replace(c1+'*'+c2,c1+'*'+coeff)
self.dataset[k].HOcorrectionsVAL.T[j]*=corr
# Replace OW*OW and OB*OB with OpWB*OpWB and OpD*OpD
if 'OW' in self.dataset[k].CorrectionsDICT and 'OB' in self.dataset[k].CorrectionsDICT:
OW_idx = np.where(self.dataset[k].HOcorrectionsDICT=='OW*OW')[0][0]
OB_idx = np.where(self.dataset[k].HOcorrectionsDICT=='OB*OB')[0][0]
if 'OW*OB' in self.dataset[k].HOcorrectionsDICT:
OWOB_idx = np.where(self.dataset[k].HOcorrectionsDICT=='OW*OB')[0][0]
elif 'OB*OW' in self.dataset[k].HOcorrectionsDICT:
OWOB_idx = np.where(self.dataset[k].HOcorrectionsDICT=='OB*OW')[0][0]
OW_vals = self.dataset[k].HOcorrectionsVAL.T[OW_idx]
OB_vals = self.dataset[k].HOcorrectionsVAL.T[OB_idx]
OWOB_vals = self.dataset[k].HOcorrectionsVAL.T[OWOB_idx]
self.dataset[k].HOcorrectionsVAL.T[OW_idx] = (1./0.088625)**2*OW_vals
self.dataset[k].HOcorrectionsVAL.T[OB_idx] = (1./.3545)**2*OB_vals + (1./.088625)**2*(.1626)**2*(1./.3545)**2*OW_vals\
- (1./0.088625)*(0.1626)*(1./.3545)**2*OWOB_vals
self.dataset[k].HOcorrectionsVAL.T[OWOB_idx] = -(1./0.088625)*(1./.3545)*OWOB_vals + (1./0.088625)**2*(0.1626)*(1./.3545)*OW_vals
new_HOcoeff_dict[k] = np.delete(self.dataset[k].HOcorrectionsDICT,[j for j in fixed_HOparams],0)
# Get theory and data by experiment
for k in self.fits:
theory_reps[k]={}
data_exp[k]={}
corrections_val[k]={}
corrections_dict[k]={}
HOcorrections_val[k]={}
HOcorrections_dict[k]={}
# Construct theory arrays per experiment
cnt=0
for i in range(len(self.dataset[k].ExpNames)):
set_name=self.dataset[k].ExpNames[i]
ndat=self.dataset[k].NdataExp[i]
if set_name not in corrections_val[k]:
corrections_val[k][set_name] = self.dataset[k].CorrectionsVAL[cnt:cnt+ndat]
if set_name not in HOcorrections_val[k]:
HOcorrections_val[k][set_name] = self.dataset[k].HOcorrectionsVAL[cnt:cnt+ndat]
if set_name not in corrections_dict[k]:
corrections_dict[k][set_name] = self.dataset[k].CorrectionsDICT
if set_name not in HOcorrections_dict[k]:
HOcorrections_dict[k][set_name] = self.dataset[k].HOcorrectionsDICT
theory_reps[k][set_name] = np.array(self.theory[k]).T[cnt:cnt+ndat]
data_exp[k][set_name] = self.dataset[k].Commondata[cnt:cnt+ndat]
cnt+=ndat
datalines={}
systypes={}
# Read data and SM theory files
total_error={}
for dataset in self.data_list:
datalines[dataset], systypes[dataset] = self.read_dataset(dataset)
# Gather data from data tables
x_vals = []
central_data = []
stat_error = []
sys_errors = []
for line in datalines[dataset]:
ncols = len(line.split())
if ncols==3:
ndata = int(line.split()[-1])
if ncols>3:
x_vals.append(float(line.split()[2]))
central_data.append(float(line.split()[5]))
stat_error.append(float(line.split()[6]))
sys_errors.append([float(line.split()[i]) for i in range(7,ncols)])
add_corr_index=[]
mult_corr_index=[]
add_uncorr_index=[]
mult_uncorr_index=[]
for i in range(1,len(systypes[dataset])):
if systypes[dataset][i].split()[1]=='ADD' and systypes[dataset][i].split()[-1]=='UNCORR':
add_uncorr_index.append(int(systypes[dataset][i].split()[0]))
elif systypes[dataset][i].split()[1]=='MULT' and systypes[dataset][i].split()[-1]=='UNCORR':
mult_uncorr_index.append(int(systypes[dataset][i].split()[0]))
elif systypes[dataset][i].split()[1]=='ADD' and systypes[dataset][i].split()[-1]!='UNCORR':
add_corr_index.append(int(systypes[dataset][i].split()[0]))
elif systypes[dataset][i].split()[1]=='MULT' and systypes[dataset][i].split()[-1]!='UNCORR':
mult_corr_index.append(int(systypes[dataset][i].split()[0]))
add_uncorr_index = 2*np.array(add_uncorr_index)-2
mult_uncorr_index = 2*np.array(mult_uncorr_index)-1
add_corr_index = 2*np.array(add_corr_index)-2
mult_corr_index = 2*np.array(mult_corr_index)-1
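# The index arithmetic above encodes the assumed layout of the systematics
# block: each source n occupies two columns, the additive value at column
# 2n-2 and the multiplicative value (in percent of the central value, hence
# the /100 rescaling below) at column 2n-1.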
# corr_index = np.concatenate([add_corr_index,mult_corr_index],0)
# corr_index_sorted = np.argsort(corr_index)
# Compute the total error per data point (statistical and all
# systematics added in quadrature)
total_err=[]
for i in range(ndata):
err=0
err += stat_error[i]**2
if len(add_uncorr_index)>0:
add_uncorr_error = np.array(sys_errors)[i][add_uncorr_index]
err += np.sum(add_uncorr_error**2)
if len(mult_uncorr_index)>0:
mult_uncorr_error = np.array(sys_errors)[i][mult_uncorr_index]*np.array(central_data)[i]/100.
err += np.sum(mult_uncorr_error**2)
if len(add_corr_index)>0:
add_corr_error = np.array(sys_errors)[i][add_corr_index]
err += np.sum(add_corr_error**2)
if len(mult_corr_index)>0:
mult_corr_error = np.array(sys_errors)[i][mult_corr_index]*np.array(central_data)[i]/100.
err += np.sum(mult_corr_error**2)
total_err.append(np.sqrt(err))
total_error[dataset] = np.array(total_err)
# Load fisher YAML file
yaml_file = f'{self.path}/smefit/analyze/fisher_groups.yaml'
with open(yaml_file) as f:
data_groups = yaml.safe_load(f)
# Compute linear Fisher information
fisher = {}
norm_coeff = {}
norm_data = {}
for k in self.fits:
if k not in fisher:
fisher[k],norm_coeff[k],norm_data[k]={},{},{}
for d in data_groups:
if np.all([dataset not in self.dataset[k].ExpNames for dataset in data_groups[d]]):
continue
if d not in fisher[k]:
fisher[k][d] = {}
if d not in norm_data[k]:
norm_data[k][d] = 0.
for dataset in data_groups[d]:
if dataset not in self.dataset[k].ExpNames:
continue
for i,c in enumerate(corrections_dict[k][dataset]):
if c not in new_coeff_dict[k]:
continue
if c=='OW':
c='OpWB'
elif c=='OB':
c='OpD'
if c not in fisher[k][d]:
fisher[k][d][c] = 0
if c not in norm_coeff[k]:
norm_coeff[k][c] = 0
val = corrections_val[k][dataset].T[i]**2/total_error[dataset]**2
norm_coeff[k][c] += np.sum(val)
norm_data[k][d] += np.sum(val)
fisher[k][d][c] += np.sum(val)
# TODO: prevent computing quadratic fisher when all fits are NHO.
# Compute quadratic Fisher information
print('Computing quadratic Fisher information...this may take a few minutes...')
quad_fisher = {}
quad_norm_coeff = {}
quad_norm_data = {}
for k in self.fits:
if k not in quad_fisher:
quad_fisher[k],quad_norm_coeff[k],quad_norm_data[k] = {},{},{}
nrep = len(self.full_coeffs[k])
for group in data_groups:
if np.all([dataset not in self.dataset[k].ExpNames for dataset in data_groups[group]]):
continue
if group not in quad_fisher[k]:
quad_fisher[k][group] = {}
if group not in quad_norm_data[k]:
quad_norm_data[k][group] = np.zeros(nrep)
for dataset in data_groups[group]:
if dataset not in self.dataset[k].ExpNames:
continue
ndata = len(data_exp[k][dataset])
for i,c in enumerate(corrections_dict[k][dataset]):
if c not in new_coeff_dict[k]:
continue
if c not in self.full_labels[k]:
continue
if c=='OW':
coeff='OpWB'
elif c=='OB':
coeff='OpD'
else:
coeff = c
if coeff not in quad_fisher[k][group]:
quad_fisher[k][group][coeff] = np.zeros(nrep)
if coeff not in quad_norm_coeff[k]:
quad_norm_coeff[k][coeff] = np.zeros(nrep)
idx = np.where(self.full_labels[k]==c)[0][0]
idx_quad = np.where(HOcorrections_dict[k][dataset]==c+'*'+c)[0][0]
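# Sketch of the two terms assembled below: with
# T(c) = SM + sum_i kappa_i c_i + sum_{ij} kappa_ij c_i c_j,
# the second derivative of chi2 with respect to c splits into
#   term1 ~ (T - D) * kappa_cc / sigma^2          (theory curvature)
#   term2 ~ (kappa_c + sum_j c_j kappa_cj)^2 / sigma^2  (squared slope)
# summed over the points of the dataset and averaged over replicas later.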
dat = np.tile(data_exp[k][dataset],(nrep,1))
diff = theory_reps[k][dataset].T - dat
term1 = np.einsum('ij,j,j->ij',diff,HOcorrections_val[k][dataset].T[idx_quad],1./(total_error[dataset]**2))
term2 = np.tile(corrections_val[k][dataset].T[i],(nrep,1))
for j,c2 in enumerate(corrections_dict[k][dataset]):
if c2 not in new_coeff_dict[k]:
continue
if c2 not in self.full_labels[k]:
continue
if c+'*'+c2 not in new_HOcoeff_dict[k]\
and c2+'*'+c not in new_HOcoeff_dict[k]:
continue
if c+'*'+c2 in HOcorrections_dict[k][dataset]:
idx2 = np.where(c+'*'+c2==HOcorrections_dict[k][dataset])[0][0]
term2 += np.einsum('i,j->ij',np.array(self.full_coeffs[k]).T[idx],HOcorrections_val[k][dataset].T[idx2])
elif c2+'*'+c in HOcorrections_dict[k][dataset] and c2!=c:
idx2 = np.where(c2+'*'+c==HOcorrections_dict[k][dataset])[0][0]
term2 += np.einsum('i,j->ij',np.array(self.full_coeffs[k]).T[idx],HOcorrections_val[k][dataset].T[idx2])
term2 = np.einsum('ij,j->ij',term2**2,1./total_error[dataset]**2)
quad_fisher[k][group][coeff] += np.sum(term1+term2,axis=1)
quad_norm_data[k][group] += np.sum(term1+term2,axis=1)
quad_norm_coeff[k][coeff] += np.sum(term1+term2,axis=1)
# Compute means
for k in self.fits:
for c in quad_norm_coeff[k]:
quad_norm_coeff[k][c] = np.mean(quad_norm_coeff[k][c])
for k in self.fits:
for d in data_groups:
if d not in quad_fisher[k]:
continue
quad_norm_data[k][d] = np.mean(quad_norm_data[k][d])
for c in self.dataset[k].CorrectionsDICT:
if c=='OW':
c='OpWB'
elif c=='OB':
c='OpD'
if c not in quad_fisher[k][d]:
continue
quad_fisher[k][d][c] = np.mean(quad_fisher[k][d][c])
# Writing Fisher table
# ================ Fisher latex table =====================
L = latex_packages()
L.append(r"\begin{document}")
# Uses quadratic fit if included in report, otherwise computes
# Fisher with linear fit
for k in self.fits:
if '_HO' in k:
fit = k
break
else:
fit = self.fits[0]
fisher_matrix = np.zeros((len(new_coeff_dict[fit]),len(fisher[fit].keys())))
quad_fisher_matrix = np.zeros((len(new_coeff_dict[fit]),len(fisher[fit].keys())))
# Fisher table (linear values, quadratic in parentheses)
L.append(r"\begin{landscape}")
L.append(r"\begin{table}[H]")
L.append(r"\tiny")
L.append(r"\centering")
temp = r"\begin{tabular}{|c|c|"
for i in range(len(fisher[fit].keys())):
temp += "c|"
temp+="}"
L.append(temp)
L.append(r"\hline")
L.append(r" \multicolumn{2}{|c|}{} & \multicolumn{%d}{c|}{Processes} \\ \hline"%len(fisher[fit].keys()))
temp = r' Class & Coefficient & '
for i in range(len(fisher[fit].keys())):
if i != len(fisher[fit].keys())-1:
temp+=r'${\rm %s}$ &'%list(fisher[fit].keys())[i]
else:
temp += r'${\rm %s}$ \\ \hline'%list(fisher[fit].keys())[i]
L.append(temp)
ic=0
ordered_coeffs = []
for group in coeff_org.keys():
if len(coeff_org[group])==0:
continue
L.append(r'\multirow{%d}{*}{%s}'%(len(coeff_org[group]),group))
for isub,coeff in enumerate(coeff_org[group]):
if coeff not in new_coeff_dict[fit]:
continue
idx = np.where(coeff==new_coeff_dict[fit])[0][0]
ordered_coeffs.append(idx)
if coeff=='OW':
coeff='OpWB'
elif coeff=='OB':
coeff='OpD'
temp = r" & {\rm "+ coeff.replace('O','c') + r"}"
idj = 0
for i,d in enumerate(fisher[fit].keys()):
if d in fisher[fit] and coeff in fisher[fit][d] and fisher[fit][d][coeff]!=0:
value = fisher[fit][d][coeff]/norm_coeff[fit][coeff]*100
if value > 10:
temp += r" & {\color{blue} %0.2f}"%(value)
else:
temp += r" & %0.2f"%(value)
else:
temp += r" & $\times$"
if d in quad_fisher[fit] and coeff in quad_fisher[fit][d] and quad_fisher[fit][d][coeff]!=0:
value = quad_fisher[fit][d][coeff]/quad_norm_coeff[fit][coeff]*100.
if value > 10:
temp += r"{\color{blue} (%0.2f)}"%(value)
else:
temp += r"(%0.1f) "%(value)
else:
temp += r"($\times$) "
if i == len(fisher[fit].keys())-1 and isub!=len(coeff_org[group])-1:
temp += r" \\ \cline{2-%d}"%(2+len(fisher[fit].keys()))
elif i==len(fisher[fit].keys())-1 and isub==len(coeff_org[group])-1:
temp += r"\\ \hline"
if d in fisher[fit] and coeff in fisher[fit][d] and fisher[fit][d][coeff]!=0:
fisher_matrix[ic][idj] = fisher[fit][d][coeff]/norm_coeff[fit][coeff]*100.
else:
fisher_matrix[ic][idj] = 0.
if d in quad_fisher[fit] and coeff in quad_fisher[fit][d]:
quad_fisher_matrix[ic][idj] = quad_fisher[fit][d][coeff]/quad_norm_coeff[fit][coeff]*100.
else:
quad_fisher_matrix[ic][idj] = 0.
idj += 1
ic+=1
L.append(temp)
L.append(r"\end{tabular}")
L.append(r"\caption{Fisher information normalized per coefficient}")
L.append(r"\end{table}")
L.append(r"\end{landscape}")
# Uncomment to compute fisher table per dataset
# # Compute linear Fisher information per dataset
# fisher_d = {}
# norm_coeff_d = {}
# norm_data_d = {}
# for k in self.fits:
# if k not in fisher_d:
# fisher_d[k],norm_coeff_d[k],norm_data_d[k]={},{},{}
# for dataset in self.dataset[k].ExpNames:
# if dataset not in fisher_d:
# fisher_d[k][dataset] = {}
# if dataset not in norm_data_d[k]:
# norm_data_d[k][dataset] = 0.
# for i,c in enumerate(corrections_dict[k][dataset]):
# if c not in new_coeff_dict[k]:
# continue
# if c=='OW':
# c='OpWB'
# elif c=='OB':
# c='OpD'
# if c not in list(fisher_d[k][dataset].keys()):
# fisher_d[k][dataset][c] = 0
# if c not in list(norm_coeff_d[k].keys()):
# norm_coeff_d[k][c] = 0
# val = corrections_val[k][dataset].T[i]**2/total_error[dataset]**2
# norm_coeff_d[k][c] += np.sum(val)
# norm_data_d[k][dataset] += np.sum(val)
# fisher_d[k][dataset][c] = np.sum(val)
# quad_fisher_d = {}
# quad_norm_coeff_d = {}
# quad_norm_data_d = {}
# # Compute quadratic Fisher information per dataset
# bar = Bar(r'Computing quadratic Fisher information per dataset', max=len(self.dataset[k].ExpNames))
# k = self.fits[0]
# if k not in quad_fisher_d:
# quad_fisher_d[k],quad_norm_coeff_d[k],quad_norm_data_d[k] = {},{},{}
# nrep = len(self.full_coeffs[k])
# for dataset in self.dataset[k].ExpNames:
# if dataset not in quad_fisher_d[k]:
# quad_fisher_d[k][dataset] = {}
# if dataset not in quad_norm_data_d[k]:
# quad_norm_data_d[k][dataset] = np.zeros(nrep)
# ndata = len(data_exp[k][dataset])
# for i,c in enumerate(corrections_dict[k][dataset]):
# if c not in new_coeff_dict[k]:
# continue
# if c not in self.full_labels[k]:
# continue
# if c=='OW':
# coeff='OpWB'
# elif c=='OB':
# coeff='OpD'
# else:
# coeff = c
# if coeff not in quad_fisher_d[k][dataset]:
# quad_fisher_d[k][dataset][coeff] = np.zeros(nrep)
# if coeff not in quad_norm_coeff_d[k]:
# quad_norm_coeff_d[k][coeff] = np.zeros(nrep)
# idx = np.where(self.full_labels[k]==c)[0][0]
# idx_quad = np.where(HOcorrections_dict[k][dataset]==c+'*'+c)[0][0]
# dat = np.tile(data_exp[k][dataset],(nrep,1))
# diff = theory_reps[k][dataset].T - dat
# term1 = np.einsum('ij,j,j->ij',diff,HOcorrections_val[k][dataset].T[idx_quad],1./(total_error[dataset]**2))
# term2 = np.tile(corrections_val[k][dataset].T[i],(nrep,1))
# for j,c2 in enumerate(corrections_dict[k][dataset]):
# if c2 not in new_coeff_dict[k]:
# continue
# if c2 not in self.full_labels[k]:
# continue
# if c+'*'+c2 not in new_HOcoeff_dict[k]\
# and c2+'*'+c not in new_HOcoeff_dict[k]:
# continue
# if c+'*'+c2 in HOcorrections_dict[k][dataset]:
# idx2 = np.where(c+'*'+c2==HOcorrections_dict[k][dataset])[0][0]
# term2 += np.einsum('i,j->ij',np.array(self.full_coeffs[k]).T[idx],HOcorrections_val[k][dataset].T[idx2])
# elif c2+'*'+c in HOcorrections_dict[k][dataset] and c2!=c:
# idx2 = np.where(c2+'*'+c==HOcorrections_dict[k][dataset])[0][0]
# term2 += np.einsum('i,j->ij',np.array(self.full_coeffs[k]).T[idx],HOcorrections_val[k][dataset].T[idx2])
# term2 = np.einsum('ij,j->ij',term2**2,1./total_error[dataset]**2)
# quad_fisher_d[k][dataset][coeff] = np.sum(term1+term2,axis=1)
# quad_norm_data_d[k][dataset] += np.sum(term1+term2,axis=1)
# quad_norm_coeff_d[k][coeff] += np.sum(term1+term2,axis=1)
# bar.next()
# bar.finish()
# # Compute means
# for c in quad_norm_coeff_d[k]:
# quad_norm_coeff_d[k][c] = np.mean(quad_norm_coeff_d[k][c])
# for dataset in self.dataset[k].ExpNames:
# quad_norm_data_d[k][dataset] = np.mean(quad_norm_data_d[k][dataset])
# for c in self.dataset[k].CorrectionsDICT:
# if c=='OW':
# c='OpWB'
# elif c=='OB':
# c='OpD'
# if c not in quad_fisher_d[k][dataset]:
# continue
# quad_fisher_d[k][dataset][c] = np.mean(quad_fisher_d[k][dataset][c])
# # Tables Fisher per grouped dataset
# L.append(r"\begin{landscape}")
# fit = self.fits[0]
# for data_group, data_list in data_groups.items():
# L.append(r"\begin{table}[H]")
# L.append(r"\tiny")
# L.append(r"\centering")
# temp = r"\begin{tabular}{|c|c|"
# temp += "c|" * len(data_list)
# temp+="}"
# L.append(temp)
# L.append(r"\hline")
# temp = r" \multicolumn{2}{|c|}{} "
# for data in data_list:
# temp += r" & { \rm " + data.replace('_', r'\_' ) + "}"
# temp += r"\\ \hline "
# L.append(temp)
# # loop on coefficients
# for coeff_group, coeff_list in coeff_org.items():
# L.append( r'\multirow{%d}{*}{%s}'%(len(coeff_list),coeff_group) )
# for i, coeff in enumerate(coeff_list):
# if coeff=='OW':
# coeff='OpWB'
# elif coeff=='OB':
# coeff='OpD'
# temp = r" & %s"%coeff
# for dataset in data_list:
# if dataset in fisher_d[fit] and coeff in fisher_d[fit][dataset] and fisher_d[fit][dataset][coeff]!=0:
# value = fisher_d[fit][dataset][coeff]/norm_coeff_d[fit][coeff]*100
# if value > 10:
# temp += r" & {\color{blue} %0.2f}"%(value)
# else:
# temp += r" & %0.2f"%(value)
# else:
# temp += r" & $\times$"
# if dataset in quad_fisher_d[fit] and coeff in quad_fisher_d[fit][dataset] and quad_fisher_d[fit][dataset][coeff]!=0:
# value = quad_fisher_d[fit][dataset][coeff]/quad_norm_coeff_d[fit][coeff]*100.
# if value > 10:
# temp += r"{\color{blue} (%0.2f)}"%(value)
# else:
# temp += r"(%0.2f)"%(value)
# else:
# temp += r"($\times$)"
# if i != len(coeff_list) -1:
# temp += r" \\ \cline{2-%d}"%(2+len(data_list))
# else:
# temp += r" \\ \hline"
# L.append(temp)
# L.append(r"\end{tabular}")
# L.append(r"\caption{Fisher information normalized per coefficient in %s datasets}" % data_group)
# L.append(r"\end{table}")
# L.append(r"\end{landscape}")
self.run_pdflatex(L, 'fisher')
# Heat Map of Fisher table
fig = py.figure(figsize=(10,10))
ax = fig.add_subplot(121)
colors = py.rcParams['axes.prop_cycle'].by_key()['color']
cmap = matplotlib.colors.ListedColormap(['lightgrey',colors[3], colors[2], colors[1], colors[0]])
bounds = [0, 10, 25, 50, 75, 100]
norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)
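# The ListedColormap/BoundaryNorm pair bins the normalized Fisher values
# (0-100) into five discrete colors with bin edges at 0, 10, 25, 50, 75
# and 100, so entries below 10 render in light grey.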
cax = ax.matshow(fisher_matrix, vmin=0, vmax=100, cmap=cmap, norm=norm)
for i, row in enumerate(fisher_matrix):
for j, elem in enumerate(row):
c = elem
if c > 10:
ax.text(j, i, str(np.round(c,1)), va='center', ha='center',fontsize=8)
coeff_list = [c.replace('O','c') for c in list(new_coeff_dict[fit][np.array(ordered_coeffs)])]
temp_list = ['cpD' if c=='cB' else c for c in coeff_list]
new_coeff_list = ['cpWB' if c=='cW' else c for c in temp_list]
ticks1 = np.arange(0,len(fisher_matrix[0]),1)
ticks2 = np.arange(0,len(fisher_matrix),1)
ax.set_xticks(ticks1)
ax.set_yticks(ticks2)
ax.set_xticklabels(fisher[fit].keys())
ax.set_yticklabels(new_coeff_list)
ax.tick_params(axis='x',rotation=90,labelsize=15)
ax.tick_params(axis='y',labelsize=15)
ax.tick_params(which='major',labeltop=True,top=True,labelbottom=False,bottom=False)
ax = fig.add_subplot(122)
colors = py.rcParams['axes.prop_cycle'].by_key()['color']
cmap = matplotlib.colors.ListedColormap(['lightgrey',colors[3], colors[2], colors[1], colors[0]])
bounds = [0, 10, 25, 50, 75, 100]
norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)
cax = ax.matshow(quad_fisher_matrix, vmin=0, vmax=100, cmap=cmap, norm=norm)
divider = make_axes_locatable(ax)
cax1 = divider.append_axes("right", size="5%", pad=0.1)
cbar = fig.colorbar(cax, cax=cax1)
cbar.set_label(r'${\rm Normalized\ Fisher\ Value}$',fontsize=25,labelpad=30,rotation=270)
cbar.ax.tick_params(labelsize=15)
for i, row in enumerate(quad_fisher_matrix):
for j, elem in enumerate(row):
c = elem
if c > 10:
ax.text(j, i, str(np.round(c,1)), va='center', ha='center',fontsize=8)
ticks1 = np.arange(0,len(fisher_matrix[0]),1)
ax.set_xticks(ticks1)
ax.set_xticklabels(fisher[fit].keys())
ax.tick_params(axis='x',rotation=90,labelsize=15)
ax.tick_params(which='major',labeltop=True,top=True,labelbottom=False,bottom=False,labelleft=False,left=False)
py.tight_layout()
py.savefig(f'{self.path}/reports/{self.outputname}/Fisher_heat.pdf')
def write_PCA_table(self):
"""
Computes and writes PCA table and heat map.
Uses coeff_groups.yaml YAML file to get organized list of parameters.
Note: matrix being decomposd by SVD are the kappas
divided by the total experimental error
Note: special treatment is done with cW and cB, degrees
of freedom that are included at the fit level, but not at the presentation
level. In this case, cW and cB are re-expressed as cpWB and cpD.
"""
corrections_val={}
corrections_dict={}
HOcorrections_val={}
HOcorrections_dict={}
theory_reps = {}
data_exp = {}
#Load coefficient yaml card
yaml_file = f'{self.path}/smefit/analyze/coeff_groups.yaml'
with open(yaml_file) as f:
coeff_config = yaml.safe_load(f)
coeff_org = {}
coeff_org_fixed = {}
coeff_list = []
for group in coeff_config.keys():
if group not in coeff_org:
coeff_org[group]=[]
coeff_org_fixed[group]=[]
for c in coeff_config[group]:
if c=='OPff':
c='Off'
if c not in coeff_list and\
np.any([c in self.config[k]['coefficients'] for k in self.fits]) and\
np.any([self.config[k]['coefficients'][c]['fixed'] is False for k in self.fits if c in self.config[k]['coefficients']]):
coeff_org[group].append(c)
coeff_list.append(c)
# Read data and SM theory files
datalines={}
systypes={}
total_error={}
for k in self.fits:
total_error[k]=[]
for dataset in self.dataset[k].ExpNames:
datalines[dataset], systypes[dataset] = self.read_dataset(dataset)
# Gather data from data tables
x_vals = []
central_data = []
stat_error = []
sys_errors = []
for line in datalines[dataset]:
ncols = len(line.split())
if ncols==3:
ndata = int(line.split()[-1])
if ncols>3:
x_vals.append(float(line.split()[2]))
central_data.append(float(line.split()[5]))
stat_error.append(float(line.split()[6]))
sys_errors.append([float(line.split()[i]) for i in range(7,ncols)])
add_corr_index=[]
mult_corr_index=[]
add_uncorr_index=[]
mult_uncorr_index=[]
for i in range(1,len(systypes[dataset])):
if systypes[dataset][i].split()[1]=='ADD' and systypes[dataset][i].split()[-1]=='UNCORR':
add_uncorr_index.append(int(systypes[dataset][i].split()[0]))
elif systypes[dataset][i].split()[1]=='MULT' and systypes[dataset][i].split()[-1]=='UNCORR':
mult_uncorr_index.append(int(systypes[dataset][i].split()[0]))
elif systypes[dataset][i].split()[1]=='ADD' and systypes[dataset][i].split()[-1]!='UNCORR':
add_corr_index.append(int(systypes[dataset][i].split()[0]))
elif systypes[dataset][i].split()[1]=='MULT' and systypes[dataset][i].split()[-1]!='UNCORR':
mult_corr_index.append(int(systypes[dataset][i].split()[0]))
add_uncorr_index = 2*np.array(add_uncorr_index)-2
mult_uncorr_index = 2*np.array(mult_uncorr_index)-1
add_corr_index = 2*np.array(add_corr_index)-2
mult_corr_index = 2*np.array(mult_corr_index)-1
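# Each systematic source occupies a pair of columns in sys_errors, so
# systype number n maps to column 2n-2 (ADD) or 2n-1 (MULT), 0-indexed;
# correlated and uncorrelated sources are bookkept separately.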
# corr_index = np.concatenate([add_corr_index,mult_corr_index],0)
# corr_index_sorted = np.argsort(corr_index)
# Compute total experimental uncertainty (statistical and all systematics in quadrature)
for i in range(ndata):
err=0
err += stat_error[i]**2
if len(add_uncorr_index)>0:
add_uncorr_error = np.array(sys_errors)[i][add_uncorr_index]
err += np.sum(add_uncorr_error**2)
if len(mult_uncorr_index)>0:
mult_uncorr_error = np.array(sys_errors)[i][mult_uncorr_index]*np.array(central_data)[i]/100.
err += np.sum(mult_uncorr_error**2)
if len(add_corr_index)>0:
add_corr_error = np.array(sys_errors)[i][add_corr_index]
err += np.sum(add_corr_error**2)
if len(mult_corr_index)>0:
mult_corr_error = np.array(sys_errors)[i][mult_corr_index]*np.array(central_data)[i]/100.
err += np.sum(mult_corr_error**2)
total_error[k].append(np.sqrt(err))
total_error[k] = np.array(total_error[k])
for k in self.fits:
theory_reps[k]={}
data_exp[k]={}
corrections_val[k]={}
corrections_dict[k]={}
HOcorrections_val[k]={}
HOcorrections_dict[k]={}
# Construct theory arrays per experiment
cnt=0
for i in range(len(self.dataset[k].ExpNames)):
set_name=self.dataset[k].ExpNames[i]
ndat=self.dataset[k].NdataExp[i]
if set_name not in corrections_val[k]:
corrections_val[k][set_name] = self.dataset[k].CorrectionsVAL[cnt:cnt+ndat]
if set_name not in HOcorrections_val[k]:
HOcorrections_val[k][set_name] = self.dataset[k].HOcorrectionsVAL[cnt:cnt+ndat]
if set_name not in corrections_dict[k]:
corrections_dict[k][set_name] = self.dataset[k].CorrectionsDICT
if set_name not in HOcorrections_dict[k]:
HOcorrections_dict[k][set_name] = self.dataset[k].HOcorrectionsDICT
data_exp[k][set_name] = self.dataset[k].Commondata[cnt:cnt+ndat]
cnt+=ndat
X = {}
new_coeff_dict = {}
# Construct theory matrix used in PCA
for k in self.fits:
fixed_params=[]
for j,c in enumerate(self.dataset[k].CorrectionsDICT):
if self.config[k]['coefficients'][c]['fixed'] is False:
idx = np.where(self.dataset[k].CorrectionsDICT==c)[0][0]
self.dataset[k].CorrectionsVAL.T[idx]/=total_error[k]
continue
elif self.config[k]['coefficients'][c]['fixed'] is not True:
if isinstance(self.config[k]['coefficients'][c]['value'],Iterable):
for m,coeff in enumerate(self.config[k]['coefficients'][c]['fixed']):
idx = np.where(self.dataset[k].CorrectionsDICT==coeff)[0][0]
corr = float(self.config[k]['coefficients'][c]['value'][m])
self.dataset[k].CorrectionsVAL.T[idx] += self.dataset[k].CorrectionsVAL.T[j]*corr/total_error[k]
else:
coeff = self.config[k]['coefficients'][c]['fixed']
idx = np.where(self.dataset[k].CorrectionsDICT==coeff)[0][0]
corr = float(self.config[k]['coefficients'][c]['value'])
self.dataset[k].CorrectionsVAL.T[idx] += self.dataset[k].CorrectionsVAL.T[j]*corr/total_error[k]
elif self.config[k]['coefficients'][c]['fixed'] is True:
corr = float(self.config[k]['coefficients'][c]['value'])
self.dataset[k].CorrectionsVAL.T[j]*=corr/total_error[k]
fixed_params.append(j)
if 'OW' in self.dataset[k].CorrectionsDICT and 'OB' in self.dataset[k].CorrectionsDICT:
OW_idx = np.where(self.dataset[k].CorrectionsDICT=='OW')[0][0]
OB_idx = np.where(self.dataset[k].CorrectionsDICT=='OB')[0][0]
OW_vals = self.dataset[k].CorrectionsVAL.T[OW_idx]
OB_vals = self.dataset[k].CorrectionsVAL.T[OB_idx]
self.dataset[k].CorrectionsVAL.T[OW_idx] = -(1./.088625)*OW_vals
self.dataset[k].CorrectionsVAL.T[OB_idx] = (1./.3545)*OB_vals - (1./.088625)*(.1626)*(1./.3545)*OW_vals
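# The rotation above re-expresses the (OW, OB) contributions in the
# (OpWB, OpD) presentation basis; the numerical factors are the fixed
# linear-combination coefficients used by this fit (taken as given here).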
X[k] = np.delete(self.dataset[k].CorrectionsVAL.T,fixed_params,0).T
new_coeff_dict[k] = np.delete(self.dataset[k].CorrectionsDICT,fixed_params,0)
coeffs_kept = {}
pc_vals = {}
s_vals = {}
SVs = {}
pc_matrix = {}
# Decompose matrix with SVD and identify PCs
for k in self.fits:
npar = len(new_coeff_dict[k])
pc_matrix[k] = np.zeros((npar,npar))
coeffs_kept[k]={}
pc_vals[k]={}
s_vals[k]=[]
SVs[k] = {}
X_centered = X[k] - X[k].mean(axis=0)
_, W, Vt = np.linalg.svd(X_centered)
for i in range(len(Vt)):
name = r'${\rm PC\ }'+str(i+1)+r'$'
pc = Vt.T[:,i]
SV = W[i]
s_vals[k].append(SV)
SVs[k][name]=SV
coeffs_kept[k][i+1]=[]
pc_vals[k][i+1]=[]
pc_sorted = np.argsort(-np.abs(pc))
for j in pc_sorted:
pc_matrix[k][i][j] = pc[j]**2
if (pc[j]**2)>0.05:
if new_coeff_dict[k][j]=='OW':
coeffs_kept[k][i+1].append('OpWB')
elif new_coeff_dict[k][j]=='OB':
coeffs_kept[k][i+1].append('OpD')
else:
coeffs_kept[k][i+1].append(new_coeff_dict[k][j])
pc_vals[k][i+1].append(pc[j])
# Writing PCA file
# ================ PCA latex file =====================
L = latex_packages()
L.append(r"\usepackage{underscore}")
L.append(r"\allowdisplaybreaks")
L.append(r"\begin{document}")
for k in self.fits:
if '_NHO' in k:
fit = k
else:
fit = self.fits[0]
# PCA Table
L.append(r"\begin{align}")
for i in range(len(pc_vals[fit])):
L.append(r" &{\underline {\bf {\color{red} %0.2e}}}: "%s_vals[fit][i])
if pc_vals[fit][i+1][0]<0:
factor=-1.
else:
factor=1.
for j in range(len(pc_vals[fit][i+1])):
if np.abs(pc_vals[fit][i+1][j]) > 0.9:
clr = r"blue"
else:
clr = r"black"
if j==0 and j!=len(pc_vals[fit][i+1])-1:
L.append(r"{\color{"+clr+r"} %0.3f} * {\rm %s} + "%(factor*pc_vals[fit][i+1][j], coeffs_kept[fit][i+1][j].replace('O','c')))
elif j==0 and j==len(pc_vals[fit][i+1])-1:
L.append(r"{\color{"+clr+r"} %0.3f} * {\rm %s} "%(factor*pc_vals[fit][i+1][j], coeffs_kept[fit][i+1][j].replace('O','c')))
elif j!=len(pc_vals[fit][i+1])-1 and (j+1)%7==0:
L.append(r"{\color{"+clr+r"} %0.3f} * {\rm %s} + \nonumber \\ \nonumber \\"%(factor*pc_vals[fit][i+1][j], coeffs_kept[fit][i+1][j].replace('O','c')))
elif j!=len(pc_vals[fit][i+1])-1 and j%7==0:
L.append(r"&{\color{"+clr+r"} %0.3f} * {\rm %s} + "%(factor*pc_vals[fit][i+1][j], coeffs_kept[fit][i+1][j].replace('O','c')))
elif j==len(pc_vals[fit][i+1])-1 and j%7==0:
L.append(r"&{\color{"+clr+r"} %0.3f} * {\rm %s}"%(factor*pc_vals[fit][i+1][j], coeffs_kept[fit][i+1][j].replace('O','c')))
elif j==len(pc_vals[fit][i+1])-1 and j%7!=0:
L.append(r"{\color{"+clr+r"} %0.3f} * {\rm %s}"%(factor*pc_vals[fit][i+1][j], coeffs_kept[fit][i+1][j].replace('O','c')))
elif j!=len(pc_vals[fit][i+1])-1:
L.append(r"{\color{"+clr+r"} %0.3f} * {\rm %s} + "%(factor*pc_vals[fit][i+1][j], coeffs_kept[fit][i+1][j].replace('O','c')))
if i!=len(pc_vals[fit])-1:
L.append(r" \nonumber \\ \nonumber \\ ")
else:
L.append(r" \nonumber")
L.append(r"\end{align}")
self.run_pdflatex(L, 'pca')
# Bar Plot of Singular Values
nrows,ncols=1,1
py.figure(figsize=(nrows*10,ncols*5))
py.bar(range(len(SVs[fit])), list(SVs[fit].values()), align='center')
py.xticks(range(len(SVs[fit])), list(SVs[fit].keys()),fontsize=15)
py.xticks(rotation=90)
py.tick_params(axis='y',direction='in',labelsize=15)
py.yscale('log')
py.ylim(1e-3,1e5)
py.ylabel(r'${\rm Singular\ Values}$',fontsize=20)
py.tight_layout()
py.savefig(f'{self.path}/reports/{self.outputname}/PCA_SVs.pdf')
# Heat Map of PC coefficients
fig = py.figure(figsize=(10,10))
ax = fig.add_subplot(111)
colors = py.rcParams['axes.prop_cycle'].by_key()['color']
cmap = matplotlib.colors.ListedColormap(['lightgrey',colors[3], colors[2], colors[1], colors[0]])
bounds = [0, 0.10, 0.25, 0.50, 0.75, 1.0]
norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)
cax = ax.matshow(pc_matrix[fit], vmin=0, vmax=1, cmap=cmap, norm=norm)
divider = make_axes_locatable(ax)
cax1 = divider.append_axes("right", size="5%", pad=0.1)
cbar = fig.colorbar(cax, cax=cax1)
cbar.set_label(r'${\rm a}_i^2$',fontsize=25,labelpad=30,rotation=270)
cbar.ax.tick_params(labelsize=15)
for i in range(len(pc_matrix[fit])):
for j in range(len(pc_matrix[fit][i])):
c = pc_matrix[fit][i][j]
if c > 0.1:
ax.text(j, i, str(np.round(c,1)), va='center', ha='center',fontsize=10)
ticks = np.arange(0,len(pc_matrix[fit]),1)
ax.set_xticks(ticks)
ax.set_yticks(ticks)
coeff_list = [c.replace('O','c') for c in new_coeff_dict[fit]]
temp_list = ['cpD' if c=='cB' else c for c in coeff_list]
new_coeff_list = ['cpWB' if c=='cW' else c for c in temp_list]
ax.set_xticklabels([r'${\rm '+coeff+r'}$' for coeff in new_coeff_list])
ax.set_yticklabels(list(SVs[fit].keys()))
ax.tick_params(axis='x',rotation=90,labelsize=15)
ax.tick_params(axis='y',labelsize=15)
ax.tick_params(which='major',labeltop=True,top=True,labelbottom=False,bottom=False)
py.tight_layout()
py.savefig(f'reports/{self.outputname}/PCA_PCs.pdf')
# Combine plots in tex file
# ================ Coefficients latex file =====================
L = latex_packages()
L.append(r"\begin{document}")
self.combine_plots(L, 'pca_plots', 'PCA_')
def plot_chi2(self):
"""
Plots a bar plot of the chi2 values per experiment,
and also the distribution of chi2 values.
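Example of the bar-plot construction used below (sketch with dummy
chi2/ndat numbers and made-up experiment names)::

    import pandas as pd
    chi2_bar = {'SM': [1.2, 0.9], 'fit': [1.0, 0.8]}
    df = pd.DataFrame.from_dict(chi2_bar, orient='index', columns=['expA', 'expB'])
    ax = df.T.plot(kind='bar')  # one group of bars per experiment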
"""
# Plot
nrows,ncols=1,1
py.figure(figsize=(nrows*10,ncols*5))
nexp = len(self.data_list)
chi2_bar = {}
chi2_bar[r'${\rm SM}$'] = np.zeros(nexp)
chi2_hist = {}
labels = []
for k in self.fits:
name=self.fit_labels[k]
chi2_bar[name]=np.zeros(nexp)
cnt=0
for exp in self.data_list:
if exp in self.chi2[k]:
chi2_bar[name][cnt] = self.chi2[k][exp]/self.ndat[k][exp]
if exp in self.chi2_sm[k]:
chi2_bar[r'${\rm SM}$'][cnt] = self.chi2_sm[k][exp]/self.ndat[k][exp]
if exp.replace('_',r'\_') not in labels:
labels.append(exp.replace('_',r'\_'))
cnt+=1
for k in self.fits:
name=self.fit_labels[k]
chi2_hist[name] = self.chi2_total_rep[k]/self.ndat_total[k]
# Chi2 bar plot per experiment
df = pd.DataFrame.from_dict(chi2_bar,orient='index',columns=labels)
new_df = df.T
ax = new_df.plot(kind='bar',rot=0,width=0.7,figsize=(10,5))
py.plot(np.linspace(-1,nexp*10,2),np.ones(2),'k--',alpha=0.7)
py.xticks(rotation=90)
py.tick_params(axis='y',direction='in',labelsize=15)
py.ylabel(r'$\chi^2 {\rm value}$',fontsize=11)
py.ylim(0,4)
py.legend(loc=2,frameon=False,prop={'size':11})
py.tight_layout()
py.savefig(f'{self.path}/reports/{self.outputname}/Chi2_Bar.pdf')
# Chi2 distribution
py.figure(figsize=(7,5))
ax = py.subplot(111)
colors = py.rcParams['axes.prop_cycle'].by_key()['color']
i=0
for name in chi2_hist:
ax.hist(chi2_hist[name],bins='fd',density=True,color=colors[i],edgecolor='k',alpha=0.3,label=name)
i+=1
py.tick_params(axis='x',direction='in',labelsize=15)
py.tick_params(axis='y',direction='in',labelsize=15)
py.xlabel(r'$\chi^2 {\rm value}$',fontsize=11)
py.legend(loc=2,frameon=False,prop={'size':11})
py.tight_layout()
py.savefig(f'{self.path}/reports/{self.outputname}/Chi2_Hist.pdf')
# Combine plots in tex file
# ================ Coefficients latex file =====================
L = latex_packages()
L.append(r"\begin{document}")
self.combine_plots(L, 'chi2_plots', 'Chi2_')
def plot_coefficients(self):
"""
Plots central values + 95% CL errors, 95% CL bounds,
probability distributions, residuals,
residual distribution, and energy reach.
Also writes a table displaying values for
68% CL bounds and central value + 95% errors.
Takes into account parameter constraints and displays
all non-zero parameters.
Uses coeff_groups.yaml YAML file to organize parameters
in a meaningful way.
Note: coefficients that are known to have disjoint
probability distributions (i.e. multiple solutions)
are manually separated by including the coefficient name
in disjointed_list or disjointed_list2
for global and single fit results, respectively.
Note: probability distributions for single parameter
fits are included for all coefficients EXCEPT those
constrained as a linear combination of two or more parameters
(since they have different numbers of posterior samples).
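Example of the disjoint-solution splitting applied below (sketch on a
synthetic bimodal posterior)::

    import numpy as np
    rng = np.random.default_rng(0)
    post = np.concatenate([rng.normal(-3, 0.5, 500), rng.normal(3, 0.5, 500)])
    mid = (post.max() + post.min()) / 2.
    sol1, sol2 = post[post < mid], post[post > mid]
    low68, high68 = np.nanpercentile(sol1, 16), np.nanpercentile(sol1, 84)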
"""
#Load coefficient yaml card
yaml_file = f'{self.path}/smefit/analyze/coeff_groups.yaml'
with open(yaml_file) as f:
coeff_config = yaml.safe_load(f)
# SMEFiT logo
# TODO: fix this
logo = py.imread(f'{self.path}/smefit/analyze/logo.png')
coeff_list = []
for group in coeff_config.keys():
for c in coeff_config[group]:
if c=='OPff':
c='Off'
if c in ['OW','OB']:
continue
if np.any(['SNS_c' in k for k in self.fits])\
and np.any([self.config[k]['coefficients'][c]['fixed'] is not False for k in self.fits if c in self.config[k]['coefficients']]):
continue
if c not in coeff_list and\
np.any([c in self.config[k]['coefficients'] for k in self.fits]) and\
np.any([self.config[k]['coefficients'][c]['fixed'] is not True for k in self.fits if c in self.config[k]['coefficients']]):
coeff_list.append(c)
npar=len(coeff_list)
labels=coeff_list
disjointed_list = ['Otap','Obp']
disjointed_list2 = ['Otap','Obp','Opd','OpB','OpW']
if any(['TOP19' in k for k in self.fits]):
disjointed_list.append('Otp')
# Plot central value + 95% CL errors
nrows,ncols=1,1
py.figure(figsize=(nrows*10,ncols*5))
ax=py.subplot(111)
# X-axis
X=np.array([2*i for i in range(npar)])
# Spacing between fit results
val = np.linspace(-0.1*len(self.coeffs),0.1*len(self.coeffs),len(self.coeffs))
i=0
colors = py.rcParams['axes.prop_cycle'].by_key()['color']
lower_bound68={}
upper_bound68={}
lower_bound95={}
upper_bound95={}
mean_coeff = {}
error68={}
error95={}
error95_size={}
second_err={}
energy68={}
energy95={}
residual68={}
residual95={}
for k in self.fits:
name=self.fit_labels[k]#r'${\rm %s}$'%k.replace('_','\_')
lower_bound68[k] = np.zeros(npar)
upper_bound68[k] = np.zeros(npar)
lower_bound95[k] = np.zeros(npar)
upper_bound95[k] = np.zeros(npar)
error68[name]=np.zeros(npar)
error95[name]=np.zeros(npar)
error95_size[name]=np.zeros(npar)
second_err[name]=np.zeros(npar)
lower_bound68[k+'(2)'] = np.zeros(npar)
upper_bound68[k+'(2)'] = np.zeros(npar)
lower_bound95[k+'(2)'] = np.zeros(npar)
upper_bound95[k+'(2)'] = np.zeros(npar)
energy68[name]=np.zeros(npar)
energy95[name]=np.zeros(npar)
residual68[name]=np.zeros(npar)
residual95[name]=np.zeros(npar)
mean_coeff[k]=np.zeros(npar)
mean_coeff[k+'(2)']=np.zeros(npar)
cnt=0
for l in coeff_list:
if l in self.full_labels[k]:
idx=np.where(np.array(self.full_labels[k])==l)[0][0]
else:
cnt+=1
continue
# Compute 95% CL and central value
if 'SNS_' in k:
if self.config[k]['coefficients'][l]['fixed'] is not False and self.config[k]['coefficients'][l]['fixed'] is not True:
if isinstance(self.config[k]['coefficients'][l]['value'],Iterable):
mid=0
error=0
error2=0
for j in range(len(self.config[k]['coefficients'][l]['value'])):
coeff = self.config[k]['coefficients'][l]['fixed'][j]
coeff_idx = np.where(np.array(self.labels[k])==coeff)[0][0]
temp = np.array(self.coeffs[k][coeff_idx])
low = np.nanpercentile(temp,16)
high = np.nanpercentile(temp,84)
low2 = np.nanpercentile(temp,2.5)
high2 = np.nanpercentile(temp,97.5)
mid += (np.mean(temp,axis=0)*self.config[k]['coefficients'][l]['value'][j])/len(self.config[k]['coefficients'][l]['value'])
error += ((high-low)/2.)**2
error2 += ((high2-low2)/2.)**2
error = np.sqrt(error)
error2 = np.sqrt(error2)
else:
coeff = self.config[k]['coefficients'][l]['fixed']
coeff_idx = np.where(np.array(self.labels[k])==coeff)[0][0]
temp = np.array(self.coeffs[k][coeff_idx])*self.config[k]['coefficients'][l]['value']
low = np.nanpercentile(temp,16)
high = np.nanpercentile(temp,84)
low2 = np.nanpercentile(temp,2.5)
high2 = np.nanpercentile(temp,97.5)
mid = np.mean(temp,axis=0)
error = (high-low)/2.
error2 = (high2-low2)/2.
else:
if '_HO' in k and l in disjointed_list2:
idx2 = np.where(np.array(self.labels[k])==l)[0][0]
min_val = min(self.coeffs[k][idx2])
max_val = max(self.coeffs[k][idx2])
mid = (max_val+min_val)/2.
full_solution = np.array(self.coeffs[k][idx2])
if l=='Obp' or l=='Opd':
solution1 = full_solution[full_solution>mid]
solution2 = full_solution[full_solution<mid]
else:
solution1 = full_solution[full_solution<mid]
solution2 = full_solution[full_solution>mid]
# First solution
low = np.nanpercentile(solution1,16)
high = np.nanpercentile(solution1,84)
low2 = np.nanpercentile(solution1,2.5)
high2 = np.nanpercentile(solution1,97.5)
mid = np.mean(solution1,axis=0)
error = (high-low)/2.
error2 = (high2-low2)/2.
# Second solution
low_2 = np.nanpercentile(solution2,16)
high_2 = np.nanpercentile(solution2,84)
low2_2 = np.nanpercentile(solution2,2.5)
high2_2 = np.nanpercentile(solution2,97.5)
mid2 = np.mean(solution2,axis=0)
error+=(high_2-low_2)/2.
error2+=(high2_2-low2_2)/2.
second_err[name][cnt] = (high2_2-low2_2)/2.
lower_bound68[k+'(2)'][cnt] = low_2
upper_bound68[k+'(2)'][cnt] = high_2
lower_bound95[k+'(2)'][cnt] = low2_2
upper_bound95[k+'(2)'][cnt] = high2_2
mean_coeff[k+'(2)'][cnt] = mid2
else:
idx2 = np.where(np.array(self.labels[k])==l)[0][0]
low = np.nanpercentile(np.array(self.coeffs[k][idx2]),16)
high = np.nanpercentile(np.array(self.coeffs[k][idx2]),84)
low2 = np.nanpercentile(np.array(self.coeffs[k][idx2]),2.5)
high2 = np.nanpercentile(np.array(self.coeffs[k][idx2]),97.5)
mid = np.mean(np.array(self.coeffs[k][idx2]),axis=0)
error = (high-low)/2.
error2 = (high2-low2)/2.
else:
if '_HO' in k and l in disjointed_list:
min_val = min(np.transpose(np.array(self.full_coeffs[k]))[idx])
max_val = max(np.transpose(np.array(self.full_coeffs[k]))[idx])
mid = (max_val+min_val)/2.
full_solution = np.transpose(np.array(self.full_coeffs[k]))[idx]
if l=='Obp' or l=='Opd':
solution1 = full_solution[full_solution>mid]
solution2 = full_solution[full_solution<mid]
else:
solution1 = full_solution[full_solution<mid]
solution2 = full_solution[full_solution>mid]
# First solution
low = np.nanpercentile(solution1,16)
high = np.nanpercentile(solution1,84)
low2 = np.nanpercentile(solution1,2.5)
high2 = np.nanpercentile(solution1,97.5)
mid = np.mean(solution1,axis=0)
error = (high-low)/2.
error2 = (high2-low2)/2.
# Second solution
low_2 = np.nanpercentile(solution2,16)
high_2 = np.nanpercentile(solution2,84)
low2_2 = np.nanpercentile(solution2,2.5)
high2_2 = np.nanpercentile(solution2,97.5)
mid2 = np.mean(solution2,axis=0)
error+=(high_2-low_2)/2.
error2+=(high2_2-low2_2)/2.
second_err[name][cnt]=(high2_2-low2_2)/2.
lower_bound68[k+'(2)'][cnt] = low_2
upper_bound68[k+'(2)'][cnt] = high_2
lower_bound95[k+'(2)'][cnt] = low2_2
upper_bound95[k+'(2)'][cnt] = high2_2
mean_coeff[k+'(2)'][cnt] = mid2
else:
low = np.nanpercentile(np.transpose(np.array(self.full_coeffs[k]))[idx],16)
high = np.nanpercentile(np.transpose(np.array(self.full_coeffs[k]))[idx],84)
low2 = np.nanpercentile(np.transpose(np.array(self.full_coeffs[k]))[idx],2.5)
high2 = np.nanpercentile(np.transpose(np.array(self.full_coeffs[k]))[idx],97.5)
mid = np.mean(np.transpose(np.array(self.full_coeffs[k]))[idx],axis=0)
error = (high-low)/2.
error2 = (high2-low2)/2.
lower_bound68[k][cnt] = low
upper_bound68[k][cnt] = high
lower_bound95[k][cnt] = low2
upper_bound95[k][cnt] = high2
error68[name][cnt]=error
error95[name][cnt]=error2
error95_size[name][cnt] = np.abs(high2-low2)
energy68[name][cnt]=1./np.sqrt(error)
energy95[name][cnt]=1./np.sqrt(error2)
residual68[name][cnt] = mid/error68[name][cnt]
residual95[name][cnt] = mid/error95[name][cnt]
mean_coeff[k][cnt] = mid
err = np.array([[mid-low2,high2-mid]]).T
# Plot central value + 95% CL asymmetric
if cnt==0:
ax.errorbar(X[cnt]+val[i],y=mid,yerr=err,color=colors[i],fmt='.',elinewidth=2,label=name)
if '_HO' in k and (l in disjointed_list or l in disjointed_list2) and second_err[name][cnt]!=0:
s_err = np.array([[mid2-lower_bound95[k+'(2)'][cnt], upper_bound95[k+'(2)'][cnt]-mid2]]).T
ax.errorbar(X[cnt]+val[i]+0.5,y=mid2,yerr=s_err,color=colors[i],\
fmt='.',elinewidth=2)
elif cnt == 6 and 'HIGGS' in k:
ax.errorbar(X[cnt]+val[i],y=mid,yerr=err,color=colors[i],fmt='.',elinewidth=2,label=name)
if '_HO' in k and (l in disjointed_list or l in disjointed_list2) and second_err[name][cnt]!=0:
s_err = np.array([[mid2-lower_bound95[k+'(2)'][cnt], upper_bound95[k+'(2)'][cnt]-mid2]]).T
ax.errorbar(X[cnt]+val[i]+0.5,y=mid2,yerr=s_err,color=colors[i],\
fmt='.',elinewidth=2)
else:
ax.errorbar(X[cnt]+val[i],y=mid,yerr=err,color=colors[i],fmt='.',elinewidth=2)
if '_HO' in k and (l in disjointed_list or l in disjointed_list2) and second_err[name][cnt]!=0:
s_err = np.array([[mid2-lower_bound95[k+'(2)'][cnt], upper_bound95[k+'(2)'][cnt]-mid2]]).T
ax.errorbar(X[cnt]+val[i]+0.5,y=mid2,yerr=s_err,color=colors[i],fmt='.',elinewidth=2)
cnt+=1
i+=1
ax.imshow(logo, aspect='auto',transform=ax.transAxes,extent=[.775,.975,.05,.2],zorder=-1, alpha=1)
py.plot([i for i in range(-1,200)],np.zeros(201),'k--',alpha=0.7)
py.xlim(-1,(npar)*2-1)
py.yscale('symlog',linthreshy=1e-1)
py.ylim(-400,400)
py.yticks([-100,-10,-1,-0.1,0,0.1,1,10,100],[r'$-100$',r'$-10$',r'$-1$',r'$-0.1$',r'$0$',r'$0.1$',r'$1$',r'$10$',r'$100$'])
py.ylabel(r'$c_i/\Lambda^2\ ({\rm TeV}^{-2})$',fontsize=18)
py.tick_params(which='major',direction='in',labelsize=13)
my_xticks = [c.replace('O','c') for c in coeff_list]
py.xticks(X,my_xticks,rotation=90)
py.legend(loc=0,frameon=False,prop={'size':11})
py.tight_layout()
py.savefig(f'{self.path}/reports/{self.outputname}/Coeffs_Central.pdf', dpi=1000)
# Plot 95% CLs for coefficients (bar plot)
nrows,ncols=1,1
py.figure(figsize=(10,7))
df = pd.DataFrame.from_dict(error95_size,orient='index',columns=[c.replace('O','c') for c in coeff_list])
new_df = df.T
ax = new_df.plot(kind='bar',rot=0,width=0.7,figsize=(10,5))
ax.imshow(logo, aspect='auto',transform=ax.transAxes,extent=[.775,.975,.825,.975],zorder=-1, alpha=1)
#Hard cutoff
py.plot(np.linspace(-1,2*npar+1,2),400*np.ones(2),'k--',alpha=0.7,lw=2)
py.xticks(rotation=90)
py.tick_params(axis='y',direction='in',labelsize=15)
py.yscale('log')
py.ylabel(r'${\rm Magnitude\ of\ 95\%\ Confidence\ Level\ Bounds}\ (1/{\rm TeV}^2)$',fontsize=11)
py.ylim(1e-3,5e3)
py.legend(loc=2,frameon=False,prop={'size':11})
py.tight_layout()
py.savefig(f'{self.path}/reports/{self.outputname}/Coeffs_Bar.pdf', dpi=1000)
# Plot posteriors (histograms)
if any(['GLOBAL_' in k for k in self.fits]):
gs=int(np.sqrt(npar))
else:
gs=int(np.sqrt(npar))+1
nrows,ncols=gs,gs
fig = py.figure(figsize=(nrows*4,ncols*3))
redundant_coeffs = ['Opl2','Opl3','O3pl2','O3pl3','Opmu','Opta','Opdi']
heights={}
total_cnt=[]
clr_cnt=0
print_labels=True
for k in self.fits:
heights[k]={}
if any(['GLOBAL_' in k for k in self.fits]):
cnt=8
else:
cnt=1
for l in coeff_list:
if l in redundant_coeffs:
continue
if l in self.full_labels[k]:
idx=np.where(np.array(self.full_labels[k])==l)[0][0]
else:
cnt+=1
continue
ax = py.subplot(ncols,nrows,cnt)
if 'SNS_' in k:
if self.config[k]['coefficients'][l]['fixed'] is not False and self.config[k]['coefficients'][l]['fixed'] is not True:
if isinstance(self.config[k]['coefficients'][l]['value'],Iterable):
# cannot plot these here: the number of replicas differs between the two constrained degrees of freedom
cnt+=1
continue
else:
coeff = self.config[k]['coefficients'][l]['fixed']
coeff_idx = np.where(np.array(self.labels[k])==coeff)[0][0]
values = np.array(self.coeffs[k][coeff_idx])*self.config[k]['coefficients'][l]['value']
else:
if '_HO' in k and l in disjointed_list2:
idx2 = np.where(np.array(self.labels[k])==l)[0][0]
min_val = min(self.coeffs[k][idx2])
max_val = max(self.coeffs[k][idx2])
mid = (max_val+min_val)/2.
full_solution = np.array(self.coeffs[k][idx2])
if l=='Obp' or l=='Opd':
solution1 = full_solution[full_solution>mid]
solution2 = full_solution[full_solution<mid]
else:
solution1 = full_solution[full_solution<mid]
solution2 = full_solution[full_solution>mid]
values=solution1
ax.hist(solution2,bins='fd',density=True,color=colors[clr_cnt],edgecolor='black',alpha=0.3)
else:
idx2 = np.where(np.array(self.labels[k])==l)[0][0]
values = np.array(self.coeffs[k][idx2])
ax.hist(values,bins='fd',density=True,color=colors[clr_cnt],edgecolor='black',alpha=0.3)
lbl = r'${\rm {\bf '+l.replace('O','c')+'}}$'
if print_labels and l not in ['Opl1','O3pl1','Ope','Opui']:
ax.text(0.05,0.85,lbl,transform=ax.transAxes,fontsize=20)
elif print_labels and l=='Opl1':
ax.text(0.05,0.85,r'${\rm \bf{cpl1}}$',transform=ax.transAxes,fontsize=20)
ax.text(0.05,0.7,r'${\rm \bf{cpl2}}$',transform=ax.transAxes,fontsize=20)
ax.text(0.05,0.55,r'${\rm \bf{cpl3}}$',transform=ax.transAxes,fontsize=20)
elif print_labels and l=='O3pl1':
ax.text(0.05,0.85,r'${\rm \bf{c3pl1}}$',transform=ax.transAxes,fontsize=20)
ax.text(0.05,0.7,r'${\rm \bf{c3pl2}}$',transform=ax.transAxes,fontsize=20)
ax.text(0.05,0.55,r'${\rm \bf{c3pl3}}$',transform=ax.transAxes,fontsize=20)
elif print_labels and l=='Ope':
ax.text(0.05,0.85,r'${\rm \bf{cpe}}$',transform=ax.transAxes,fontsize=20)
ax.text(0.05,0.7,r'${\rm \bf{cpmu}}$',transform=ax.transAxes,fontsize=20)
ax.text(0.05,0.55,r'${\rm \bf{cpta}}$',transform=ax.transAxes,fontsize=20)
elif print_labels and l=='Opui':
ax.text(0.05,0.85,r'${\rm \bf{cpui}}$',transform=ax.transAxes,fontsize=20)
ax.text(0.05,0.7,r'${\rm \bf{-2cpdi}}$',transform=ax.transAxes,fontsize=20)
else:
if '_HO' in k and l in disjointed_list:
min_val = min(np.transpose(np.array(self.full_coeffs[k]))[idx])
max_val = max(np.transpose(np.array(self.full_coeffs[k]))[idx])
mid = (max_val+min_val)/2.
full_solution = np.transpose(np.array(self.full_coeffs[k]))[idx]
if l=='Obp' or l=='Opd':
solution1 = full_solution[full_solution>mid]
solution2 = full_solution[full_solution<mid]
else:
solution1 = full_solution[full_solution<mid]
solution2 = full_solution[full_solution>mid]
heights[k][cnt]=ax.hist(solution1,bins='fd',density=True,color=colors[clr_cnt],edgecolor='black',alpha=0.3)
ax.hist(solution2,bins='fd',density=True,color=colors[clr_cnt],edgecolor='black',alpha=0.3)
else:
heights[k][cnt]=ax.hist(np.transpose(self.full_coeffs[k])[idx],bins='fd',density=True,color=colors[clr_cnt],edgecolor='black',alpha=0.3,\
label=self.fit_labels[k])
lbl = r'${\rm {\bf '+l.replace('O','c')+'}}$'
if print_labels and l not in ['Opl1','O3pl1','Ope','Opui']:
ax.text(0.05,0.85,lbl,transform=ax.transAxes,fontsize=20)
elif print_labels and l=='Opl1':
ax.text(0.05,0.85,r'${\rm \bf{cpl1}}$',transform=ax.transAxes,fontsize=20)
ax.text(0.05,0.7,r'${\rm \bf{cpl2}}$',transform=ax.transAxes,fontsize=20)
ax.text(0.05,0.55,r'${\rm \bf{cpl3}}$',transform=ax.transAxes,fontsize=20)
elif print_labels and l=='O3pl1':
ax.text(0.05,0.85,r'${\rm \bf{c3pl1}}$',transform=ax.transAxes,fontsize=20)
ax.text(0.05,0.7,r'${\rm \bf{c3pl2}}$',transform=ax.transAxes,fontsize=20)
ax.text(0.05,0.55,r'${\rm \bf{c3pl3}}$',transform=ax.transAxes,fontsize=20)
elif print_labels and l=='Ope':
ax.text(0.05,0.85,r'${\rm \bf{cpe}}$',transform=ax.transAxes,fontsize=20)
ax.text(0.05,0.7,r'${\rm \bf{cpmu}}$',transform=ax.transAxes,fontsize=20)
ax.text(0.05,0.55,r'${\rm \bf{cpta}}$',transform=ax.transAxes,fontsize=20)
elif print_labels and l=='Opui':
ax.text(0.05,0.85,r'${\rm \bf{cpui}}$',transform=ax.transAxes,fontsize=20)
ax.text(0.05,0.7,r'${\rm \bf{-2cpdi}}$',transform=ax.transAxes,fontsize=20)
ax.tick_params(which='both',direction='in',labelsize=22.5)
ax.tick_params(labelleft=False)
if cnt not in total_cnt:
total_cnt.append(cnt)
cnt+=1
clr_cnt+=1
print_labels = False
for cnt in total_cnt:
ax = py.subplot(ncols,nrows,cnt)
if len(self.fits)>1:
fit1=self.fits[0]
fit2=self.fits[1]
if cnt in heights[fit1] and cnt in heights[fit2]:
max_height = max(max(heights[fit1][cnt][0]),max(heights[fit2][cnt][0]))
elif cnt in heights[fit1]:
max_height = max(heights[fit1][cnt][0])
elif cnt in heights[fit2]:
max_height = max(heights[fit2][cnt][0])
else:
max_height=0
else:
#max_height = max(heights[self.fits[0]][cnt][0])
max_height=0
if max_height!=0:
ax.set_ylim(0,1.2*max_height)
lines, labels = fig.axes[-2].get_legend_handles_labels()
if any(['GLOBAL_' in k for k in self.fits]):
fig.legend(lines, labels, loc = 'upper center',ncol=2,prop={'size':35},bbox_to_anchor=(0.5,0.92))
else:
fig.legend(lines, labels, loc = 'upper center',ncol=2,prop={'size':35},bbox_to_anchor=(0.5,0.05))
py.tight_layout()
py.savefig(f'{self.path}/reports/{self.outputname}/Coeffs_Hist.pdf')
# Plot residuals at 68% CL (bar plot)
nrows,ncols=1,1
py.figure(figsize=(30,20))
df = pd.DataFrame.from_dict(residual68,orient='index',columns=[c.replace('O','c') for c in coeff_list])
new_df = df.T
ax = new_df.plot(kind='bar',rot=0,width=0.7,figsize=(10,5))
ax.plot([-1,npar+1],np.zeros(2),'k--',lw=2)
ax.plot([-1,npar+1],np.ones(2),'k--',lw=2,alpha=0.3)
ax.plot([-1,npar+1],-1.*np.ones(2),'k--',lw=2,alpha=0.3)
ax.imshow(logo, aspect='auto',transform=ax.transAxes,extent=[.775,.975,.825,.975],zorder=-1,alpha=1)
py.xticks(rotation=90)
py.tick_params(axis='y',direction='in',labelsize=15)
py.ylabel(r'${\rm Residuals\ (68\%)}$',fontsize=15)
py.ylim(-4,4)
py.legend(loc=2,frameon=False,prop={'size':11})
py.tight_layout()
py.savefig(f'{self.path}/reports/{self.outputname}/Coeffs_Residuals.pdf', dpi=1000 )
# Plot residuals at 68% CL (histogram)
nrows,ncols=1,1
py.figure(figsize=(10,7))
ax = py.subplot(111)
i=0
for name in residual68:
ax.hist(residual68[name],bins='fd',density=True,color=colors[i],edgecolor='k',alpha=0.3,label=name)
i+=1
ax.imshow(logo, aspect='auto',transform=ax.transAxes,extent=[.675,.975,.825,.975],zorder=-1, alpha=1)
py.xticks(rotation=90)
py.tick_params(which='both',direction='in',labelsize=15)
py.ylabel(r'${\rm Normalized\ Yield}$',fontsize=15)
py.xlabel(r'${\rm Residuals\ (68\%)}$',fontsize=15)
py.ylim(0,1)
py.xlim(-3,3)
py.legend(loc=2,frameon=False,prop={'size':11})
py.tight_layout()
py.savefig(f'{self.path}/reports/{self.outputname}/Coeffs_Residuals_Hist.pdf', dpi=1000)
# Plot 95% CL for energy reach (bar plot)
# nrows,ncols=1,1
# py.figure(figsize=(7,5))
# df = pd.DataFrame.from_dict(energy95,orient='index',columns=coeff_list)
# new_df = df.T
# ax = new_df.plot(kind='bar',rot=0,width=0.7,figsize=(10,5))
# py.xticks(rotation=90)
# py.tick_params(axis='y',direction='in',labelsize=15)
# py.ylabel(r'$\Lambda/\sqrt{c_i}\ ({\rm TeV})\ {\rm at\ the\ 95\%\ CL}$',fontsize=11)
# py.ylim(0,5)
# py.legend(loc=2,frameon=False,prop={'size':11})
# py.tight_layout()
# py.savefig(f'{self.path}/reports/{self.outputname}/Coeffs_Energy.pdf')
# Combine plots in tex file
# ================ Coefficients latex file =====================
L = latex_packages()
L.append(r"\begin{document}")
# Write coefficients table
L.append('\n')
L.append('\n')
L.append(r"\begin{table}[H]")
L.append(r"\centering")
temp = r"\begin{tabularx}{\textwidth}{|c|c|"
for i in range(len(self.fits)):
temp += "X|X|"
temp+="}"
L.append(temp)
L.append(r"\hline")
temp=' & & '
for i,k in enumerate(self.fits):
if i != len(self.fits)-1:
temp += r" \multicolumn{2}{c|}{%s} & "%self.fit_labels[k]#k.replace('_','\_')
else:
temp += r" \multicolumn{2}{c|}{%s} \\ \hline"%self.fit_labels[k]#k.replace('_','\_')
L.append(temp)
temp = r'Class & Coefficients &'
for i,k in enumerate(self.fits):
if i!= len(self.fits)-1:
temp += r" 68\% CL Bounds & 95\% CL Bounds &"
else:
temp += r" 68\% CL Bounds & 95\% CL Bounds \\ \hline"
L.append(temp)
#Load coefficient yaml card
yaml_file = f'{self.path}/smefit/analyze/coeff_groups.yaml'
with open(yaml_file) as f:
coeff_config = yaml.safe_load(f)
new_coeff_list = np.array([c.replace('O','c') for c in coeff_list])
# Build lists for grouped coefficients
coeffs_organized = {}
for group in coeff_config.keys():
coeffs_organized[group]=[]
for c in coeff_config[group]:
if c=='OPff':
c='Off'
if c not in coeffs_organized[group] and c.replace('O','c') in new_coeff_list:
coeffs_organized[group].append(c.replace('O','c'))
for group in coeffs_organized.keys():
if len(coeffs_organized[group])==0:
continue
L.append(r'\multirow{%d}{*}{%s}'%(len(coeffs_organized[group]),group))
for isub,coeff in enumerate(coeffs_organized[group]):
temp=''
temp+=' & '+coeff+" & "
for i,k in enumerate(self.fits):
if i != len(self.fits)-1:
if coeff in new_coeff_list:
idx = np.where(new_coeff_list==coeff)[0][0]
error = (upper_bound95[k][idx]-lower_bound95[k][idx])/2.
if ('O'+coeff[1:] in disjointed_list or 'O'+coeff[1:] in disjointed_list2) and lower_bound68[k+'(2)'][idx]!=0:
error2 = (upper_bound95[k+'(2)'][idx]-lower_bound95[k+'(2)'][idx])/2.
temp += r"[%0.3f,%0.3f]\newline [%0.3f,%0.3f] & %0.3f $\pm$ %0.3f\newline %0.3f $\pm$ %0.3f & "%(lower_bound68[k][idx],\
upper_bound68[k][idx],\
lower_bound68[k+'(2)'][idx],\
upper_bound68[k+'(2)'][idx],\
mean_coeff[k][idx],\
error,\
mean_coeff[k+'(2)'][idx],\
error2)
else:
temp += r"[%0.3f,%0.3f] & %0.3f $\pm$ %0.3f & "%(lower_bound68[k][idx],upper_bound68[k][idx],\
mean_coeff[k][idx],error)
else:
temp += r" & & "
elif i == len(self.fits)-1 and isub!=len(coeffs_organized[group])-1:
if coeff in new_coeff_list:
idx = np.where(new_coeff_list==coeff)[0][0]
error = (upper_bound95[k][idx]-lower_bound95[k][idx])/2.
if ('O'+coeff[1:] in disjointed_list or 'O'+coeff[1:] in disjointed_list2) and lower_bound68[k+'(2)'][idx]!=0:
error2 = (upper_bound95[k+'(2)'][idx]-lower_bound95[k+'(2)'][idx])/2.
temp += r"[%0.3f,%0.3f]\newline [%0.3f,%0.3f] & %0.3f $\pm$ %0.3f\newline %0.3f $\pm$ %0.3f\\ \cline{2-%d}"%(lower_bound68[k][idx],\
upper_bound68[k][idx],\
lower_bound68[k+'(2)'][idx],\
upper_bound68[k+'(2)'][idx],\
mean_coeff[k][idx],\
error,\
mean_coeff[k+'(2)'][idx],\
error2,\
2+2*len(self.fits))
else:
temp += r"[%0.3f,%0.3f] & %0.3f $\pm$ %0.3f \\ \cline{2-%d}"%(lower_bound68[k][idx],upper_bound68[k][idx],\
mean_coeff[k][idx],error,2+2*len(self.fits))
else:
temp += r" & \\ \cline{2-%d}"%(2+2*len(self.fits))
elif i==len(self.fits)-1 and isub==len(coeffs_organized[group])-1:
if coeff in new_coeff_list:
idx = np.where(new_coeff_list==coeff)[0][0]
error = (upper_bound95[k][idx]-lower_bound95[k][idx])/2.
if ('O'+coeff[1:] in disjointed_list or 'O'+coeff[1:] in disjointed_list2) and lower_bound68[k+'(2)'][idx]!=0:
error2 = (upper_bound95[k+'(2)'][idx]-lower_bound95[k+'(2)'][idx])/2.
temp += r"[%0.3f,%0.3f]\newline [%0.3f,%0.3f] & %0.3f $\pm$ %0.3f\newline %0.3f $\pm$ %0.3f\\ \hline"%(lower_bound68[k][idx],\
upper_bound68[k][idx],\
lower_bound68[k+'(2)'][idx],\
upper_bound68[k+'(2)'][idx],\
mean_coeff[k][idx],\
error,\
mean_coeff[k+'(2)'][idx],\
error2)
else:
temp += r"[%0.3f,%0.3f] & %0.3f $\pm$ %0.3f \\ \hline"%(lower_bound68[k][idx],upper_bound68[k][idx],\
mean_coeff[k][idx],error)
else:
temp += r" & \\ \hline"
L.append(temp)
L.append(r"\end{tabularx}")
L.append(r"\caption{Coefficient comparison.}")
L.append(r"\end{table}")
L.append('\n')
L.append('\n')
self.combine_plots(L, 'coefficient_plots', 'Coeffs_')
def plot_ellipse(self):
"""
Plot the 2 standard deviation ellipse of two parameters
(used for 2 parameter fits)
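Example (sketch): confidence_ellipse is the helper imported from
.tools, called with the same signature as below; the toy samples and
covariance are made up::

    import numpy as np
    import matplotlib.pyplot as py
    pts = np.random.default_rng(0).multivariate_normal([0, 0], [[1.0, 0.6], [0.6, 1.0]], 1000)
    _, ax = py.subplots(figsize=(6, 6))
    confidence_ellipse(pts[:, 0], pts[:, 1], ax, n_std=2, alpha=0.3, facecolor='C0', edgecolor=None)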
"""
mu={}
lbls=[]
hndls=[]
p1={}
p2={}
coeff_min={}
coeff_max={}
_, ax = py.subplots(figsize=(6, 6))
colors = py.rcParams['axes.prop_cycle'].by_key()['color']
i=0
# initialise orientation flags used by the SNS_ branch below so they
# are defined even before the first one-dimensional case is met
vertical=False
horizontal=False
for k in self.fits:
if k not in mu:
mu[k]=[]
if '-NS_c' in k:
coeff_names = k.split('_')[1]
else:
coeff_names = self.fits[-2].split('_')[1]
coeff_names = coeff_names.split('c')[1:]
coeff1 = 'O'+coeff_names[0]
coeff2 = 'O'+coeff_names[1]
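# e.g. a (hypothetical) fit label containing 'cptcpQM' gives
# coeff_names ['pt', 'pQM'], i.e. the operator pair (Opt, OpQM)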
if coeff1 in self.full_labels[k]:
if 'SNS_' in k:
idx1=np.where(np.array(self.labels[k])==coeff1)[0][0]
coeff1_values = np.array(self.coeffs[k][idx1])
else:
idx1=np.where(np.array(self.full_labels[k])==coeff1)[0][0]
coeff1_values = np.array(self.full_coeffs[k]).T[idx1]
mu[k].append(np.mean(coeff1_values,axis=0))
if coeff2 in self.full_labels[k]:
idx2=np.where(np.array(self.full_labels[k])==coeff2)[0][0]
if 'SNS_' in k:
idx2=np.where(np.array(self.labels[k])==coeff2)[0][0]
coeff2_values = np.array(self.coeffs[k][idx2])
else:
idx2=np.where(np.array(self.full_labels[k])==coeff2)[0][0]
coeff2_values = np.array(self.full_coeffs[k]).T[idx2]
mu[k].append(np.mean(coeff2_values,axis=0))
else:
coeff_min[k] = np.nanpercentile(coeff1_values,2.5)
coeff_max[k] = np.nanpercentile(coeff1_values,97.5)
vertical=True
if coeff1 not in self.full_labels[k]:
coeff_min[k] = np.nanpercentile(coeff2_values,2.5)
coeff_max[k] = np.nanpercentile(coeff2_values,97.5)
horizontal=True
if 'SNS_' in k:
if vertical:
p2[k]=ax.axvspan(coeff_min[k],coeff_max[k],alpha=0.3,facecolor=colors[i],edgecolor=None)
p1[k]=ax.axvspan(coeff_min[k],coeff_max[k],facecolor=(0,0,0,0),edgecolor=colors[i])
vertical=False
elif horizontal:
p2[k]=ax.axhspan(coeff_min[k],coeff_max[k],alpha=0.3,facecolor=colors[i],edgecolor=None)
p1[k]=ax.axhspan(coeff_min[k],coeff_max[k],facecolor=(0,0,0,0),edgecolor=colors[i])
horizontal=False
else:
p2[k]=confidence_ellipse(coeff1_values, coeff2_values, ax,n_std=2,alpha=0.3, facecolor=colors[i], edgecolor=None)
p1[k]=confidence_ellipse(coeff1_values, coeff2_values, ax,n_std=2,alpha=1, edgecolor=colors[i])
ax.scatter(mu[k][0], mu[k][1], c=colors[i], s=3)
py.xlabel(r'${\rm '+coeff1.replace('O','c')+'}$',fontsize=20)
py.ylabel(r'${\rm '+coeff2.replace('O','c')+'}$',fontsize=20)
py.tick_params(which='both',direction='in',labelsize=15)
if (coeff1.replace('O','c') in k) and ("GLOBAL" in k):
lbls.append(r'${\rm All\ Data\ (2D)}$')
elif (coeff1.replace('O','c') not in k) and "GLOBAL" in k:
lbls.append(r'${\rm All\ Data\ (Marg)}$')
else:
lbls.append(r'${\rm '+k.split('_')[2]+'}$')
hndls.append((p1[k],p2[k]))
i+=1
ax.scatter(0, 0, c='k',marker='s',s=10,zorder=10)
py.legend(loc='best',handles=hndls,labels=lbls,frameon=False,prop={'size':18})
py.title(r'${\rm 95\%\ Confidence\ Level\ Bounds}$',fontsize=15)
py.tight_layout()
py.savefig(f'{self.path}/reports/{(self.outputname)}/Ellipse_{coeff1}_{coeff2}.pdf')
def plot_correlations(self):
"""
Computes and displays the correlation coefficients
between parameters in a heat map
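Example of the correlation computation used below (sketch with random
posterior samples)::

    import numpy as np
    import pandas as pd
    samples = np.random.default_rng(0).normal(size=(1000, 3))
    corr = pd.DataFrame(samples).corr()  # (npar, npar) Pearson matrix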
"""
for k in self.fits:
coeff_list = self.full_labels[k]
param_data = pd.DataFrame(self.full_coeffs[k])
correlations = param_data.corr()
rows_to_keep = []
for i in range(len(correlations)):
for j in range(len(correlations[i])):
if (coeff_list[i]!='OpWB' and coeff_list[i]!='OpD')\
and self.config[k]['coefficients'][coeff_list[i]]['fixed'] is not False:
continue
if coeff_list[i]=='OW' or coeff_list[i]=='OB':
continue
if i!=j and (correlations[i][j]>0.5 or correlations[i][j]<-0.5):
if i not in rows_to_keep:
rows_to_keep.append(i)
correlations = np.array(correlations)[np.array(rows_to_keep),:]
correlations = np.array(correlations)[:,np.array(rows_to_keep)]
labels = np.array(self.full_labels[k])[np.array(rows_to_keep)]
npar=len(labels)
fig = py.figure(figsize=(10,10))
ax = fig.add_subplot(111)
colors = py.rcParams['axes.prop_cycle'].by_key()['color']
cmap = matplotlib.colors.ListedColormap([colors[0], 'lightgrey', colors[1]])
bounds = [-1.0, -0.5, 0.5, 1.0]
norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)
cax = ax.matshow(correlations, vmin=-1, vmax=1, cmap=cmap, norm=norm)
divider = make_axes_locatable(ax)
cax1 = divider.append_axes("right", size="5%", pad=0.1)
fig.colorbar(cax, cax=cax1)
for i in range(npar):
for j in range(npar):
c = correlations[j,i]
if (c>0.5 or c<-0.5):
ax.text(i, j, str(np.round(c,1)), va='center', ha='center',fontsize=10)
names = [l.replace('O','c') for l in labels]
ticks = np.arange(0,npar,1)
ax.set_xticks(ticks)
ax.set_yticks(ticks)
ax.set_xticklabels(names)
ax.set_yticklabels(names)
ax.tick_params(axis='x',rotation=90,labelsize=15)
ax.tick_params(axis='y',labelsize=15)
py.tight_layout()
py.savefig(f'reports/{self.outputname}/Coeffs_Corr_{k}.pdf')
# Combine plots in tex file
# ================ Correlations latex file =====================
L = latex_packages()
L.append(r"\begin{document}")
self.combine_plots(L, 'correlation_plots', 'Coeffs_Corr_', caption=True )
def plot_data_vs_theory(self):
"""
Plots data vs theory. Datasets to be plotted
should be written in the plot_settings.yaml YAML file.
There you should also specify the various plot settings.
Note: special treatment is given to plots 14
and 16 (W helicity data and combined ATLAS+CMS Run I
signal strength data, respectively) to get them
to display properly. If these are moved in plot_settings,
they must be moved here as well.
Note: In some cases (e.g. fits with mass cuts) this
function throws an error that has yet to be resolved.
"""
mean_theory_exp={}
std_theory_exp={}
for k in self.fits:
datalines={}
systypes={}
smtheory={}
mean_theory_exp[k]={}
std_theory_exp[k]={}
# Read data and SM theory files
data_list=[]
for dataset in self.config[k]['datasets']:
if dataset in data_list:
continue
datalines[dataset], systypes[dataset] = self.read_dataset(dataset)
data_list.append(dataset)
# Construct theory arrays per experiment
cnt=0
for i in range(len(self.dataset[k].ExpNames)):
set_name=self.dataset[k].ExpNames[i]
ndat=self.dataset[k].NdataExp[i]
mean_theory_exp[k][set_name] = self.mean_theory[k][cnt:cnt+ndat]
std_theory_exp[k][set_name] = self.std_theory[k][cnt:cnt+ndat]
if set_name not in smtheory:
smtheory[set_name]=self.sm_theory[k][cnt:cnt+ndat]
cnt+=ndat
# Load plots YAML file
yaml_file = f'{self.path}/smefit/analyze/plot_settings.yaml'
with open(yaml_file) as f:
plots = yaml.safe_load(f)
for p in plots.keys():
# Skip datasets that were not included in fit
if np.any([dataset not in data_list for dataset in plots[p]['datasets']]):
continue
# Set figure sizes
if 'figsize' in plots[p]:
py.figure(figsize=(plots[p]['figsize'][0],plots[p]['figsize'][1]))
else:
py.figure(figsize=(4,3))
xcnt=0
for dataname in plots[p]['datasets']:
# Gather data from data tables
x_vals = []
central_data = []
stat_error = []
sys_errors = []
for line in datalines[dataname]:
ncols = len(line.split())
if ncols==3:
ndata = int(line.split()[-1])
if ncols>3:
x_vals.append(float(line.split()[2]))
central_data.append(float(line.split()[5]))
stat_error.append(float(line.split()[6]))
sys_errors.append([float(line.split()[i]) for i in range(7,ncols)])
add_corr_index=[]
mult_corr_index=[]
add_uncorr_index=[]
mult_uncorr_index=[]
for i in range(1,len(systypes[dataname])):
if systypes[dataname][i].split()[1]=='ADD' and systypes[dataname][i].split()[-1]=='UNCORR':
add_uncorr_index.append(int(systypes[dataname][i].split()[0]))
elif systypes[dataname][i].split()[1]=='MULT' and systypes[dataname][i].split()[-1]=='UNCORR':
mult_uncorr_index.append(int(systypes[dataname][i].split()[0]))
elif systypes[dataname][i].split()[1]=='ADD' and systypes[dataname][i].split()[-1]!='UNCORR':
add_corr_index.append(int(systypes[dataname][i].split()[0]))
elif systypes[dataname][i].split()[1]=='MULT' and systypes[dataname][i].split()[-1]!='UNCORR':
mult_corr_index.append(int(systypes[dataname][i].split()[0]))
add_uncorr_index = 2*np.array(add_uncorr_index)-2
mult_uncorr_index = 2*np.array(mult_uncorr_index)-1
add_corr_index = 2*np.array(add_corr_index)-2
mult_corr_index = 2*np.array(mult_corr_index)-1
corr_index = np.concatenate([add_corr_index,mult_corr_index],0)
# corr_index_sorted = np.argsort(corr_index)
shift_data=True
# Compute total uncorrelated and total uncertainties
total_uncorr=[]
total_error=[]
for i in range(ndata):
stat_err = 0.
sys_err = 0.
corr_sys_err = 0.
stat_err += stat_error[i]**2
if len(add_uncorr_index)>0:
add_uncorr_error = np.array(sys_errors)[i][add_uncorr_index]
sys_err += np.sum(add_uncorr_error**2)
if len(mult_uncorr_index)>0:
mult_uncorr_error = np.array(sys_errors)[i][mult_uncorr_index]*np.array(central_data)[i]/100.
sys_err += np.sum(mult_uncorr_error**2)
if len(mult_corr_index)>0:
mult_corr_error = np.array(sys_errors)[i][mult_corr_index]*np.array(central_data)[i]/100.
corr_sys_err+=np.sum(mult_corr_error**2)
if len(add_corr_index)>0:
add_corr_error = np.array(sys_errors)[i][add_corr_index]
corr_sys_err+=np.sum(add_corr_error**2)
# If no uncorrelated errors are specified,
# do not shift data
if (stat_err+sys_err)==0 and p!='plot16':
shift_data = False
total_uncorr.append(np.sqrt(stat_err+sys_err))
total_error.append(np.sqrt(stat_err+sys_err+corr_sys_err))
total_uncorr = np.array(total_uncorr)
total_error = np.array(total_error)
# Gather correlated uncertainties if available
corr_errors=[]
for i in range(ndata):
if len(mult_corr_index)>0:
mult_errors = np.array(sys_errors)[i][mult_corr_index]*np.array(central_data)[i]/100.
if len(add_corr_index)>0:
add_errors = np.array(sys_errors)[i][add_corr_index]
corr_errors.append(np.concatenate([mult_errors,add_errors],0))
corr_errors = np.array(corr_errors)
# Last two points in Run I Higgs signal strengths do not have correlated errors
# Temporarily removing the last two points to compute shifts
if p=='plot16':
corr_errors=corr_errors[:-2]
total_uncorr_temp = total_uncorr
total_uncorr=total_uncorr[:-2]
central_data_temp = central_data
central_data = central_data[:-2]
mean_theory_temp={}
for k in self.fits:
if dataname not in mean_theory_exp[k]:
continue
mean_theory_temp[k]=mean_theory_exp[k][dataname]
mean_theory_exp[k][dataname]=mean_theory_exp[k][dataname][:-2]
# Number of correlated errors
if len(corr_errors)>0:
ncorr=len(corr_errors[0])
else:
ncorr=0
# Compute shift to data from correlated errors if present
# In multi-fit comparison, shift is taken to be theory of last fit
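# The shifts follow from minimizing a chi2 with one nuisance parameter
# r_k per correlated source,
#   chi2 = sum_i (D_i - T_i - sum_k r_k beta_ik)^2 / s_i^2 + sum_k r_k^2,
# whose stationarity condition is the linear system A r = B solved
# below, with A_kl = delta_kl + sum_i beta_ik beta_il / s_i^2 and
# B_k = sum_i beta_ik (D_i - T_i) / s_i^2; the data are then shifted
# by sum_k r_k beta_ik.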
shift=[]
for k in self.fits:
if dataname in mean_theory_exp[k]:
fit_for_shift = k
if shift_data and ncorr>0:
A = np.eye(ncorr) + np.einsum('ik,il,i->kl',corr_errors,corr_errors,1./(total_uncorr**2))
B = np.einsum('ik,i,i->k',corr_errors,central_data-mean_theory_exp[fit_for_shift][dataname],1./(total_uncorr**2))
try:
r=np.einsum('kl,l->k',np.linalg.inv(A),B)
except np.linalg.LinAlgError:
r=np.zeros(len(corr_errors[0]))
shift = np.einsum('k,ik->i',r,corr_errors)
# Set up x-values, spacing between data and theory points,
# and normalization for each plot
if 'xshift1' in plots[p]:
xshift1 = plots[p]['xshift1']
if isinstance(xshift1, Iterable):
xshift1=np.array(xshift1)
if 'xshift2' in plots[p]:
xshift2 = plots[p]['xshift2']
if isinstance(xshift2, Iterable):
xshift2=np.array(xshift2)
if 'normed' in plots[p]:
if plots[p]['normed']:
norm=smtheory[dataname]
else:
norm=np.ones(ndata)
if 'x_values' in plots[p]:
if plots[p]['x_values'] is not None:
x_vals = np.array(plots[p]['x_values'])
if len(plots[p]['datasets'])==len(x_vals):
x_vals = np.array([plots[p]['x_values'][xcnt]])
# Exceptions
if p=='plot14':
x_vals = np.array(x_vals[::2])
if p=='plot14' and dataname.startswith('CMS'):
x_vals = x_vals+0.5
if p=='plot16':
# Adding last two points back, last two points have zero shift
shift_temp = np.zeros(ndata)
for i, elem in enumerate(shift):
shift_temp[i]=elem
central_data = central_data_temp
total_uncorr = total_uncorr_temp
for k in self.fits:
if dataname not in mean_theory_exp[k]:
continue
mean_theory_exp[k][dataname]=mean_theory_temp[k]
shift=shift_temp
# Reset color cycle
py.gca().set_prop_cycle(None)
# Only need labels for one dataset in multi-dataset plots
if xcnt==0:
# Plot unshifted data
print(dataname)
py.errorbar(np.array(x_vals),central_data/norm,\
yerr=total_error/norm,fmt='.',markersize=5,\
label=r'${\rm Unshifted\ Data}$')
# Plot shifted data if correlated uncertainties exist
if len(corr_index)>0 and len(shift)>0:
py.errorbar(np.array(x_vals)+xshift1,(central_data-shift)/norm,\
yerr=total_uncorr/norm,fmt='.',markersize=5,\
label=r'${\rm Shifted\ Data}$')
else:
# Plot dummy point to keep consistent with colors/legend
py.errorbar(10000.,10000.,yerr=0.,fmt='.',markersize=5,label=r'${\rm Shifted\ Data}$')
# Plot theory
for i,k in enumerate(self.fits):
if dataname not in mean_theory_exp[k]:
continue
if 'NHO_vs_HO' in self.outputname:
if 'NHO' in k:
temp_label =r'${\rm EFT\ Lin}$'
elif 'HO' in k:
temp_label =r'${\rm EFT\ Quad}$'
else:
temp_label = self.fit_labels[k]
py.errorbar(np.array(x_vals)+xshift2*(i+1),(mean_theory_exp[k][dataname])/norm,\
yerr=std_theory_exp[k][dataname]/norm,fmt='.',markersize=5,\
label=temp_label) #r'${\rm Fit}%d$'%(i+1))
# Plot without labels
else:
# Plot unshifted data
py.errorbar(np.array(x_vals),central_data/norm,\
yerr=total_error/norm,fmt='.',markersize=5)
# Plot shifted data if correlated uncertainties exist
if len(corr_index)>0 and len(shift)>0:
py.errorbar(np.array(x_vals)+xshift1,(central_data-shift)/norm,\
yerr=total_uncorr/norm,fmt='.',markersize=5)
else:
# Plot dummy point to keep consistent with colors/legend
py.errorbar(10000.,10000.,yerr=0.,fmt='.',markersize=5)
# Plot theory
for i,k in enumerate(self.fits):
if dataname not in mean_theory_exp[k]:
continue
py.errorbar(np.array(x_vals)+xshift2*(i+1),(mean_theory_exp[k][dataname])/norm,\
yerr=std_theory_exp[k][dataname]/norm,fmt='.',markersize=5)
xcnt+=1
# Construct plot limits, labels, and titles
if 'plot_ones' in plots[p]:
if plots[p]['plot_ones']:
py.plot(np.linspace(-10000,10000,2),np.ones(2),'k--',alpha=0.7)
if 'xlim' in plots[p]:
py.xlim(plots[p]['xlim'][0],plots[p]['xlim'][1])
if 'ylim' in plots[p]:
py.ylim(plots[p]['ylim'][0],plots[p]['ylim'][1])
if 'xscale' in plots[p]:
py.xscale(plots[p]['xscale'])
if 'yscale' in plots[p]:
py.yscale(plots[p]['yscale'])
if 'xlabel' in plots[p]:
if plots[p]['xlabel'] is not None:
py.xlabel(r'%s'%plots[p]['xlabel'],fontsize=12)
if 'ylabel' in plots[p]:
if plots[p]['ylabel'] is not None:
py.ylabel(r'%s'%plots[p]['ylabel'],fontsize=10)
if 'title' in plots[p]:
if plots[p]['title'] is not None:
py.title(r'%s'%plots[p]['title'],fontsize=10)
# default to 'best' when the YAML entry does not specify a location
legend_loc = plots[p].get('legend_loc','best')
py.tick_params(which='both',direction='in')
if 'x_labels' in plots[p]:
if plots[p]['x_labels'] is not None and 'rotate_xlabels' in plots[p]:
py.xticks(plots[p]['x_values'],plots[p]['x_labels'],rotation=plots[p]['rotate_xlabels'])
elif plots[p]['x_labels'] is not None:
py.xticks(plots[p]['x_values'],plots[p]['x_labels'])
py.legend(loc=legend_loc, frameon=False, prop={'size':10})
py.tight_layout()
if 'output' in plots[p]:
if plots[p]['output'] is not None:
filename=plots[p]['output']
else:
filename=plots[p]['datasets'][0]
py.savefig(f'{self.path}/reports/{self.outputname}/DvT{p[4:]}_{filename}.pdf')
# Combine plots in tex file
# ================ Data vs Theory latex file =====================
L = latex_packages()
L.append(r"\usepackage{multicol}")
L.append(r"\newenvironment{Figure}{\par\medskip\noindent\minipage{\linewidth}}{\endminipage\par\medskip}")
L.append(r"\begin{document}")
L.append(r"\begin{multicols}{2}")
for k in sorted(listdir(f'{self.path}/reports/{self.outputname}')):
if not k.startswith('DvT'):
continue
L.append(r"\begin{Figure}")
L.append(r"\centering")
L.append(r"\includegraphics[width=0.99\textwidth]{%s/reports/%s/%s}"%(self.path,self.outputname,k))
L.append(r"\end{Figure}")
L.append(r"\end{multicols}")
self.run_pdflatex(L, 'dvt_plots')
self.move_to_meta('DvT*_*')