Source code for smefit.analyze.analyze

# -*- coding: utf-8 -*-
from os import listdir
import subprocess
import copy
import yaml
import numpy as np

import matplotlib
import matplotlib.pyplot as py
from matplotlib import rc
from matplotlib import colors as mcolors
from mpl_toolkits.axes_grid1 import make_axes_locatable
import pandas as pd

from collections.abc import Iterable
from progress.bar import Bar
from ..optimize.optimize import OPTIMIZE
from ..core import chisquared as chi2
from ..core import compute_theory as pr
from .tools import confidence_ellipse, latex_packages

# global mathplotlib settings
matplotlib.use('PDF')
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
rc('text', usetex=True)

[docs]class ANALYZE(object): # pylint disable=anomalous-backslash-in-string """ This python class object produces a fit report based on the result ID names given in the command line execution. Disclaimer by JJE as of 01/03/21: this code is not written well. It's a product of cowboy coding, but it gets the job done (in most cases). Improvement is needed (e.g. unifying notation, aggregating repeated calculations, etc.) """ # TODO: implement what is suggested in the description # splitting the code in different submodules def __init__(self, root_path, fits, outputname): self.outputname = outputname self.fits = fits self.fit_labels={} self.path = root_path cnt=1 for k in self.fits: lbl_list=k.split('_') # Fit labels for plots if "check" in self.outputname: if "260121-NS" in lbl_list[0]: lbl = r"${\rm final\ 2\ %s \ %s }$"%(lbl_list[-2], lbl_list[-1]) else: lbl = r"${\rm final\ 3\ %s \ %s }$"%(lbl_list[-2], lbl_list[-1]) elif 'cpQ3_NLO_NHO' in self.outputname: if "cpQ3fixed" in lbl_list: lbl = r"${\rm cqQ3 \ fixed,\ Linear,\ %s \ EFT }$"%(lbl_list[-2]) else: lbl = r"${\rm cqQ3 \ fitted,\ Linear,\ %s \ EFT }$"%(lbl_list[-2]) elif 'cpQ3_NLO_HO' in self.outputname: if "cpQ3fixed" in lbl_list: lbl = r"${\rm cqQ3 \ fixed,\ Quadratic,\ %s \ EFT }$"%(lbl_list[-2]) else: lbl = r"${\rm cqQ3 \ fitted,\ Quadratic,\ %s \ EFT }$"%(lbl_list[-2]) elif 'noEWPO_NLO_NHO' in self.outputname: if "noEWPO" in lbl_list: lbl = r"${\rm w/o \ EWPO,\ Linear,\ %s \ EFT }$"%(lbl_list[-2]) else: lbl = r"${\rm with\ EWPO,\ Linear,\ %s \ EFT }$"%(lbl_list[-2]) elif 'noEWPO_NLO_HO' in self.outputname: if "noEWPO" in lbl_list: lbl = r"${\rm w/o \ EWPO,\ Quadratic,\ %s \ EFT }$"%(lbl_list[-2]) else: lbl = r"${\rm with\ EWPO,\ Quadratic,\ %s \ EFT }$"%(lbl_list[-2]) elif 'NSvsMCFit' in self.outputname: if 'NS' in lbl_list[0]: lbl=r"${\rm EFT\ NS}$" elif 'MCFit' in lbl_list[0]: lbl=r"${\rm EFT\ MCfit}$" elif 'TOP19' in self.outputname: if 'GLOBAL' in lbl_list: lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) elif 'TOP19' in lbl_list: lbl = r"${\rm Top-only\ (Data\ '19),\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) else: lbl = r"${\rm Top-only\ (Data\ '21),\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) elif 'TOPphilic' in self.outputname: if 'GLOBAL' in lbl_list: lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) else: lbl = r"${\rm Top\ Philic,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) elif 'TOP' in self.outputname: if 'GLOBAL' in lbl_list: lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) else: lbl = r"${\rm Top-only,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) elif 'HIGGS' in self.outputname: if 'GLOBAL' in lbl_list: lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) else: lbl = r"${\rm Higgs-only,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) elif 'noVV' in self.outputname: if 'GLOBAL' in lbl_list: lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) else: lbl = r"${\rm TOP+Higgs,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) elif 'goodSM' in self.outputname: if 'GLOBAL' in lbl_list: lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) else: lbl = r"${\rm Top+Higgs+VV\ (good\ SM),\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) elif 'noHighE' in self.outputname: if 'GLOBAL' in lbl_list: lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) else: lbl = r"${\rm Top+Higgs+VV\ (no\ high-E),\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) elif 'AC' in self.outputname: if 'AC' in lbl_list: lbl = r"${\rm with\ Top \ AC\ %s \ %s }$"%(lbl_list[-2], lbl_list[-1]) else: lbl = r"${\rm w/out\ Top \ AC\ %s \ %s }$"%(lbl_list[-2], lbl_list[-1]) elif 'individual' in self.outputname: if 'SNS' in lbl_list: lbl = r"${\rm Individual}$" elif 'NHO' in lbl_list: lbl = r"${\rm Top+Higgs+VV,\ Linear\ %s \ EFT }$"%(lbl_list[-2]) else: lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) elif 'nott2D' in self.outputname: if 'nott2D' in lbl_list: lbl = r"${\rm w/out\ Top \ MttYtt\ %s \ %s }$"%(lbl_list[-2], lbl_list[-1]) else: lbl = r"${\rm with\ Top \ MttYtt\ %s \ %s }$"%(lbl_list[-2], lbl_list[-1]) else: if 'NHO' in lbl_list: lbl = r"${\rm Top+Higgs+VV,\ Linear\ %s \ EFT }$"%(lbl_list[-2]) else: lbl = r"${\rm Top+Higgs+VV,\ Quadratic\ %s \ EFT }$"%(lbl_list[-2]) self.fit_labels[k] = lbl cnt+=1 self.load_results()
[docs] def load_results(self): """ Loads the fit results and computes the theoretical predictions for each data point that was included. Single NS (SNS) is treated separately. Only parameters are loaded in this case. """ # Load coefficients and labels self.coeffs = {} self.labels = {} for k in self.fits: if k not in self.coeffs: self.coeffs[k]=[] if k not in listdir(f'{self.path}/results'): raise NotADirectoryError(f'{k} not found in results directory. Exiting...') outputdir = f'{self.path}/results/{k}' if 'SNS_' in k: cnt=0 self.labels[k] = [fname for fname in sorted(listdir(outputdir)) if fname.startswith('O')] for f in sorted(listdir(outputdir)): if f.startswith('O') is False: continue self.coeffs[k].append([]) for filename in listdir(outputdir+'/%s'%f): if filename.startswith('SMEFT_coeffs_') is False or filename.endswith('_0.txt'): continue file = open(outputdir+'/%s/%s'%(f,filename),'r') data = file.readlines() lbls = list(data[0].split()) idx = np.where(np.array(lbls)==f)[0][0] self.coeffs[k][cnt].append(np.float(data[1].split()[idx])) file.close() cnt+=1 else: for f in listdir(outputdir): if f.startswith('SMEFT_coeffs_') is False or f.endswith('_0.txt'): continue file = open(outputdir+'/%s'%f,'r') data = file.readlines() self.labels[k] = [data[0].split()[i] for i in range(len(data[0].split()))] self.coeffs[k].append(np.array([np.float(data[1].split()[i]) for i in range(len(data[1].split()))])) file.close() # Load data and compute theory self.config = {} self.dataset = {} self.theory = {} self.mean_theory = {} self.std_theory = {} self.sm_theory = {} self.cv_flag={} self.boot_flag={} self.full_coeffs={} self.full_labels={} for k in self.fits: self.config[k] = {} # Load results YAML file yaml_file = f'{self.path}/results/{k}/{k}.yaml' with open(yaml_file) as f: self.config[k] = yaml.safe_load(f) self.config[k]['root_path']=f'{self.path}' # Turn off cross-validation and replica generation self.config[k]['rep']=0 self.cv_flag[k]=self.config[k]['cv'] self.config[k]['cv']=False self.boot_flag[k]=self.config[k]['bootstrap'] self.config[k]['bootstrap']=False # Set paths to theory tables self.config[k]['corrections_path'] = self.config[k]['root_path'] + '/tables/operator_res/' + self.config[k]['order'] + '/' self.config[k]['theory_path'] = self.config[k]['root_path'] + '/tables/theory/' # Set t0 to central data if not self.config[k]['CT0L']: self.config[k]['t0']='data' opt=OPTIMIZE(self.config[k]) opt.get_free_params() # Get free parameter indices # This is needed in the case in which # non-fitted coefficients and their values # are printed in the output files nonzero=[] free_labels = opt.free_param_labels for l in free_labels: nonzero.append(self.labels[k].index(l)) nonzero=np.array(nonzero) self.dataset[k]=opt.loaded_datasets # dat = self.dataset[k].Commondata # covmat_inv = np.linalg.inv(self.dataset[k].CovMat) self.theory[k] = [] self.full_coeffs[k]=[] self.full_labels[k]=opt.coefficients.labels if 'SNS_' in k: continue # Loop over replicas/samples and compute theory bar = Bar(r'Computing theory for each replica', max=len(self.coeffs[k])) for j in range(len(self.coeffs[k])): # Set free parameters in optimize class, propogate them # to opt.coefficients.values, and set constraints opt.free_params = self.coeffs[k][j][nonzero] opt.propogate_params() opt.set_constraints() theory_rep = pr.make_predictions(self.config[k],self.dataset[k],opt.coefficients.values,opt.coefficients.labels) # diff = dat + self.dataset[k].Noise - theory_rep # Noise is only necessary for closure test chi2 # chi2 = np.einsum('i,ij,j->',diff,covmat_inv,diff) self.theory[k].append(theory_rep) self.full_coeffs[k].append(copy.deepcopy(opt.coefficients.values)) bar.next() bar.finish() # Compute mean theory self.theory[k] = np.array(self.theory[k]) self.mean_theory[k] = np.mean(self.theory[k],axis=0) self.std_theory[k] = np.std(self.theory[k],axis=0) # Compute SM Theory self.sm_theory[k] = np.array(pr.make_predictions(self.config[k],self.dataset[k],np.zeros(len(opt.coefficients.values)),opt.coefficients.labels))
[docs] def read_dataset(self, dataset): """ Read datasets and its corresponding systypes""" with open(f'{self.path}/tables/exp_data/DATA_{dataset}.dat','r') as f: datalines = f.readlines() with open(f'{self.path}/tables/exp_data/SYSTYPE_{dataset}_DEFAULT.dat','r') as f: systypes = f.readlines() return datalines, systypes
[docs] def run_pdflatex(self, L, filename): """ Dump to file and run pdflatex Parameters ---------- L : list(str) latex lines filename : str file name """ L.append(r"\end{document}") L = [l+"\n" for l in L] latex_src = f'{self.path}/reports/{self.outputname}/{filename}.tex' file = open(latex_src, "w") file.writelines(L) file.close() subprocess.call(f"pdflatex -halt-on-error -output-directory {self.path}/reports/{self.outputname}/ {latex_src}", shell=True) subprocess.call(f"rm {self.path}/reports/{self.outputname}/*.log {self.path}/reports/{self.outputname}/*.aux {self.path}/reports/{self.outputname}/*.out", shell=True)
[docs] def move_to_meta(self, filename ): """ Move pdf files to meta folder Parameters ---------- filename : str file names to be moved """ if 'meta' not in listdir(f'{self.path}/reports/{self.outputname}'): subprocess.call(f"mkdir {self.path}/reports/{self.outputname}/meta", shell=True) subprocess.call(f"mv {self.path}/reports/{self.outputname}/{filename}.pdf {self.path}/reports/{self.outputname}/meta/.", shell=True)
[docs] def combine_plots(self, L, latex_src, pdfname, caption=False): """Combine plots and run pdflatex""" for k in listdir(f'{self.path}/reports/{self.outputname}'): if k.startswith(pdfname) is False: continue L.append(r"\begin{figure}[t]") L.append(r"\centering") L.append(r"\includegraphics[width=0.99\textwidth]{%s/reports/%s/%s}"%(self.path,self.outputname,k)) if caption is True: L.append(r"\caption{{\rm %s}}"%self.fit_labels[k[12:-4]] ) L.append(r"\end{figure}") self.run_pdflatex(L, latex_src ) self.move_to_meta(f"{pdfname}*")
[docs] def write_summary(self): """ Provides a summary of the fits included in the report: the fit settings, fitted parameters (including any parameter constraints), and datasets. Uses data_references.yaml, data_groups.yaml, and coeff_groups.yaml YAML files. The first gives the references used for hyperlinks, and the other two organizes the data and parameters into groups (top, higgs, etc.). """ self.data_list = [] arxiv = {} ndat_total={} coeff_list = [] # Load data references YAML file #TODO: plcae these files outside the package yaml_file = f'{self.path}/smefit/analyze/data_references.yaml' with open(yaml_file) as f: arxiv = yaml.safe_load(f) # Load data grouping YAML file yaml_file = f'{self.path}/smefit/analyze/data_groups.yaml' with open(yaml_file) as f: self.data_groups = yaml.safe_load(f) for k in self.fits: ndat_total[k]=0 for i in range(len(self.dataset[k].ExpNames)): set_name=self.dataset[k].ExpNames[i] ndat_exp=self.dataset[k].NdataExp[i] ndat_total[k]+=ndat_exp if set_name not in self.data_list: self.data_list.append(set_name) for coeff in self.config[k]['coefficients']: if coeff not in coeff_list: coeff_list.append(coeff) # Writing summary report # ================ Summary in latex ===================== L = latex_packages() L.append(r"\usepackage{underscore}") L.append(r"\begin{document}") # Write fit details for i,k in enumerate(self.fits): L.append(r"{\bf \underline{Fit%d:}} %s"%(i+1,self.fit_labels[k])+'\n') L.append("Number of replicas/samples: %d\n"%(len(self.coeffs[k]))) L.append("pQCD order: %s\n"%str(self.config[k]['order'])) if self.config[k]['HOlambda']=='HO': lbl='$\Lambda^{-4}$' elif self.config[k]['HOlambda']=='NHO': lbl='$\Lambda^{-2}$' L.append("SMEFT order: %s\n"%lbl) if self.config[k]['CT0L']: L.append('Closure Test Level: %s\n'%str(self.config[k]['ctlevel'])) L.append('Cross validation : %s\n'%str(self.cv_flag[k])) L.append("MC Replica Generation: %s\n"%str(self.boot_flag[k])) if 'bounds' in self.config[k].keys(): L.append("Parameter bounds taken from: %s\n"%str(self.config[k]['bounds'])) # Write dataset table for ig,group in enumerate(self.data_groups): L.append('\n') L.append(r"\begin{table}[H]") L.append(r"\centering") temp = r"\begin{tabular}{|l|" for i in range(len(self.fits)): temp += "c|" temp+="}" L.append(temp) L.append(r"\hline") temp=' Datasets & ' for i,k in enumerate(self.fits): if i != len(self.fits)-1: temp += r" %s &"%self.fit_labels[k] else: temp += r" %s \\ \hline"%self.fit_labels[k] L.append(temp) for isub,Dataset in enumerate(self.data_groups[group]): if np.all([Dataset not in self.config[k]['datasets'] for k in self.config]): continue temp = r"" if Dataset in arxiv.keys(): temp += "\href{"+arxiv[Dataset]+"}{"+Dataset+"} & " else: temp += Dataset + " & " for i,k in enumerate(self.fits): if i != len(self.fits)-1: if Dataset in self.config[k]['datasets']: temp += r"\checkmark & " else: temp += r" & " else: if Dataset in self.config[k]['datasets']: temp += r"\checkmark \\ \hline" else: temp += r" \\ \hline" if ig==len(self.data_groups)-1 and isub == len(self.data_groups[group])-1 and i==len(self.fits)-1: temp += r" \hline" L.append(temp) if ig==len(self.data_groups)-1: temp = r"" temp += r"Total number of data points & " for i, k in enumerate(self.fits): if i != len(self.fits)-1: temp += str(ndat_total[k])+ " & " else: temp += str(ndat_total[k])+ r" \\ \hline" L.append(temp) L.append(r"\end{tabular}") if ig==len(self.data_groups)-1: L.append(r"\caption{Dataset comparison.}") L.append(r"\end{table}") # Write coefficients table L.append('\n') L.append('\n') L.append(r"\begin{table}[H]") L.append(r"\centering") temp = r"\begin{tabular}{|c|c|" for i in range(len(self.fits)): temp += "c|c|" temp+="}" L.append(temp) L.append(r"\hline") temp=' & & ' for i,k in enumerate(self.fits): if i != len(self.fits)-1: temp += r" \multicolumn{2}{c|}{%s} & "%self.fit_labels[k] else: temp += r" \multicolumn{2}{c|}{%s} \\ \hline"%self.fit_labels[k] L.append(temp) temp = r'Class & Coefficients &' for i,k in enumerate(self.fits): if i!= len(self.fits)-1: temp += r" Fitted & Fixed &" else: temp += r" Fitted & Fixed \\ \hline" L.append(temp) #Load coefficient yaml card yaml_file = f'{self.path}/smefit/analyze/coeff_groups.yaml' with open(yaml_file) as f: coeff_config = yaml.safe_load(f) # Build lists for grouped coefficients coeffs_organized = {} for group in coeff_config.keys(): coeffs_organized[group]=[] for c in coeff_config[group]: if c=='OPff': c='Off' if c not in coeffs_organized[group] and np.any([c in self.config[k]['coefficients'] for k in self.fits]): coeffs_organized[group].append(c) for ig,group in enumerate(coeffs_organized.keys()): if len(coeffs_organized[group])==0: continue L.append('\multirow{%d}{*}{%s}'%(len(coeffs_organized[group]),group)) for isub,coeff in enumerate(coeffs_organized[group]): temp='' temp+=' & '+coeff.replace('O','c')+" & " for i,k in enumerate(self.fits): if i != len(self.fits)-1: if coeff in self.config[k]['coefficients']: if self.config[k]['coefficients'][coeff]['fixed'] is False: temp += r"\checkmark & &" elif self.config[k]['coefficients'][coeff]['fixed'] is True: temp += r" & = %d &"%self.config[k]['coefficients'][coeff]['value'] else: temp+= r" & = " if isinstance(self.config[k]['coefficients'][coeff]['value'],Iterable): for i in range(len(self.config[k]['coefficients'][coeff]['value'])): temp+=r"%0.2f%s "%(self.config[k]['coefficients'][coeff]['value'][i],\ self.config[k]['coefficients'][coeff]['fixed'][i]) if i!=len(self.config[k]['coefficients'][coeff]['value'])-1: temp+=r" + " else: temp += r"%0.2f%s "%(self.config[k]['coefficients'][coeff]['value'],\ self.config[k]['coefficients'][coeff]['fixed']) temp+=r"&" else: temp += r" & & " elif i == len(self.fits)-1 and isub!=len(coeffs_organized[group])-1: if coeff in self.config[k]['coefficients']: if self.config[k]['coefficients'][coeff]['fixed'] is False: temp += r"\checkmark & \\ \cline{2-%d}"%(2+2*len(self.fits)) elif self.config[k]['coefficients'][coeff]['fixed'] is True: temp += r" & = %d \\ \cline{2-%d}"%(self.config[k]['coefficients'][coeff]['value'],\ 2+2*len(self.fits)) else: temp+= r" & = " if isinstance(self.config[k]['coefficients'][coeff]['value'],Iterable): for i in range(len(self.config[k]['coefficients'][coeff]['value'])): temp+=r"%0.2f%s "%(self.config[k]['coefficients'][coeff]['value'][i],\ self.config[k]['coefficients'][coeff]['fixed'][i]) if i!=len(self.config[k]['coefficients'][coeff]['value'])-1: temp+=r" + " else: temp += r"%0.2f%s "%(self.config[k]['coefficients'][coeff]['value'],\ self.config[k]['coefficients'][coeff]['fixed']) temp+=r"\\ \cline{2-%d}"%(2+2*len(self.fits)) else: temp += r" & \\ \cline{2-%d}"%(2+2*len(self.fits)) elif i==len(self.fits)-1 and isub==len(coeffs_organized[group])-1: if coeff in self.config[k]['coefficients']: if self.config[k]['coefficients'][coeff]['fixed'] is False: temp += r"\checkmark & \\ \hline" elif self.config[k]['coefficients'][coeff]['fixed'] is True: temp += r" & = %d \\ \hline"%(self.config[k]['coefficients'][coeff]['value']) else: temp+= r" & = " if isinstance(self.config[k]['coefficients'][coeff]['value'],Iterable): for i in range(len(self.config[k]['coefficients'][coeff]['fixed'])): temp+=r"%0.2f%s "%(self.config[k]['coefficients'][coeff]['value'][i],\ self.config[k]['coefficients'][coeff]['fixed'][i]) if i!=len(self.config[k]['coefficients'][coeff]['value'])-1: temp+=r" + " else: temp += r"%0.2f%s "%(self.config[k]['coefficients'][coeff]['value'],\ self.config[k]['coefficients'][coeff]['fixed']) temp += r"\\ \hline" else: temp += r" & \\ \hline" L.append(temp) L.append(r"\end{tabular}") L.append(r"\caption{Coefficient comparison.}") L.append(r"\end{table}") self.run_pdflatex(L, 'summary' )
[docs] def write_chi2_table(self): """ Computes and writes the table of chi2 values for each fit included in the report. Uses data_references.yaml and data_groups.yaml YAML files. The former gives the references used to add a hyperlink while the latter groups the datasets in a logical way. """ # Load data YAML file yaml_file = f'{self.path}/smefit/analyze/data_references.yaml' with open(yaml_file) as f: arxiv = yaml.safe_load(f) # Load fisher YAML file yaml_file = f'{self.path}/smefit/analyze/fisher_groups.yaml' with open(yaml_file) as f: data_groups = yaml.safe_load(f) self.chi2 = {} self.chi2_sm = {} self.ndat = {} self.chi2_total = {} self.chi2_rep = {} self.chi2_total_rep = {} self.ndat_total ={} for k in self.fits: self.chi2[k]={} self.chi2_sm[k]={} self.ndat[k]={} self.chi2_rep[k]={} self.chi2_total[k]=0 self.chi2_total_rep[k]=[] self.ndat_total[k]=0 dat = self.dataset[k].Commondata diff = dat + self.dataset[k].Noise - self.mean_theory[k] # Noise is only necessary for closure test chi2 diff_sm = dat - self.sm_theory[k] covmat_inv = np.linalg.inv(self.dataset[k].CovMat) covmat_diff = np.einsum('ij,j->i',covmat_inv, diff) covmat_diff_sm = np.einsum('ij,j->i',covmat_inv, diff_sm) # Compute chi2 by replica for irep in range(len(self.theory[k])): diff_rep = dat + self.dataset[k].Noise - self.theory[k][irep] covmat_diff_rep = np.einsum('ij,j->i',covmat_inv,diff_rep) cnt=0 chi2_temp = 0. for i in range(len(self.dataset[k].ExpNames)): set_name=self.dataset[k].ExpNames[i] ndat_exp=self.dataset[k].NdataExp[i] if set_name not in self.chi2_rep[k]: self.chi2_rep[k][set_name] = [] self.chi2_rep[k][set_name].append(np.einsum('i,i->',diff_rep[cnt:cnt+ndat_exp],covmat_diff_rep[cnt:cnt+ndat_exp])) chi2_temp += np.einsum('i,i->',diff_rep[cnt:cnt+ndat_exp],covmat_diff_rep[cnt:cnt+ndat_exp]) cnt+=ndat_exp self.chi2_total_rep[k].append(chi2_temp) cnt=0 # Compute Pearson chi2 (<T>) per experiment for i in range(len(self.dataset[k].ExpNames)): set_name=self.dataset[k].ExpNames[i] ndat_exp=self.dataset[k].NdataExp[i] self.chi2[k][set_name] = np.einsum('i,i->',diff[cnt:cnt+ndat_exp],covmat_diff[cnt:cnt+ndat_exp]) self.chi2_sm[k][set_name] = np.einsum('i,i->',diff_sm[cnt:cnt+ndat_exp],covmat_diff_sm[cnt:cnt+ndat_exp]) self.ndat[k][set_name] = ndat_exp self.chi2_total[k]+=self.chi2[k][set_name] self.ndat_total[k]+=ndat_exp cnt+=ndat_exp print('Finished!') Entry={} Ndats={} data_list=[] # Loop over results chi2_sm_total={} ndat_sm_total={} for k in self.fits: chi2_sm_total[k]=0 ndat_sm_total[k]=0 Entry[k]={} for group in data_groups: for Dataset in data_groups[group]: if Dataset not in self.chi2[k]: continue Entry[k][Dataset]={} Ndat = self.ndat[k][Dataset] Entry[k][Dataset]["Dataset"] = Dataset Entry[k][Dataset]["Ndat"] = Ndat Entry[k][Dataset]["Chi2"] = str(round(self.chi2[k][Dataset], 4)) Entry[k][Dataset]["Chi2/Ndat"] = str(round(self.chi2[k][Dataset]/Ndat, 4)) chi2_std = np.std(self.chi2_rep[k][Dataset],axis=0) chi2_upper = (self.chi2[k][Dataset]+chi2_std) chi2_lower = (self.chi2[k][Dataset]-chi2_std) if self.chi2_sm[k][Dataset] > chi2_upper: Entry[k][Dataset]["Color"] = "blue" elif self.chi2_sm[k][Dataset] < chi2_lower: Entry[k][Dataset]["Color"] = "red" else: Entry[k][Dataset]["Color"] = "black" chi2_sm_total[k] += self.chi2_sm[k][Dataset] ndat_sm_total[k] += Ndat Ndats[Dataset] = Ndat if Dataset not in data_list: data_list.append(Dataset) Entry[Dataset]={} Entry[Dataset]["SM_Chi2"] = str(round(self.chi2_sm[k][Dataset], 4)) Entry[Dataset]["SM_Chi2/Ndat"] = str(round(self.chi2_sm[k][Dataset]/Ndat, 4)) if group not in Entry: Entry[group]={} Entry[group]['SM_Chi2'] = self.chi2_sm[k][Dataset] Entry[group]['Ndat'] = Ndat else: Entry[group]['SM_Chi2'] += self.chi2_sm[k][Dataset] Entry[group]['Ndat'] += Ndat if group not in Entry[k]: Entry[k][group]={} Entry[k][group]['Chi2'] = self.chi2[k][Dataset] Entry[k][group]['Ndat'] = Ndat else: Entry[k][group]['Chi2'] += self.chi2[k][Dataset] Entry[k][group]['Ndat'] += Ndat Entry[k]["total_Ndat"] = self.ndat_total[k] Entry[k]["total_chi2"] = str(round(self.chi2_total[k], 4)) Entry[k]["total_chi2/Ndat"] = str(round(self.chi2_total[k]/self.ndat_total[k], 4)) Entry[k]['total_sm_chi2/Ndat'] = str(round(chi2_sm_total[k]/ndat_sm_total[k], 4)) for k in self.fits: for group in data_groups: if group in Entry[k]: Entry[k][group]['Chi2/Ndat'] = str(round(Entry[k][group]['Chi2']/Entry[k][group]['Ndat'], 4)) if group in Entry: Entry[group]['SM_Chi2/Ndat'] = str(round(Entry[group]['SM_Chi2']/Entry[group]['Ndat'], 4)) # Writing Chi2 table # ================ Chi2 latex table ===================== L = latex_packages() L.append(r"\usepackage{underscore}") L.append(r"\begin{document}") # Chi2 table for ig,group in enumerate(self.data_groups): L.append(r"\begin{table}[H]") L.append(r"\centering") temp = r"\begin{tabular}{|l|c|c|" for i in range(len(self.fits)): temp += "C{2cm}|" temp+="}" L.append(temp) L.append(r"\hline") temp = r" \multicolumn{2}{|c|}{} & SM &" for i,k in enumerate(self.fits): if i != len(self.fits)-1: temp += r"Fit%d & "%(i+1) else: temp += r"Fit%d \\ \hline"%(i+1) L.append(temp) temp = r'Dataset & $N_{\rm data}$ & $\chi^ 2/N_{\rm data}$ &' for i in range(len(self.fits)): if i != len(self.fits)-1: temp+=r'$\chi^ 2/N_{data}$ &' else: temp += r'$\chi^ 2/N_{data}$ \\ \hline' L.append(temp) for isub,Dataset in enumerate(self.data_groups[group]): if np.all([Dataset not in self.config[k]['datasets'] for k in self.config]): continue temp = r"" temp += "\href{"+arxiv[Dataset]+"}{"+Dataset+"} & "+str(Ndats[Dataset])+" & "+Entry[Dataset]["SM_Chi2/Ndat"]+" & " for i,k in enumerate(self.fits): if i != len(self.chi2.keys())-1: if Dataset in Entry[k].keys(): temp += "{\color{"+Entry[k][Dataset]["Color"]+"} "+Entry[k][Dataset]["Chi2/Ndat"]+"} & " else: temp += " & " else: if Dataset in Entry[k].keys(): temp += "{\color{"+Entry[k][Dataset]["Color"]+"} "+Entry[k][Dataset]["Chi2/Ndat"]+ r"} \\ \hline" else: temp += r" \\ \hline" if ig==len(self.data_groups[group])-1 and isub == len(self.data_groups[group])-1: temp += r" \hline" L.append(temp) if ig==len(self.data_groups[group])-1: temp = r"Total & & & " for i, k in enumerate(self.fits): if i != len(self.fits)-1: temp += Entry[k]["total_chi2/Ndat"]+" ("+Entry[k]["total_sm_chi2/Ndat"]+") & " else: temp += Entry[k]["total_chi2/Ndat"]+" ("+Entry[k]["total_sm_chi2/Ndat"]+")"+r" \\ \hline" L.append(temp) L.append(r"\end{tabular}") if ig==len(self.data_groups[group])-1: L.append(r"\caption{$\chi^2$ table. In parenthesis is the total SM $\chi^2$ for the dataset included in the fit. Blue color text represents a value that is lower than the SM $\chi^2$ by more than one standard deviation of the $\chi^2$ distribution. Similarly, red color text represents values that are higher than the SM $\chi^2$ by more than one standard deviation.}") L.append(r"\end{table}") # Chi2 table for grouped data L.append('\n') L.append('\n') L.append(r"\begin{table}[H]") L.append(r"\centering") temp = r"\begin{tabular}{|l|c|c|" for i in range(len(self.fits)): temp += "C{2cm}|" temp+="}" L.append(temp) L.append(r"\hline") temp = r" \multicolumn{2}{|c|}{} & SM &" for i,k in enumerate(self.fits): if i != len(self.fits)-1: temp += r"Fit%d & "%(i+1) else: temp += r"Fit%d \\ \hline"%(i+1) L.append(temp) temp = r'Process & $N_{\rm data}$ & $\chi^ 2/N_{\rm data}$ &' for i in range(len(self.fits)): if i != len(self.fits)-1: temp+=r'$\chi^ 2/N_{data}$ &' else: temp += r'$\chi^ 2/N_{data}$ \\ \hline' L.append(temp) for ig,group in enumerate(data_groups): if group not in Entry: continue temp = r"" temp += group+" & "+str(Entry[group]['Ndat'])+" & "+Entry[group]["SM_Chi2/Ndat"]+" & " for i,k in enumerate(self.fits): if i != len(self.fits)-1: if group in Entry[k].keys(): temp += Entry[k][group]["Chi2/Ndat"]+" & " else: temp += " & " else: if group in Entry[k].keys(): temp += Entry[k][group]["Chi2/Ndat"]+ r" \\ \hline" else: temp += r" \\ \hline" if ig == len(data_groups)-1: temp += r" \hline" L.append(temp) temp = r"Total & & & " for i, k in enumerate(self.fits): if i != len(self.fits)-1: temp += Entry[k]["total_chi2/Ndat"]+" ("+Entry[k]["total_sm_chi2/Ndat"]+") & " else: temp += Entry[k]["total_chi2/Ndat"]+" ("+Entry[k]["total_sm_chi2/Ndat"]+")"+r" \\ \hline" L.append(temp) L.append(r"\end{tabular}") L.append(r"\caption{$\chi^2$ table for grouped data}") L.append(r"\end{table}") self.run_pdflatex(L, 'chi2' )
[docs] def write_Fisher_table(self): """ Computes and writes the Fisher information table, and plots heat map. Uses coeff_groups.yaml and fisher_groups.yaml YAML files to organize the parameters and datasets in a meaningful way. Linear Fisher information depends only on the theoretical corrections (kappas), while quadratic information requires fit results. Therefore, do not trust numbers in parenthesis (quadratic information) if a quadratic fit is not included in the report. Parameter contraints are also taken into account. Only fitted degrees of freedom are shown in the table Note: special treatment is done with cW and cB, degrees of freedom that are included at the fit level, but not at the presentation level. In this case, cW and cB are re-expressed as cpWB and cpD. """ corrections_val={} corrections_dict={} HOcorrections_val={} HOcorrections_dict={} theory_reps = {} data_exp = {} #Load coefficient yaml card yaml_file = f'{self.path}/smefit/analyze/coeff_groups.yaml' with open(yaml_file) as f: coeff_config = yaml.safe_load(f) # Get organized coefficient list coeff_org = {} coeff_org_fixed = {} for group in coeff_config.keys(): if group not in coeff_org: coeff_org[group]=[] coeff_org_fixed[group]=[] for c in coeff_config[group]: if c=='OPff': c='Off' if c not in coeff_org[group] and\ np.any([c in self.config[k]['coefficients'] for k in self.fits]) and\ np.any([self.config[k]['coefficients'][c]['fixed'] is False for k in self.fits if c in self.config[k]['coefficients']]): coeff_org[group].append(c) # Add correction factors from fixed parameters (i.e. impose indirect constraints) new_coeff_dict = {} new_HOcoeff_dict = {} for k in self.fits: fixed_params = [] for j,c in enumerate(self.dataset[k].CorrectionsDICT): if self.config[k]['coefficients'][c]['fixed'] is False: continue elif self.config[k]['coefficients'][c]['fixed'] is not True: if isinstance(self.config[k]['coefficients'][c]['value'],Iterable): for m,coeff in enumerate(self.config[k]['coefficients'][c]['fixed']): idx = np.where(self.dataset[k].CorrectionsDICT==coeff)[0][0] corr = float(self.config[k]['coefficients'][c]['value'][m]) self.dataset[k].CorrectionsVAL.T[idx] += self.dataset[k].CorrectionsVAL.T[j]*corr else: coeff = self.config[k]['coefficients'][c]['fixed'] idx = np.where(self.dataset[k].CorrectionsDICT==coeff)[0][0] corr = float(self.config[k]['coefficients'][c]['value']) self.dataset[k].CorrectionsVAL.T[idx] += self.dataset[k].CorrectionsVAL.T[j]*corr elif self.config[k]['coefficients'][c]['fixed'] is True: corr = float(self.config[k]['coefficients'][c]['value']) self.dataset[k].CorrectionsVAL.T[j]*=corr if self.config[k]['coefficients'][c]['fixed'] is not False: self.dataset[k].CorrectionsVAL.T[j]*=0. fixed_params.append(j) # Replace OW and OB with OpWB and OpD if 'OW' in self.dataset[k].CorrectionsDICT and 'OB' in self.dataset[k].CorrectionsDICT: OW_idx = np.where(self.dataset[k].CorrectionsDICT=='OW')[0][0] OB_idx = np.where(self.dataset[k].CorrectionsDICT=='OB')[0][0] OW_vals = self.dataset[k].CorrectionsVAL.T[OW_idx] OB_vals = self.dataset[k].CorrectionsVAL.T[OB_idx] self.dataset[k].CorrectionsVAL.T[OW_idx] = -(1./.088625)*OW_vals self.dataset[k].CorrectionsVAL.T[OB_idx] = (1./.3545)*OB_vals - (1./.088625)*(.1626)*(1./.3545)*OW_vals new_coeff_dict[k] = np.delete(self.dataset[k].CorrectionsDICT,[j for j in fixed_params],0) # Need to do the same for O(Lambda^-4) corrections fixed_HOparams = [] for j,c in enumerate(self.dataset[k].HOcorrectionsDICT): c1 = c.split('*')[0] c2 = c.split('*')[1] if self.config[k]['coefficients'][c1]['fixed'] is False and self.config[k]['coefficients'][c2]['fixed'] is False: continue # Both parameters fixed elif isinstance(self.config[k]['coefficients'][c1]['fixed'],bool) is False\ and isinstance(self.config[k]['coefficients'][c2]['fixed'],bool) is False: if isinstance(self.config[k]['coefficients'][c1]['value'],Iterable)\ and isinstance(self.config[k]['coefficients'][c2]['value'],Iterable): for m1,coeff1 in enumerate(self.config[k]['coefficients'][c1]['fixed']): for m2,coeff2 in enumerate(self.config[k]['coefficients'][c2]['fixed']): if coeff1+'*'+coeff2 in self.dataset[k].HOcorrectionsDICT: idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff1+'*'+coeff2)[0][0] elif coeff2+'*'+coeff1 in self.dataset[k].HOcorrectionsDICT: idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff2+'*'+coeff1)[0][0] corr1 = float(self.config[k]['coefficients'][c1]['value'][m1]) corr2 = float(self.config[k]['coefficients'][c2]['value'][m2]) self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr1*corr2 elif isinstance(self.config[k]['coefficients'][c1]['value'],Iterable): coeff2 = self.config[k]['coefficients'][c2]['fixed'] corr2 = float(self.config[k]['coefficients'][c2]['value']) for m1,coeff1 in enumerate(self.config[k]['coefficients'][c1]['fixed']): if coeff1+'*'+coeff2 in self.dataset[k].HOcorrectionsDICT: idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff1+'*'+coeff2)[0][0] elif coeff2+'*'+coeff1 in self.dataset[k].HOcorrectionsDICT: idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff2+'*'+coeff1)[0][0] corr1 = float(self.config[k]['coefficients'][c1]['value'][m1]) self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr1*corr2 elif isinstance(self.config[k]['coefficients'][c2]['value'],Iterable): coeff1 = self.config[k]['coefficients'][c1]['fixed'] corr1 = float(self.config[k]['coefficients'][c1]['value']) for m2,coeff2 in enumerate(self.config[k]['coefficients'][c2]['fixed']): if coeff1+'*'+coeff2 in self.dataset[k].HOcorrectionsDICT: idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff1+'*'+coeff2)[0][0] elif coeff2+'*'+coeff1 in self.dataset[k].HOcorrectionsDICT: idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff2+'*'+coeff1)[0][0] corr2 = float(self.config[k]['coefficients'][c2]['value'][m2]) self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr1*corr2 else: coeff1 = self.config[k]['coefficients'][c1]['fixed'] coeff2 = self.config[k]['coefficients'][c2]['fixed'] if coeff1 is False or coeff2 is False: continue if coeff1+'*'+coeff2 in self.dataset[k].HOcorrectionsDICT: idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff1+'*'+coeff2)[0][0] elif coeff2+'*'+coeff1 in self.dataset[k].HOcorrectionsDICT: idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff2+'*'+coeff1)[0][0] corr1 = float(self.config[k]['coefficients'][c1]['value']) corr2 = float(self.config[k]['coefficients'][c2]['value']) self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr1*corr2 fixed_HOparams.append(j) # One parameter fixed, the other free else: if isinstance(self.config[k]['coefficients'][c1]['fixed'],bool) is False: if isinstance(self.config[k]['coefficients'][c1]['value'],Iterable): for m,coeff in enumerate(self.config[k]['coefficients'][c1]['fixed']): corr = float(self.config[k]['coefficients'][c1]['value'][m]) if coeff+'*'+c2 in self.dataset[k].HOcorrectionsDICT: idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff+'*'+c2)[0][0] self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr else: self.dataset[k].HOcorrectionsDICT[j].replace(c1+'*'+c2,coeff+'*'+c2) self.dataset[k].HOcorrectionsVAL.T[j]*=corr else: coeff = self.config[k]['coefficients'][c1]['fixed'] corr = float(self.config[k]['coefficients'][c1]['value']) if coeff+'*'+c2 in self.dataset[k].HOcorrectionsDICT: idx = np.where(self.dataset[k].HOcorrectionsDICT==coeff+'*'+c2)[0][0] self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr else: self.dataset[k].HOcorrectionsDICT[j].replace(c1+'*'+c2,coeff+'*'+c2) self.dataset[k].HOcorrectionsVAL.T[j]*=corr if isinstance(self.config[k]['coefficients'][c2]['fixed'],bool) is False: if isinstance(self.config[k]['coefficients'][c2]['value'],Iterable): for m,coeff in enumerate(self.config[k]['coefficients'][c2]['fixed']): corr = float(self.config[k]['coefficients'][c2]['value'][m]) if c1+'*'+coeff in self.dataset[k].HOcorrectionsDICT: idx = np.where(self.dataset[k].HOcorrectionsDICT==c1+'*'+coeff)[0][0] self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr else: self.dataset[k].HOcorrectionsDICT[j].replace(c1+'*'+c2,c1+'*'+coeff) self.dataset[k].HOcorrectionsVAL.T[j]*=corr else: coeff = self.config[k]['coefficients'][c2]['fixed'] corr = float(self.config[k]['coefficients'][c2]['value']) if c1+'*'+coeff in self.dataset[k].HOcorrectionsDICT: idx = np.where(self.dataset[k].HOcorrectionsDICT==c1+'*'+coeff)[0][0] self.dataset[k].HOcorrectionsVAL.T[idx] += self.dataset[k].HOcorrectionsVAL.T[j]*corr else: self.dataset[k].HOcorrectionsDICT[j].replace(c1+'*'+c2,c1+'*'+coeff) self.dataset[k].HOcorrectionsVAL.T[j]*=corr # Replace OW*OW and OB*OB with OpWB*OpWB and OpD*OpD if 'OW' in self.dataset[k].CorrectionsDICT and 'OB' in self.dataset[k].CorrectionsDICT: OW_idx = np.where(self.dataset[k].HOcorrectionsDICT=='OW*OW')[0][0] OB_idx = np.where(self.dataset[k].HOcorrectionsDICT=='OB*OB')[0][0] if 'OW*OB' in self.dataset[k].HOcorrectionsDICT: OWOB_idx = np.where(self.dataset[k].HOcorrectionsDICT=='OW*OB')[0][0] elif 'OB*OW' in self.dataset[k].HOcorrectionsDICT: OWOB_idx = np.where(self.dataset[k].HOcorrectionsDICT=='OB*OW')[0][0] OW_vals = self.dataset[k].HOcorrectionsVAL.T[OW_idx] OB_vals = self.dataset[k].HOcorrectionsVAL.T[OB_idx] OWOB_vals = self.dataset[k].HOcorrectionsVAL.T[OWOB_idx] self.dataset[k].HOcorrectionsVAL.T[OW_idx] = (1./0.088625)**2*OW_vals self.dataset[k].HOcorrectionsVAL.T[OB_idx] = (1./.3545)**2*OB_vals + (1./.088625)**2*(.1626)**2*(1./.3545)**2*OW_vals\ - (1./0.088625)*(0.1626)*(1./.3545)**2*OWOB_vals self.dataset[k].HOcorrectionsVAL.T[OWOB_idx] = -(1./0.088625)*(1./.3545)*OWOB_vals + (1./0.088625)**2*(0.1626)*(1./.3545)*OW_vals new_HOcoeff_dict[k] = np.delete(self.dataset[k].HOcorrectionsDICT,[j for j in fixed_HOparams],0) # Get theory and data by experiment for k in self.fits: theory_reps[k]={} data_exp[k]={} corrections_val[k]={} corrections_dict[k]={} HOcorrections_val[k]={} HOcorrections_dict[k]={} # Construct theory arrays per experiment cnt=0 for i in range(len(self.dataset[k].ExpNames)): set_name=self.dataset[k].ExpNames[i] ndat=self.dataset[k].NdataExp[i] if set_name not in corrections_val[k]: corrections_val[k][set_name] = self.dataset[k].CorrectionsVAL[cnt:cnt+ndat] if set_name not in HOcorrections_val[k]: HOcorrections_val[k][set_name] = self.dataset[k].HOcorrectionsVAL[cnt:cnt+ndat] if set_name not in corrections_dict[k]: corrections_dict[k][set_name] = self.dataset[k].CorrectionsDICT if set_name not in HOcorrections_dict[k]: HOcorrections_dict[k][set_name] = self.dataset[k].HOcorrectionsDICT theory_reps[k][set_name] = np.array(self.theory[k]).T[cnt:cnt+ndat] data_exp[k][set_name] = self.dataset[k].Commondata[cnt:cnt+ndat] cnt+=ndat datalines={} systypes={} # Read data and SM theory files total_error={} for dataset in self.data_list: datalines[dataset], systypes[dataset] = self.read_dataset(dataset) # Gather data from data tables x_vals = [] central_data = [] stat_error = [] sys_errors = [] for line in datalines[dataset]: ncols = len(line.split()) if ncols==3: ndata = int(line.split()[-1]) if ncols>3: x_vals.append(float(line.split()[2])) central_data.append(float(line.split()[5])) stat_error.append(float(line.split()[6])) sys_errors.append([float(line.split()[i]) for i in range(7,ncols)]) add_corr_index=[] mult_corr_index=[] add_uncorr_index=[] mult_uncorr_index=[] for i in range(1,len(systypes[dataset])): if systypes[dataset][i].split()[1]=='ADD' and systypes[dataset][i].split()[-1]=='UNCORR': add_uncorr_index.append(int(systypes[dataset][i].split()[0])) elif systypes[dataset][i].split()[1]=='MULT' and systypes[dataset][i].split()[-1]=='UNCORR': mult_uncorr_index.append(int(systypes[dataset][i].split()[0])) elif systypes[dataset][i].split()[1]=='ADD' and systypes[dataset][i].split()[-1]!='UNCORR': add_corr_index.append(int(systypes[dataset][i].split()[0])) elif systypes[dataset][i].split()[1]=='MULT' and systypes[dataset][i].split()[-1]!='UNCORR': mult_corr_index.append(int(systypes[dataset][i].split()[0])) add_uncorr_index = 2*np.array(add_uncorr_index)-2 mult_uncorr_index = 2*np.array(mult_uncorr_index)-1 add_corr_index = 2*np.array(add_corr_index)-2 mult_corr_index = 2*np.array(mult_corr_index)-1 # corr_index = np.concatenate([add_corr_index,mult_corr_index],0) # corr_index_sorted = np.argsort(corr_index) # Compute total uncorrelated uncertainty total_uncorr=[] for i in range(ndata): err=0 err += stat_error[i]**2 if len(add_uncorr_index)>0: add_uncorr_error = np.array(sys_errors)[i][add_uncorr_index] err += np.sum(add_uncorr_error**2) if len(mult_uncorr_index)>0: mult_uncorr_error = np.array(sys_errors)[i][mult_uncorr_index]*np.array(central_data)[i]/100. err += np.sum(mult_uncorr_error**2) if len(add_corr_index)>0: add_corr_error = np.array(sys_errors)[i][add_corr_index] err += np.sum(add_corr_error**2) if len(mult_corr_index)>0: mult_corr_error = np.array(sys_errors)[i][mult_corr_index]*np.array(central_data)[i]/100. err += np.sum(mult_corr_error**2) total_uncorr.append(np.sqrt(err)) total_error[dataset] = np.array(total_uncorr) # Load fisher YAML file yaml_file = f'{self.path}/smefit/analyze/fisher_groups.yaml' with open(yaml_file) as f: data_groups = yaml.safe_load(f) # Compute linear Fisher information fisher = {} norm_coeff = {} norm_data = {} for k in self.fits: if k not in list(fisher.keys()): fisher[k],norm_coeff[k],norm_data[k]={},{},{} for d in data_groups: if np.all([dataset not in self.dataset[k].ExpNames for dataset in data_groups[d]]): continue if d not in list(fisher.keys()): fisher[k][d] = {} if d not in list(norm_data[k].keys()): norm_data[k][d] = 0. for dataset in data_groups[d]: if dataset not in self.dataset[k].ExpNames: continue for i,c in enumerate(corrections_dict[k][dataset]): if c not in new_coeff_dict[k]: continue if c=='OW': c='OpWB' elif c=='OB': c='OpD' if c not in list(fisher[k][d].keys()): fisher[k][d][c] = 0 if c not in list(norm_coeff[k].keys()): norm_coeff[k][c] = 0 val = corrections_val[k][dataset].T[i]**2/total_error[dataset]**2 norm_coeff[k][c] += np.sum(val) norm_data[k][d] += np.sum(val) fisher[k][d][c] += np.sum(val) # TODO: prevent computing quadratic fisher when all fits are NHO. # Compute quadratic Fisher information print('Computing quadratic Fisher information...this may take a few minutes...') quad_fisher = {} quad_norm_coeff = {} quad_norm_data = {} for k in self.fits: if k not in quad_fisher: quad_fisher[k],quad_norm_coeff[k],quad_norm_data[k] = {},{},{} nrep = len(self.full_coeffs[k]) for group in data_groups: if np.all([dataset not in self.dataset[k].ExpNames for dataset in data_groups[group]]): continue if group not in quad_fisher[k]: quad_fisher[k][group] = {} if group not in quad_norm_data[k]: quad_norm_data[k][group] = np.zeros(nrep) for dataset in data_groups[group]: if dataset not in self.dataset[k].ExpNames: continue ndata = len(data_exp[k][dataset]) for i,c in enumerate(corrections_dict[k][dataset]): if c not in new_coeff_dict[k]: continue if c not in self.full_labels[k]: continue if c=='OW': coeff='OpWB' elif c=='OB': coeff='OpD' else: coeff = c if coeff not in quad_fisher[k][group]: quad_fisher[k][group][coeff] = np.zeros(nrep) if coeff not in quad_norm_coeff[k]: quad_norm_coeff[k][coeff] = np.zeros(nrep) idx = np.where(self.full_labels[k]==c)[0][0] idx_quad = np.where(HOcorrections_dict[k][dataset]==c+'*'+c)[0][0] dat = np.tile(data_exp[k][dataset],(nrep,1)) diff = theory_reps[k][dataset].T - dat term1 = np.einsum('ij,j,j->ij',diff,HOcorrections_val[k][dataset].T[idx_quad],1./(total_error[dataset]**2)) term2 = np.tile(corrections_val[k][dataset].T[i],(nrep,1)) for j,c2 in enumerate(corrections_dict[k][dataset]): if c2 not in new_coeff_dict[k]: continue if c2 not in self.full_labels[k]: continue if c+'*'+c2 not in new_HOcoeff_dict[k]\ and c2+'*'+c not in new_HOcoeff_dict[k]: continue if c+'*'+c2 in HOcorrections_dict[k][dataset]: idx2 = np.where(c+'*'+c2==HOcorrections_dict[k][dataset])[0][0] term2 += np.einsum('i,j->ij',np.array(self.full_coeffs[k]).T[idx],HOcorrections_val[k][dataset].T[idx2]) elif c2+'*'+c in HOcorrections_dict[k][dataset] and c2!=c: idx2 = np.where(c2+'*'+c==HOcorrections_dict[k][dataset])[0][0] term2 += np.einsum('i,j->ij',np.array(self.full_coeffs[k]).T[idx],HOcorrections_val[k][dataset].T[idx2]) term2 = np.einsum('ij,j->ij',term2**2,1./total_error[dataset]**2) quad_fisher[k][group][coeff] += np.sum(term1+term2,axis=1) quad_norm_data[k][group] += np.sum(term1+term2,axis=1) quad_norm_coeff[k][coeff] += np.sum(term1+term2,axis=1) # Compute means for k in self.fits: for c in quad_norm_coeff[k]: quad_norm_coeff[k][c] = np.mean(quad_norm_coeff[k][c]) for k in self.fits: for d in data_groups: if d not in quad_fisher[k]: continue quad_norm_data[k][d] = np.mean(quad_norm_data[k][d]) for c in self.dataset[k].CorrectionsDICT: if c=='OW': c='OpWB' elif c=='OB': c='OpD' if c not in quad_fisher[k][d]: continue quad_fisher[k][d][c] = np.mean(quad_fisher[k][d][c]) # Writing Fisher table # ================ Fisher latex table ===================== L = latex_packages() L.append(r"\begin{document}") # Uses quadratic fit if included in report, otherwise computes # Fisher with linear fit for k in self.fits: if '_HO' in k: fit = k break else: fit = self.fits[0] fisher_matrix = np.zeros((len(new_coeff_dict[fit]),len(fisher[fit].keys()))) quad_fisher_matrix = np.zeros((len(new_coeff_dict[fit]),len(fisher[fit].keys()))) # QUAD Fisher table 1 L.append(r"\begin{landscape}") L.append(r"\begin{table}[H]") L.append(r"\tiny") L.append(r"\centering") temp = r"\begin{tabular}{|c|c|" for i in range(len(fisher[fit].keys())): temp += "c|" temp+="}" L.append(temp) L.append(r"\hline") L.append(r" \multicolumn{2}{|c|}{} & \multicolumn{%d}{c|}{Processes} \\ \hline"%len(fisher[fit].keys())) temp = r' Class & Coefficient & ' for i in range(len(fisher[fit].keys())): if i != len(fisher[fit].keys())-1: temp+=r'${\rm %s}$ &'%list(fisher[fit].keys())[i] else: temp += r'${\rm %s}$ \\ \hline'%list(fisher[fit].keys())[i] L.append(temp) ic=0 ordered_coeffs = [] for group in coeff_org.keys(): if len(coeff_org[group])==0: continue L.append('\multirow{%d}{*}{%s}'%(len(coeff_org[group]),group)) for isub,coeff in enumerate(coeff_org[group]): if coeff not in new_coeff_dict[fit]: continue idx = np.where(coeff==new_coeff_dict[fit])[0][0] ordered_coeffs.append(idx) if coeff=='OW': coeff='OpWB' elif coeff=='OB': coeff='OpD' temp = r" & {\rm "+ coeff.replace('O','c') + r"}" idj = 0 for i,d in enumerate(fisher[fit].keys()): if d in fisher[fit] and coeff in fisher[fit][d] and fisher[fit][d][coeff]!=0: value = fisher[fit][d][coeff]/norm_coeff[fit][coeff]*100 if value > 10: temp += r" & {\color{blue} %0.2f}"%(value) else: temp += r" & %0.2f"%(value) else: temp += r" & $\times$" if d in quad_fisher[fit] and coeff in quad_fisher[fit][d] and quad_fisher[fit][d][coeff]!=0: value = quad_fisher[fit][d][coeff]/quad_norm_coeff[fit][coeff]*100. if value > 10: temp += r"{\color{blue} (%0.2f)}"%(value) else: temp += r"(%0.1f) "%(value) else: temp += r"($\times$) " if i == len(fisher[fit].keys())-1 and isub!=len(coeff_org[group])-1: temp += r" \\ \cline{2-%d}"%(2+len(fisher[fit].keys())) elif i==len(fisher[fit].keys())-1 and isub==len(coeff_org[group])-1: temp += r"\\ \hline" if d in fisher[fit] and coeff in fisher[fit][d] and fisher[fit][d][coeff]!=0: fisher_matrix[ic][idj] = fisher[fit][d][coeff]/norm_coeff[fit][coeff]*100. else: fisher_matrix[ic][idj] = 0. if d in quad_fisher[fit] and coeff in quad_fisher[fit][d]: quad_fisher_matrix[ic][idj] = quad_fisher[fit][d][coeff]/quad_norm_coeff[fit][coeff]*100. else: quad_fisher_matrix[ic][idj] = 0. idj += 1 ic+=1 L.append(temp) L.append(r"\end{tabular}") L.append(r"\caption{Fisher information normalized per coefficient}") L.append(r"\end{table}") L.append(r"\end{landscape}") # Uncomment to compute fisher table per dataset # # Compute linear Fisher information per datset # fisher_d = {} # norm_coeff_d = {} # norm_data_d = {} # for k in self.fits: # if k not in fisher_d: # fisher_d[k],norm_coeff_d[k],norm_data_d[k]={},{},{} # for dataset in self.dataset[k].ExpNames: # if dataset not in fisher_d: # fisher_d[k][dataset] = {} # if dataset not in norm_data_d[k]: # norm_data_d[k][dataset] = 0. # for i,c in enumerate(corrections_dict[k][dataset]): # if c not in new_coeff_dict[k]: # continue # if c=='OW': # c='OpWB' # elif c=='OB': # c='OpD' # if c not in list(fisher_d[k][dataset].keys()): # fisher_d[k][dataset][c] = 0 # if c not in list(norm_coeff_d[k].keys()): # norm_coeff_d[k][c] = 0 # val = corrections_val[k][dataset].T[i]**2/total_error[dataset]**2 # norm_coeff_d[k][c] += np.sum(val) # norm_data_d[k][dataset] += np.sum(val) # fisher_d[k][dataset][c] = np.sum(val) # quad_fisher_d = {} # quad_norm_coeff_d = {} # quad_norm_data_d = {} # # Compute quadratic Fisher information per datset # bar = Bar(r'Computing quadratic Fisher information per dataset', max=len(self.dataset[k].ExpNames)) # k = self.fits[0] # if k not in quad_fisher_d: # quad_fisher_d[k],quad_norm_coeff_d[k],quad_norm_data_d[k] = {},{},{} # nrep = len(self.full_coeffs[k]) # for dataset in self.dataset[k].ExpNames: # if dataset not in quad_fisher_d[k]: # quad_fisher_d[k][dataset] = {} # if dataset not in quad_norm_data_d[k]: # quad_norm_data_d[k][dataset] = np.zeros(nrep) # ndata = len(data_exp[k][dataset]) # for i,c in enumerate(corrections_dict[k][dataset]): # if c not in new_coeff_dict[k]: # continue # if c not in self.full_labels[k]: # continue # if c=='OW': # coeff='OpWB' # elif c=='OB': # coeff='OpD' # else: # coeff = c # if coeff not in quad_fisher_d[k][dataset]: # quad_fisher_d[k][dataset][coeff] = np.zeros(nrep) # if coeff not in quad_norm_coeff_d[k]: # quad_norm_coeff_d[k][coeff] = np.zeros(nrep) # idx = np.where(self.full_labels[k]==c)[0][0] # idx_quad = np.where(HOcorrections_dict[k][dataset]==c+'*'+c)[0][0] # dat = np.tile(data_exp[k][dataset],(nrep,1)) # diff = theory_reps[k][dataset].T - dat # term1 = np.einsum('ij,j,j->ij',diff,HOcorrections_val[k][dataset].T[idx_quad],1./(total_error[dataset]**2)) # term2 = np.tile(corrections_val[k][dataset].T[i],(nrep,1)) # for j,c2 in enumerate(corrections_dict[k][dataset]): # if c2 not in new_coeff_dict[k]: # continue # if c2 not in self.full_labels[k]: # continue # if c+'*'+c2 not in new_HOcoeff_dict[k]\ # and c2+'*'+c not in new_HOcoeff_dict[k]: # continue # if c+'*'+c2 in HOcorrections_dict[k][dataset]: # idx2 = np.where(c+'*'+c2==HOcorrections_dict[k][dataset])[0][0] # term2 += np.einsum('i,j->ij',np.array(self.full_coeffs[k]).T[idx],HOcorrections_val[k][dataset].T[idx2]) # elif c2+'*'+c in HOcorrections_dict[k][dataset] and c2!=c: # idx2 = np.where(c2+'*'+c==HOcorrections_dict[k][dataset])[0][0] # term2 += np.einsum('i,j->ij',np.array(self.full_coeffs[k]).T[idx],HOcorrections_val[k][dataset].T[idx2]) # term2 = np.einsum('ij,j->ij',term2**2,1./total_error[dataset]**2) # quad_fisher_d[k][dataset][coeff] = np.sum(term1+term2,axis=1) # quad_norm_data_d[k][dataset] += np.sum(term1+term2,axis=1) # quad_norm_coeff_d[k][coeff] += np.sum(term1+term2,axis=1) # bar.next() # bar.finish() # # Compute means # for c in quad_norm_coeff_d[k]: # quad_norm_coeff_d[k][c] = np.mean(quad_norm_coeff_d[k][c]) # for dataset in self.dataset[k].ExpNames: # quad_norm_data_d[k][d] = np.mean(quad_norm_data_d[k][dataset]) # for c in self.dataset[k].CorrectionsDICT: # if c=='OW': # c='OpWB' # elif c=='OB': # c='OpD' # if c not in quad_fisher_d[k][dataset]: # continue # quad_fisher_d[k][dataset][c] = np.mean(quad_fisher_d[k][dataset][c]) # # Tables Fisher per grouped dataset # L.append(r"\begin{landscape}") # fit = self.fits[0] # for data_group, data_list in data_groups.items(): # L.append(r"\begin{table}[H]") # L.append(r"\tiny") # L.append(r"\centering") # temp = r"\begin{tabular}{|c|c|" # temp += "c|" * len(data_list) # temp+="}" # L.append(temp) # L.append(r"\hline") # temp = r" \multicolumn{2}{|c|}{} " # for data in data_list: # temp += r" & { \rm " + data.replace('_', r'\_' ) + "}" # temp += r"\\ \hline " # L.append(temp) # # loop on coeffficients # for coeff_group, coeff_list in coeff_org.items(): # L.append( r'\multirow{%d}{*}{%s}'%(len(coeff_list),coeff_group) ) # for i, coeff in enumerate(coeff_list): # if coeff=='OW': # coeff='OpWB' # elif coeff=='OB': # coeff='OpD' # temp = r" & %s"%coeff # for dataset in data_list: # if dataset in fisher_d[fit] and coeff in fisher_d[fit][dataset] and fisher_d[fit][dataset][coeff]!=0: # value = fisher_d[fit][dataset][coeff]/norm_coeff_d[fit][coeff]*100 # if value > 10: # temp += r" & {\color{blue} %0.2f}"%(value) # else: # temp += r" & %0.2f"%(value) # else: # temp += r" & $\times$" # if dataset in quad_fisher_d[fit] and coeff in quad_fisher_d[fit][dataset] and quad_fisher_d[fit][dataset][coeff]!=0: # value = quad_fisher_d[fit][dataset][coeff]/quad_norm_coeff_d[fit][coeff]*100. # if value > 10: # temp += r"{\color{blue} (%0.2f)}"%(value) # else: # temp += r"(%0.2f)"%(value) # else: # temp += r"($\times$)" # if i != len(coeff_list) -1: # temp += r" \\ \cline{2-%d}"%(2+len(data_list)) # else: # temp += r" \\ \hline" # L.append(temp) # L.append(r"\end{tabular}") # L.append(r"\caption{Fisher information normalized per coefficient in %s datasets}" % data_group) # L.append(r"\end{table}") # L.append(r"\end{landscape}") self.run_pdflatex(L, 'fisher') # Heat Map of Fisher table fig = py.figure(figsize=(10,10)) ax = fig.add_subplot(121) colors = py.rcParams['axes.prop_cycle'].by_key()['color'] cmap = matplotlib.colors.ListedColormap(['lightgrey',colors[3], colors[2], colors[1], colors[0]]) bounds = [0, 10, 25, 50, 75, 100] norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N) cax = ax.matshow(fisher_matrix, vmin=0, vmax=100, cmap=cmap, norm=norm) for i, row in enumerate(fisher_matrix): for j, elem in enumerate(row): c = elem if c > 10: ax.text(j, i, str(np.round(c,1)), va='center', ha='center',fontsize=8) coeff_list = [c.replace('O','c') for c in list(new_coeff_dict[fit][np.array(ordered_coeffs)])] temp_list = ['cpD' if c=='cB' else c for c in coeff_list] new_coeff_list = ['cpWB' if c=='cW' else c for c in temp_list] ticks1 = np.arange(0,len(fisher_matrix[0]),1) ticks2 = np.arange(0,len(fisher_matrix),1) ax.set_xticks(ticks1) ax.set_yticks(ticks2) ax.set_xticklabels(fisher[fit].keys()) ax.set_yticklabels(new_coeff_list) ax.tick_params(axis='x',rotation=90,labelsize=15) ax.tick_params(axis='y',labelsize=15) ax.tick_params(which='major',labeltop=True,top=True,labelbottom=False,bottom=False) ax = fig.add_subplot(122) colors = py.rcParams['axes.prop_cycle'].by_key()['color'] cmap = matplotlib.colors.ListedColormap(['lightgrey',colors[3], colors[2], colors[1], colors[0]]) bounds = [0, 10, 25, 50, 75, 100] norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N) cax = ax.matshow(quad_fisher_matrix, vmin=0, vmax=100, cmap=cmap, norm=norm) divider = make_axes_locatable(ax) cax1 = divider.append_axes("right", size="5%", pad=0.1) cbar = fig.colorbar(cax, cax=cax1) cbar.set_label(r'${\rm Normalized\ Fisher\ Value}$',fontsize=25,labelpad=30,rotation=270) cbar.ax.tick_params(labelsize=15) for i, row in enumerate(quad_fisher_matrix): for j, elem in enumerate(row): c = elem if c > 10: ax.text(j, i, str(np.round(c,1)), va='center', ha='center',fontsize=8) ticks1 = np.arange(0,len(fisher_matrix[0]),1) ax.set_xticks(ticks1) ax.set_xticklabels(fisher[fit].keys()) ax.tick_params(axis='x',rotation=90,labelsize=15) ax.tick_params(which='major',labeltop=True,top=True,labelbottom=False,bottom=False,labelleft=False,left=False) py.tight_layout() py.savefig(f'reports/{self.outputname}/Fisher_heat.pdf')
[docs] def write_PCA_table(self): """ Computes and writes PCA table and heat map. Uses coeff_groups.yaml YAML file to get organized list of parameters. Note: matrix being decomposd by SVD are the kappas divided by the total experimental error Note: special treatment is done with cW and cB, degrees of freedom that are included at the fit level, but not at the presentation level. In this case, cW and cB are re-expressed as cpWB and cpD. """ corrections_val={} corrections_dict={} HOcorrections_val={} HOcorrections_dict={} theory_reps = {} data_exp = {} #Load coefficient yaml card yaml_file = f'{self.path}/smefit/analyze/coeff_groups.yaml' with open(yaml_file) as f: coeff_config = yaml.safe_load(f) coeff_org = {} coeff_org_fixed = {} coeff_list = [] for group in coeff_config.keys(): if group not in coeff_org: coeff_org[group]=[] coeff_org_fixed[group]=[] for c in coeff_config[group]: if c=='OPff': c='Off' if c not in coeff_list and\ np.any([c in self.config[k]['coefficients'] for k in self.fits]) and\ np.any([self.config[k]['coefficients'][c]['fixed'] is False for k in self.fits if c in self.config[k]['coefficients']]): coeff_org[group].append(c) coeff_list.append(c) # Read data and SM theory files datalines={} systypes={} total_error={} for k in self.fits: total_error[k]=[] for dataset in self.dataset[k].ExpNames: datalines[dataset], systypes[dataset] = self.read_dataset(dataset) # Gather data from data tables x_vals = [] central_data = [] stat_error = [] sys_errors = [] for line in datalines[dataset]: ncols = len(line.split()) if ncols==3: ndata = int(line.split()[-1]) if ncols>3: x_vals.append(float(line.split()[2])) central_data.append(float(line.split()[5])) stat_error.append(float(line.split()[6])) sys_errors.append([float(line.split()[i]) for i in range(7,ncols)]) add_corr_index=[] mult_corr_index=[] add_uncorr_index=[] mult_uncorr_index=[] for i in range(1,len(systypes[dataset])): if systypes[dataset][i].split()[1]=='ADD' and systypes[dataset][i].split()[-1]=='UNCORR': add_uncorr_index.append(int(systypes[dataset][i].split()[0])) elif systypes[dataset][i].split()[1]=='MULT' and systypes[dataset][i].split()[-1]=='UNCORR': mult_uncorr_index.append(int(systypes[dataset][i].split()[0])) elif systypes[dataset][i].split()[1]=='ADD' and systypes[dataset][i].split()[-1]!='UNCORR': add_corr_index.append(int(systypes[dataset][i].split()[0])) elif systypes[dataset][i].split()[1]=='MULT' and systypes[dataset][i].split()[-1]!='UNCORR': mult_corr_index.append(int(systypes[dataset][i].split()[0])) add_uncorr_index = 2*np.array(add_uncorr_index)-2 mult_uncorr_index = 2*np.array(mult_uncorr_index)-1 add_corr_index = 2*np.array(add_corr_index)-2 mult_corr_index = 2*np.array(mult_corr_index)-1 # corr_index = np.concatenate([add_corr_index,mult_corr_index],0) # corr_index_sorted = np.argsort(corr_index) # Compute total uncorrelated uncertainty for i in range(ndata): err=0 err += stat_error[i]**2 if len(add_uncorr_index)>0: add_uncorr_error = np.array(sys_errors)[i][add_uncorr_index] err += np.sum(add_uncorr_error**2) if len(mult_uncorr_index)>0: mult_uncorr_error = np.array(sys_errors)[i][mult_uncorr_index]*np.array(central_data)[i]/100. err += np.sum(mult_uncorr_error**2) if len(add_corr_index)>0: add_corr_error = np.array(sys_errors)[i][add_corr_index] err += np.sum(add_corr_error**2) if len(mult_corr_index)>0: mult_corr_error = np.array(sys_errors)[i][mult_corr_index]*np.array(central_data)[i]/100. err += np.sum(mult_corr_error**2) total_error[k].append(np.sqrt(err)) total_error[k] = np.array(total_error[k]) for k in self.fits: theory_reps[k]={} data_exp[k]={} corrections_val[k]={} corrections_dict[k]={} HOcorrections_val[k]={} HOcorrections_dict[k]={} # Construct theory arrays per experiment cnt=0 for i in range(len(self.dataset[k].ExpNames)): set_name=self.dataset[k].ExpNames[i] ndat=self.dataset[k].NdataExp[i] if set_name not in corrections_val[k]: corrections_val[k][set_name] = self.dataset[k].CorrectionsVAL[cnt:cnt+ndat] if set_name not in HOcorrections_val[k]: HOcorrections_val[k][set_name] = self.dataset[k].HOcorrectionsVAL[cnt:cnt+ndat] if set_name not in corrections_dict[k]: corrections_dict[k][set_name] = self.dataset[k].CorrectionsDICT if set_name not in HOcorrections_dict[k]: HOcorrections_dict[k][set_name] = self.dataset[k].HOcorrectionsDICT data_exp[k][set_name] = self.dataset[k].Commondata[cnt:cnt+ndat] cnt+=ndat X = {} new_coeff_dict = {} # Construct theory matrix used in PCA for k in self.fits: fixed_params=[] for j,c in enumerate(self.dataset[k].CorrectionsDICT): if self.config[k]['coefficients'][c]['fixed'] is False: idx = np.where(self.dataset[k].CorrectionsDICT==c)[0][0] self.dataset[k].CorrectionsVAL.T[idx]/=total_error[k] continue elif self.config[k]['coefficients'][c]['fixed'] is not True: if isinstance(self.config[k]['coefficients'][c]['value'],Iterable): for m,coeff in enumerate(self.config[k]['coefficients'][c]['fixed']): idx = np.where(self.dataset[k].CorrectionsDICT==coeff)[0][0] corr = float(self.config[k]['coefficients'][c]['value'][m]) self.dataset[k].CorrectionsVAL.T[idx] += self.dataset[k].CorrectionsVAL.T[j]*corr/total_error[k] else: coeff = self.config[k]['coefficients'][c]['fixed'] idx = np.where(self.dataset[k].CorrectionsDICT==coeff)[0][0] corr = float(self.config[k]['coefficients'][c]['value']) self.dataset[k].CorrectionsVAL.T[idx] += self.dataset[k].CorrectionsVAL.T[j]*corr/total_error[k] elif self.config[k]['coefficients'][c]['fixed'] is True: corr = float(self.config[k]['coefficients'][c]['value']) self.dataset[k].CorrectionsVAL.T[j]*=corr/total_error[k] fixed_params.append(j) if 'OW' in self.dataset[k].CorrectionsDICT and 'OB' in self.dataset[k].CorrectionsDICT: OW_idx = np.where(self.dataset[k].CorrectionsDICT=='OW')[0][0] OB_idx = np.where(self.dataset[k].CorrectionsDICT=='OB')[0][0] OW_vals = self.dataset[k].CorrectionsVAL.T[OW_idx] OB_vals = self.dataset[k].CorrectionsVAL.T[OB_idx] self.dataset[k].CorrectionsVAL.T[OW_idx] = -(1./.088625)*OW_vals self.dataset[k].CorrectionsVAL.T[OB_idx] = (1./.3545)*OB_vals - (1./.088625)*(.1626)*(1./.3545)*OW_vals X[k] = np.delete(self.dataset[k].CorrectionsVAL.T,[j for j in fixed_params],0).T new_coeff_dict[k] = np.delete(self.dataset[k].CorrectionsDICT,[j for j in fixed_params],0) coeffs_kept = {} pc_vals = {} s_vals = {} SVs = {} pc_matrix = {} # Decompose matrix with SVD and identify PCs for k in self.fits: npar = len(new_coeff_dict[k]) pc_matrix[k] = np.zeros((npar,npar)) coeffs_kept[k]={} pc_vals[k]={} s_vals[k]=[] SVs[k] = {} X_centered = X[k] - X[k].mean(axis=0) _, W, Vt = np.linalg.svd(X_centered) for i in range(len(Vt)): name = r'${\rm PC\ }'+str(i+1)+r'$' pc = Vt.T[:,i] SV = W[i] s_vals[k].append(SV) SVs[k][name]=SV coeffs_kept[k][i+1]=[] pc_vals[k][i+1]=[] pc_sorted = np.argsort(-np.abs(pc)) for j in pc_sorted: pc_matrix[k][i][j] = pc[j]**2 if (pc[j]**2)>0.05: if new_coeff_dict[k][j]=='OW': coeffs_kept[k][i+1].append('OpWB') elif new_coeff_dict[k][j]=='OB': coeffs_kept[k][i+1].append('OpD') else: coeffs_kept[k][i+1].append(new_coeff_dict[k][j]) pc_vals[k][i+1].append(pc[j]) # Writing PCA file # ================ PCA latex file ===================== L = latex_packages() L.append(r"\usepackage{underscore}") L.append(r"\allowdisplaybreaks") L.append(r"\begin{document}") for k in self.fits: if '_NHO' in k: fit = k else: fit = self.fits[0] # PCA Table L.append(r"\begin{align}") for i in range(len(pc_vals[fit])): L.append(r" &{\underline {\bf {\color{red} %0.2e}}}: "%s_vals[fit][i]) if pc_vals[fit][i+1][0]<0: factor=-1. if pc_vals[fit][i+1][0]>0: factor=1. for j in range(len(pc_vals[fit][i+1])): if np.abs(pc_vals[fit][i+1][j]) > 0.9: clr = r"blue" else: clr = r"black" if j==0 and j!=len(pc_vals[fit][i+1])-1: L.append(r"{\color{"+clr+r"} %0.3f} * {\rm %s} + "%(factor*pc_vals[fit][i+1][j], coeffs_kept[fit][i+1][j].replace('O','c'))) elif j==0 and j==len(pc_vals[fit][i+1])-1: L.append(r"{\color{"+clr+r"} %0.3f} * {\rm %s} "%(factor*pc_vals[fit][i+1][j], coeffs_kept[fit][i+1][j].replace('O','c'))) elif j!=len(pc_vals[fit][i+1])-1 and (j+1)%7==0: L.append(r"{\color{"+clr+r"} %0.3f} * {\rm %s} + \nonumber \\ \nonumber \\"%(factor*pc_vals[fit][i+1][j], coeffs_kept[fit][i+1][j].replace('O','c'))) elif j!=len(pc_vals[fit][i+1])-1 and j%7==0: L.append(r"&{\color{"+clr+r"} %0.3f} * {\rm %s} + "%(factor*pc_vals[fit][i+1][j], coeffs_kept[fit][i+1][j].replace('O','c'))) elif j==len(pc_vals[fit][i+1])-1 and j%7==0: L.append(r"&{\color{"+clr+r"} %0.3f} * {\rm %s}"%(factor*pc_vals[fit][i+1][j], coeffs_kept[fit][i+1][j].replace('O','c'))) elif j==len(pc_vals[fit][i+1])-1 and j%7!=0: L.append(r"{\color{"+clr+r"} %0.3f} * {\rm %s}"%(factor*pc_vals[fit][i+1][j], coeffs_kept[fit][i+1][j].replace('O','c'))) elif j!=len(pc_vals[fit][i+1])-1: L.append(r"{\color{"+clr+r"} %0.3f} * {\rm %s} + "%(factor*pc_vals[fit][i+1][j], coeffs_kept[fit][i+1][j].replace('O','c'))) if i!=len(pc_vals[fit])-1: L.append(r" \nonumber \\ \nonumber \\ ") else: L.append(r" \nonumber") L.append(r"\end{align}") self.run_pdflatex(L, 'pca') # Bar Plot of Singular Values nrows,ncols=1,1 py.figure(figsize=(nrows*10,ncols*5)) py.bar(range(len(SVs[fit])), list(SVs[fit].values()), align='center') py.xticks(range(len(SVs[fit])), list(SVs[fit].keys()),fontsize=15) py.xticks(rotation=90) py.tick_params(axis='y',direction='in',labelsize=15) py.yscale('log') py.ylim(1e-3,1e5) py.ylabel(r'${\rm Singular\ Values}$',fontsize=20) py.tight_layout() py.savefig(f'{self.path}/reports/{self.outputname}/PCA_SVs.pdf') # Heat Map of PC coefficients fig = py.figure(figsize=(10,10)) ax = fig.add_subplot(111) colors = py.rcParams['axes.prop_cycle'].by_key()['color'] cmap = matplotlib.colors.ListedColormap(['lightgrey',colors[3], colors[2], colors[1], colors[0]]) bounds = [0, 0.10, 0.25, 0.50, 0.75, 1.0] norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N) cax = ax.matshow(pc_matrix[fit], vmin=0, vmax=1, cmap=cmap, norm=norm) divider = make_axes_locatable(ax) cax1 = divider.append_axes("right", size="5%", pad=0.1) cbar = fig.colorbar(cax, cax=cax1) cbar.set_label(r'${\rm a}_i^2$',fontsize=25,labelpad=30,rotation=270) cbar.ax.tick_params(labelsize=15) for i in range(len(pc_matrix[fit])): for j in range(len(pc_matrix[fit][i])): c = pc_matrix[fit][i][j] if c > 0.1: ax.text(j, i, str(np.round(c,1)), va='center', ha='center',fontsize=10) ticks = np.arange(0,len(pc_matrix[fit]),1) ax.set_xticks(ticks) ax.set_yticks(ticks) coeff_list = [c.replace('O','c') for c in new_coeff_dict[fit]] temp_list = ['cpD' if c=='cB' else c for c in coeff_list] new_coeff_list = ['cpWB' if c=='cW' else c for c in temp_list] ax.set_xticklabels([r'${\rm '+coeff+r'}$' for coeff in new_coeff_list]) ax.set_yticklabels(list(SVs[fit].keys())) ax.tick_params(axis='x',rotation=90,labelsize=15) ax.tick_params(axis='y',labelsize=15) ax.tick_params(which='major',labeltop=True,top=True,labelbottom=False,bottom=False) py.tight_layout() py.savefig('reports/%s/PCA_PCs.pdf'%(self.outputname)) # Combine plots in tex file # ================ Coefficients latex file ===================== L = latex_packages() L.append(r"\begin{document}") self.combine_plots(L, 'pca_plots', 'PCA_')
[docs] def plot_chi2(self): """ Plots a bar plot of the chi2 values per experiment, and also the distribution of chi2 values. """ # Plot nrows,ncols=1,1 py.figure(figsize=(nrows*10,ncols*5)) nexp = len(self.data_list) chi2_bar = {} chi2_bar[r'${\rm SM}$'] = np.zeros(nexp) chi2_hist = {} labels = [] for k in self.fits: name=self.fit_labels[k] chi2_bar[name]=np.zeros(nexp) cnt=0 for exp in self.data_list: if exp in self.chi2[k]: chi2_bar[name][cnt] = self.chi2[k][exp]/self.ndat[k][exp] if exp in self.chi2_sm[k]: chi2_bar[r'${\rm SM}$'][cnt] = self.chi2_sm[k][exp]/self.ndat[k][exp] if exp.replace('_','\_') not in labels: labels.append(exp.replace('_','\_')) cnt+=1 for k in self.fits: name=self.fit_labels[k] chi2_hist[name] = self.chi2_total_rep[k]/self.ndat_total[k] # Chi2 bar plot per experiment df = pd.DataFrame.from_dict(chi2_bar,orient='index',columns=labels) new_df = df.T ax = new_df.plot(kind='bar',rot=0,width=0.7,figsize=(10,5)) py.plot(np.linspace(-1,nexp*10,2),np.ones(2),'k--',alpha=0.7) py.xticks(rotation=90) py.tick_params(axis='y',direction='in',labelsize=15) py.ylabel(r'$\chi^2 {\rm value}$',fontsize=11) py.ylim(0,4) py.legend(loc=2,frameon=False,prop={'size':11}) py.tight_layout() py.savefig(f'{self.path}/reports/{self.outputname}/Chi2_Bar.pdf') # Chi2 distribution py.figure(figsize=(7,5)) ax = py.subplot(111) colors = py.rcParams['axes.prop_cycle'].by_key()['color'] i=0 for name in chi2_hist: ax.hist(chi2_hist[name],bins='fd',density=True,color=colors[i],edgecolor='k',alpha=0.3,label=name) i+=1 py.tick_params(axis='x',direction='in',labelsize=15) py.tick_params(axis='y',direction='in',labelsize=15) py.xlabel(r'$\chi^2 {\rm value}$',fontsize=11) py.legend(loc=2,frameon=False,prop={'size':11}) py.tight_layout() py.savefig(f'{self.path}/reports/{self.outputname}/Chi2_Hist.pdf') # Combine plots in tex file # ================ Coefficients latex file ===================== L = latex_packages() L.append(r"\begin{document}") self.combine_plots(L, 'chi2_plots', 'Chi2_')
[docs] def plot_coefficients(self): """ Plots central values + 95% CL errors, 95% CL bounds, probability distributions, residuals, residual distribution, and energy reach. Also writes a table displaying values for 68% CL bounds and central value + 95% errors. Takes into account parameter constraints and displays all non-zero parameters. Uses coeff_groups.yaml YAML file to organize parameters in a meaningful way. Note: coefficients that are known to have disjoint probability distributions (i.e. multiple solutions) are manually separated by including the coefficient name in disjointed_list for disjointed_list2 for global and single fit results, respectively. Note: Probability distributions for single parameter fits are included for all coefficients EXCEPT those constrained as a linear combination of two or more parameters (since they have different numbers of posterior samples) """ #Load coefficient yaml card yaml_file = f'{self.path}/smefit/analyze/coeff_groups.yaml' with open(yaml_file) as f: coeff_config = yaml.safe_load(f) # SMEFiT logo # TODO: fix this logo = py.imread(f'{self.path}/smefit/analyze/logo.png') coeff_list = [] for group in coeff_config.keys(): for c in coeff_config[group]: if c=='OPff': c='Off' if c in ['OW','OB']: continue if np.any(['SNS_c' in k for k in self.fits])\ and np.any([self.config[k]['coefficients'][c]['fixed'] is not False for k in self.fits if c in self.config[k]['coefficients']]): continue if c not in coeff_list and\ np.any([c in self.config[k]['coefficients'] for k in self.fits]) and\ np.any([self.config[k]['coefficients'][c]['fixed'] is not True for k in self.fits if c in self.config[k]['coefficients']]): coeff_list.append(c) npar=len(coeff_list) labels=coeff_list disjointed_list = ['Otap','Obp'] disjointed_list2 = ['Otap','Obp','Opd','OpB','OpW'] if any(['TOP19' in k for k in self.fits]): disjointed_list.append('Otp') # Plot central value + 95% CL errors nrows,ncols=1,1 py.figure(figsize=(nrows*10,ncols*5)) ax=py.subplot(111) # X-axis X=np.array([2*i for i in range(npar)]) # Spacing between fit results val = np.linspace(-0.1*len(self.coeffs),0.1*len(self.coeffs),len(self.coeffs)) i=0 colors = py.rcParams['axes.prop_cycle'].by_key()['color'] lower_bound68={} upper_bound68={} lower_bound95={} upper_bound95={} mean_coeff = {} error68={} error95={} error95_size={} second_err={} energy68={} energy95={} residual68={} residual95={} for k in self.fits: name=self.fit_labels[k]#r'${\rm %s}$'%k.replace('_','\_') lower_bound68[k] = np.zeros(npar) upper_bound68[k] = np.zeros(npar) lower_bound95[k] = np.zeros(npar) upper_bound95[k] = np.zeros(npar) error68[name]=np.zeros(npar) error95[name]=np.zeros(npar) error95_size[name]=np.zeros(npar) second_err[name]=np.zeros(npar) lower_bound68[k+'(2)'] = np.zeros(npar) upper_bound68[k+'(2)'] = np.zeros(npar) lower_bound95[k+'(2)'] = np.zeros(npar) upper_bound95[k+'(2)'] = np.zeros(npar) energy68[name]=np.zeros(npar) energy95[name]=np.zeros(npar) residual68[name]=np.zeros(npar) residual95[name]=np.zeros(npar) mean_coeff[k]=np.zeros(npar) mean_coeff[k+'(2)']=np.zeros(npar) cnt=0 for l in coeff_list: if l in self.full_labels[k]: idx=np.where(np.array(self.full_labels[k])==l)[0][0] else: cnt+=1 continue # Compute 95% CL and central value if 'SNS_' in k: if self.config[k]['coefficients'][l]['fixed'] is not False and self.config[k]['coefficients'][l]['fixed'] is not True: if isinstance(self.config[k]['coefficients'][l]['value'],Iterable): mid=0 error=0 error2=0 for j in range(len(self.config[k]['coefficients'][l]['value'])): coeff = self.config[k]['coefficients'][l]['fixed'][j] coeff_idx = np.where(np.array(self.labels[k])==coeff)[0][0] temp = np.array(self.coeffs[k][coeff_idx]) low = np.nanpercentile(temp,16) high = np.nanpercentile(temp,84) low2 = np.nanpercentile(temp,2.5) high2 = np.nanpercentile(temp,97.5) mid += (np.mean(temp,axis=0)*self.config[k]['coefficients'][l]['value'][j])/len(self.config[k]['coefficients'][l]['value']) error += ((high-low)/2.)**2 error2 += ((high2-low2)/2.)**2 error = np.sqrt(error) error2 = np.sqrt(error2) else: coeff = self.config[k]['coefficients'][l]['fixed'] coeff_idx = np.where(np.array(self.labels[k])==coeff)[0][0] temp = np.array(self.coeffs[k][coeff_idx])*self.config[k]['coefficients'][l]['value'] low = np.nanpercentile(temp,16) high = np.nanpercentile(temp,84) low2 = np.nanpercentile(temp,2.5) high2 = np.nanpercentile(temp,97.5) mid = np.mean(temp,axis=0) error = (high-low)/2. error2 = (high2-low2)/2. else: if '_HO' in k and l in disjointed_list2: idx2 = np.where(np.array(self.labels[k])==l)[0][0] min_val = min(self.coeffs[k][idx2]) max_val = max(self.coeffs[k][idx2]) mid = (max_val+min_val)/2. full_solution = np.array(self.coeffs[k][idx2]) if l=='Obp' or l=='Opd': solution1 = full_solution[full_solution>mid] solution2 = full_solution[full_solution<mid] else: solution1 = full_solution[full_solution<mid] solution2 = full_solution[full_solution>mid] # First solution low = np.nanpercentile(solution1,16) high = np.nanpercentile(solution1,84) low2 = np.nanpercentile(solution1,2.5) high2 = np.nanpercentile(solution1,97.5) mid = np.mean(solution1,axis=0) error = (high-low)/2. error2 = (high2-low2)/2. # Second solution low_2 = np.nanpercentile(solution2,16) high_2 = np.nanpercentile(solution2,84) low2_2 = np.nanpercentile(solution2,2.5) high2_2 = np.nanpercentile(solution2,97.5) mid2 = np.mean(solution2,axis=0) error+=(high_2-low_2)/2. error2+=(high2_2-low2_2)/2. second_err[name][cnt] = (high2_2-low2_2)/2. lower_bound68[k+'(2)'][cnt] = low_2 upper_bound68[k+'(2)'][cnt] = high_2 lower_bound95[k+'(2)'][cnt] = low2_2 upper_bound95[k+'(2)'][cnt] = high2_2 mean_coeff[k+'(2)'][cnt] = mid2 else: idx2 = np.where(np.array(self.labels[k])==l)[0][0] low = np.nanpercentile(np.array(self.coeffs[k][idx2]),16) high = np.nanpercentile(np.array(self.coeffs[k][idx2]),84) low2 = np.nanpercentile(np.array(self.coeffs[k][idx2]),2.5) high2 = np.nanpercentile(np.array(self.coeffs[k][idx2]),97.5) mid = np.mean(np.array(self.coeffs[k][idx2]),axis=0) error = (high-low)/2. error2 = (high2-low2)/2. else: if '_HO' in k and l in disjointed_list: min_val = min(np.transpose(np.array(self.full_coeffs[k]))[idx]) max_val = max(np.transpose(np.array(self.full_coeffs[k]))[idx]) mid = (max_val+min_val)/2. full_solution = np.transpose(np.array(self.full_coeffs[k]))[idx] if l=='Obp' or l=='Opd': solution1 = full_solution[full_solution>mid] solution2 = full_solution[full_solution<mid] else: solution1 = full_solution[full_solution<mid] solution2 = full_solution[full_solution>mid] # First solution low = np.nanpercentile(solution1,16) high = np.nanpercentile(solution1,84) low2 = np.nanpercentile(solution1,2.5) high2 = np.nanpercentile(solution1,97.5) mid = np.mean(solution1,axis=0) error = (high-low)/2. error2 = (high2-low2)/2. # Second solution low_2 = np.nanpercentile(solution2,16) high_2 = np.nanpercentile(solution2,84) low2_2 = np.nanpercentile(solution2,2.5) high2_2 = np.nanpercentile(solution2,97.5) mid2 = np.mean(solution2,axis=0) error+=(high_2-low_2)/2. error2+=(high2_2-low2_2)/2. second_err[name][cnt]=(high2_2-low2_2)/2. lower_bound68[k+'(2)'][cnt] = low_2 upper_bound68[k+'(2)'][cnt] = high_2 lower_bound95[k+'(2)'][cnt] = low2_2 upper_bound95[k+'(2)'][cnt] = high2_2 mean_coeff[k+'(2)'][cnt] = mid2 else: low = np.nanpercentile(np.transpose(np.array(self.full_coeffs[k]))[idx],16) high = np.nanpercentile(np.transpose(np.array(self.full_coeffs[k]))[idx],84) low2 = np.nanpercentile(np.transpose(np.array(self.full_coeffs[k]))[idx],2.5) high2 = np.nanpercentile(np.transpose(np.array(self.full_coeffs[k]))[idx],97.5) mid = np.mean(np.transpose(np.array(self.full_coeffs[k]))[idx],axis=0) error = (high-low)/2. error2 = (high2-low2)/2. lower_bound68[k][cnt] = low upper_bound68[k][cnt] = high lower_bound95[k][cnt] = low2 upper_bound95[k][cnt] = high2 error68[name][cnt]=error error95[name][cnt]=error2 error95_size[name][cnt] = np.abs(high2-low2) energy68[name][cnt]=1./np.sqrt(error) energy95[name][cnt]=1./np.sqrt(error2) residual68[name][cnt] = mid/error68[name][cnt] residual95[name][cnt] = mid/error95[name][cnt] mean_coeff[k][cnt] = mid err = np.array([[mid-low2,high2-mid]]).T # Plot central value + 95% CL asymmetric if cnt==0: ax.errorbar(X[cnt]+val[i],y=mid,yerr=err,color=colors[i],fmt='.',elinewidth=2,label=name) if '_HO' in k and (l in disjointed_list or l in disjointed_list2) and second_err[name][cnt]!=0: s_err = np.array([[mid2-lower_bound95[k+'(2)'][cnt], upper_bound95[k+'(2)'][cnt]-mid2]]).T ax.errorbar(X[cnt]+val[i]+0.5,y=mid2,yerr=s_err,color=colors[i],\ fmt='.',elinewidth=2) elif cnt == 6 and 'HIGGS' in k: ax.errorbar(X[cnt]+val[i],y=mid,yerr=err,color=colors[i],fmt='.',elinewidth=2,label=name) if '_HO' in k and (l in disjointed_list or l in disjointed_list2) and second_err[name][cnt]!=0: s_err = np.array([[mid2-lower_bound95[k+'(2)'][cnt], upper_bound95[k+'(2)'][cnt]-mid2]]).T ax.errorbar(X[cnt]+val[i]+0.5,y=mid2,yerr=s_err,color=colors[i],\ fmt='.',elinewidth=2) else: ax.errorbar(X[cnt]+val[i],y=mid,yerr=err,color=colors[i],fmt='.',elinewidth=2) if '_HO' in k and (l in disjointed_list or l in disjointed_list2) and second_err[name][cnt]!=0: s_err = np.array([[mid2-lower_bound95[k+'(2)'][cnt], upper_bound95[k+'(2)'][cnt]-mid2]]).T ax.errorbar(X[cnt]+val[i]+0.5,y=mid2,yerr=s_err,color=colors[i],fmt='.',elinewidth=2) cnt+=1 i+=1 ax.imshow(logo, aspect='auto',transform=ax.transAxes,extent=[.775,.975,.05,.2],zorder=-1, alpha=1) py.plot([i for i in range(-1,200)],np.zeros(201),'k--',alpha=0.7) py.xlim(-1,(npar)*2-1) py.yscale('symlog',linthreshy=1e-1) py.ylim(-400,400) py.yticks([-100,-10,-1,-0.1,0,0.1,1,10,100],[r'$-100$',r'$-10$',r'$-1$',r'$-0.1$',r'$0$',r'$0.1$',r'$1$',r'$10$',r'$100$']) py.ylabel(r'$c_i/\Lambda^2\ ({\rm TeV}^{-2})$',fontsize=18) py.tick_params(which='major',direction='in',labelsize=13) my_xticks = [c.replace('O','c') for c in coeff_list] py.xticks(X,my_xticks,rotation=90) py.legend(loc=0,frameon=False,prop={'size':11}) py.tight_layout() py.savefig(f'{self.path}/reports/{self.outputname}/Coeffs_Central.pdf', dpi=1000) # Plot 95% CLs for coefficients (bar plot) nrows,ncols=1,1 py.figure(figsize=(10,7)) df = pd.DataFrame.from_dict(error95_size,orient='index',columns=[c.replace('O','c') for c in coeff_list]) new_df = df.T ax = new_df.plot(kind='bar',rot=0,width=0.7,figsize=(10,5)) ax.imshow(logo, aspect='auto',transform=ax.transAxes,extent=[.775,.975,.825,.975],zorder=-1, alpha=1) #Hard cutoff py.plot(np.linspace(-1,2*npar+1,2),400*np.ones(2),'k--',alpha=0.7,lw=2) py.xticks(rotation=90) py.tick_params(axis='y',direction='in',labelsize=15) py.yscale('log') py.ylabel(r'${\rm Magnitude\ of\ 95\%\ Confidence\ Level\ Bounds}\ (1/{\rm TeV}^2)$',fontsize=11) py.ylim(1e-3,5e3) py.legend(loc=2,frameon=False,prop={'size':11}) py.tight_layout() py.savefig(f'{self.path}/reports/{self.outputname}/Coeffs_Bar.pdf', dpi=1000) # Plot posteriors (histograms) if any(['GLOBAL_' in k for k in self.fits]): gs=int(np.sqrt(npar)) else: gs=int(np.sqrt(npar))+1 nrows,ncols=gs,gs fig = py.figure(figsize=(nrows*4,ncols*3)) redundant_coeffs = ['Opl2','Opl3','O3pl2','O3pl3','Opmu','Opta','Opdi'] heights={} total_cnt=[] clr_cnt=0 print_labels=True for k in self.fits: heights[k]={} if any(['GLOBAL_' in k for k in self.fits]): cnt=8 else: cnt=1 for l in coeff_list: if l in redundant_coeffs: continue if l in self.full_labels[k]: idx=np.where(np.array(self.full_labels[k])==l)[0][0] else: cnt+=1 continue ax = py.subplot(ncols,nrows,cnt) if 'SNS_' in k: if self.config[k]['coefficients'][l]['fixed'] is not False and self.config[k]['coefficients'][l]['fixed'] is not True: if isinstance(self.config[k]['coefficients'][l]['value'],Iterable): # can't plot them here since the number of replicas is not the same for the 2 different dofs cnt+=1 continue else: coeff = self.config[k]['coefficients'][l]['fixed'] coeff_idx = np.where(np.array(self.labels[k])==coeff)[0][0] values = np.array(self.coeffs[k][coeff_idx])*self.config[k]['coefficients'][l]['value'] else: if '_HO' in k and l in disjointed_list2: idx2 = np.where(np.array(self.labels[k])==l)[0][0] min_val = min(self.coeffs[k][idx2]) max_val = max(self.coeffs[k][idx2]) mid = (max_val+min_val)/2. full_solution = np.array(self.coeffs[k][idx2]) if l=='Obp' or l=='Opd': solution1 = full_solution[full_solution>mid] solution2 = full_solution[full_solution<mid] else: solution1 = full_solution[full_solution<mid] solution2 = full_solution[full_solution>mid] values=solution1 ax.hist(solution2,bins='fd',density=True,color=colors[clr_cnt],edgecolor='black',alpha=0.3) else: idx2 = np.where(np.array(self.labels[k])==l)[0][0] values = np.array(self.coeffs[k][idx2]) ax.hist(values,bins='fd',density=True,color=colors[clr_cnt],edgecolor='black',alpha=0.3) lbl = r'${\rm {\bf '+l.replace('O','c')+'}}$' if print_labels and l not in ['Opl1','O3pl1','Ope','Opui']: ax.text(0.05,0.85,lbl,transform=ax.transAxes,fontsize=20) elif print_labels and l=='Opl1': ax.text(0.05,0.85,r'${\rm \bf{cpl1}}$',transform=ax.transAxes,fontsize=20) ax.text(0.05,0.7,r'${\rm \bf{cpl2}}$',transform=ax.transAxes,fontsize=20) ax.text(0.05,0.55,r'${\rm \bf{cpl3}}$',transform=ax.transAxes,fontsize=20) elif print_labels and l=='O3pl1': ax.text(0.05,0.85,r'${\rm \bf{c3pl1}}$',transform=ax.transAxes,fontsize=20) ax.text(0.05,0.7,r'${\rm \bf{c3pl2}}$',transform=ax.transAxes,fontsize=20) ax.text(0.05,0.55,r'${\rm \bf{c3pl3}}$',transform=ax.transAxes,fontsize=20) elif print_labels and l=='Ope': ax.text(0.05,0.85,r'${\rm \bf{cpe}}$',transform=ax.transAxes,fontsize=20) ax.text(0.05,0.7,r'${\rm \bf{cpmu}}$',transform=ax.transAxes,fontsize=20) ax.text(0.05,0.55,r'${\rm \bf{cpta}}$',transform=ax.transAxes,fontsize=20) elif print_labels and l=='Opui': ax.text(0.05,0.85,r'${\rm \bf{cpui}}$',transform=ax.transAxes,fontsize=20) ax.text(0.05,0.7,r'${\rm \bf{-2cpdi}}$',transform=ax.transAxes,fontsize=20) else: if '_HO' in k and l in disjointed_list: min_val = min(np.transpose(np.array(self.full_coeffs[k]))[idx]) max_val = max(np.transpose(np.array(self.full_coeffs[k]))[idx]) mid = (max_val+min_val)/2. full_solution = np.transpose(np.array(self.full_coeffs[k]))[idx] if l=='Obp' or l=='Opd': solution1 = full_solution[full_solution>mid] solution2 = full_solution[full_solution<mid] else: solution1 = full_solution[full_solution<mid] solution2 = full_solution[full_solution>mid] heights[k][cnt]=ax.hist(solution1,bins='fd',density=True,color=colors[clr_cnt],edgecolor='black',alpha=0.3) ax.hist(solution2,bins='fd',density=True,color=colors[clr_cnt],edgecolor='black',alpha=0.3) else: heights[k][cnt]=ax.hist(np.transpose(self.full_coeffs[k])[idx],bins='fd',density=True,color=colors[clr_cnt],edgecolor='black',alpha=0.3,\ label=self.fit_labels[k]) lbl = r'${\rm {\bf '+l.replace('O','c')+'}}$' if print_labels and l not in ['Opl1','O3pl1','Ope','Opui']: ax.text(0.05,0.85,lbl,transform=ax.transAxes,fontsize=20) elif print_labels and l=='Opl1': ax.text(0.05,0.85,r'${\rm \bf{cpl1}}$',transform=ax.transAxes,fontsize=20) ax.text(0.05,0.7,r'${\rm \bf{cpl2}}$',transform=ax.transAxes,fontsize=20) ax.text(0.05,0.55,r'${\rm \bf{cpl3}}$',transform=ax.transAxes,fontsize=20) elif print_labels and l=='O3pl1': ax.text(0.05,0.85,r'${\rm \bf{c3pl1}}$',transform=ax.transAxes,fontsize=20) ax.text(0.05,0.7,r'${\rm \bf{c3pl2}}$',transform=ax.transAxes,fontsize=20) ax.text(0.05,0.55,r'${\rm \bf{c3pl3}}$',transform=ax.transAxes,fontsize=20) elif print_labels and l=='Ope': ax.text(0.05,0.85,r'${\rm \bf{cpe}}$',transform=ax.transAxes,fontsize=20) ax.text(0.05,0.7,r'${\rm \bf{cpmu}}$',transform=ax.transAxes,fontsize=20) ax.text(0.05,0.55,r'${\rm \bf{cpta}}$',transform=ax.transAxes,fontsize=20) elif print_labels and l=='Opui': ax.text(0.05,0.85,r'${\rm \bf{cpui}}$',transform=ax.transAxes,fontsize=20) ax.text(0.05,0.7,r'${\rm \bf{-2cpdi}}$',transform=ax.transAxes,fontsize=20) ax.tick_params(which='both',direction='in',labelsize=22.5) ax.tick_params(labelleft=False) if cnt not in total_cnt: total_cnt.append(cnt) cnt+=1 clr_cnt+=1 print_labels = False for cnt in total_cnt: ax = py.subplot(ncols,nrows,cnt) if len(self.fits)>1: fit1=self.fits[0] fit2=self.fits[1] if cnt in heights[fit1] and cnt in heights[fit2]: max_height = max(max(heights[fit1][cnt][0]),max(heights[fit2][cnt][0])) elif cnt in heights[fit1]: max_height = max(heights[fit1][cnt][0]) elif cnt in heights[fit2]: max_height = max(heights[fit2][cnt][0]) else: max_height=0 else: #max_height = max(heights[self.fits[0]][cnt][0]) max_height=0 if max_height!=0: ax.set_ylim(0,1.2*max_height) lines, labels = fig.axes[-2].get_legend_handles_labels() if any(['GLOBAL_' in k for k in self.fits]): fig.legend(lines, labels, loc = 'upper center',ncol=2,prop={'size':35},bbox_to_anchor=(0.5,0.92)) else: fig.legend(lines, labels, loc = 'upper center',ncol=2,prop={'size':35},bbox_to_anchor=(0.5,0.05)) py.tight_layout() py.savefig(f'{self.path}/reports/{self.outputname}/Coeffs_Hist.pdf') # Plot residuals at 68% CL (bar plot) nrows,ncols=1,1 py.figure(figsize=(30,20)) df = pd.DataFrame.from_dict(residual68,orient='index',columns=[c.replace('O','c') for c in coeff_list]) new_df = df.T ax = new_df.plot(kind='bar',rot=0,width=0.7,figsize=(10,5)) ax.plot([-1,npar+1],np.zeros(2),'k--',lw=2) ax.plot([-1,npar+1],np.ones(2),'k--',lw=2,alpha=0.3) ax.plot([-1,npar+1],-1.*np.ones(2),'k--',lw=2,alpha=0.3) ax.imshow(logo, aspect='auto',transform=ax.transAxes,extent=[.775,.975,.825,.975],zorder=-1,alpha=1) py.xticks(rotation=90) py.tick_params(axis='y',direction='in',labelsize=15) py.ylabel(r'${\rm Residuals\ (68\%)}$',fontsize=15) py.ylim(-4,4) py.legend(loc=2,frameon=False,prop={'size':11}) py.tight_layout() py.savefig(f'{self.path}/reports/{self.outputname}/Coeffs_Residuals.pdf', dpi=1000 ) # Plot residuals at 68% CL (histogram) nrows,ncols=1,1 py.figure(figsize=(10,7)) ax = py.subplot(111) i=0 for name in residual68: ax.hist(residual68[name],bins='fd',density=True,color=colors[i],edgecolor='k',alpha=0.3,label=name) i+=1 ax.imshow(logo, aspect='auto',transform=ax.transAxes,extent=[.675,.975,.825,.975],zorder=-1, alpha=1) py.xticks(rotation=90) py.tick_params(which='both',direction='in',labelsize=15) py.ylabel(r'${\rm Normalized\ Yield}$',fontsize=15) py.xlabel(r'${\rm Residuals\ (68\%)}$',fontsize=15) py.ylim(0,1) py.xlim(-3,3) py.legend(loc=2,frameon=False,prop={'size':11}) py.tight_layout() py.savefig(f'{self.path}/reports/{self.outputname}/Coeffs_Residuals_Hist.pdf', dpi=1000) # Plot 95% CL for energy reach (bar plot) # nrows,ncols=1,1 # py.figure(figsize=(7,5)) # df = pd.DataFrame.from_dict(energy95,orient='index',columns=coeff_list) # new_df = df.T # ax = new_df.plot(kind='bar',rot=0,width=0.7,figsize=(10,5)) # py.xticks(rotation=90) # py.tick_params(axis='y',direction='in',labelsize=15) # py.ylabel(r'$\Lambda/\sqrt{c_i}\ ({\rm TeV})\ {\rm at\ the\ 95\%\ CL}$',fontsize=11) # py.ylim(0,5) # py.legend(loc=2,frameon=False,prop={'size':11}) # py.tight_layout() # py.savefig(f'{self.path}/reports/{self.outputname}/Coeffs_Energy.pdf') # Combine plots in tex file # ================ Coefficients latex file ===================== L = latex_packages() L.append(r"\begin{document}") # Write coefficients table L.append('\n') L.append('\n') L.append(r"\begin{table}[H]") L.append(r"\centering") temp = r"\begin{tabularx}{\textwidth}{|c|c|" for i in range(len(self.fits)): temp += "X|X|" temp+="}" L.append(temp) L.append(r"\hline") temp=' & & ' for i,k in enumerate(self.fits): if i != len(self.fits)-1: temp += r" \multicolumn{2}{c|}{%s} & "%self.fit_labels[k]#k.replace('_','\_') else: temp += r" \multicolumn{2}{c|}{%s} \\ \hline"%self.fit_labels[k]#k.replace('_','\_') L.append(temp) temp = r'Class & Coefficients &' for i,k in enumerate(self.fits): if i!= len(self.fits)-1: temp += r" 68\% CL Bounds & 95\% CL Bounds &" else: temp += r" 68\% CL Bounds & 95\% CL Bounds \\ \hline" L.append(temp) #Load coefficient yaml card yaml_file = f'{self.path}/smefit/analyze/coeff_groups.yaml' with open(yaml_file) as f: coeff_config = yaml.safe_load(f) new_coeff_list = np.array([c.replace('O','c') for c in coeff_list]) # Build lists for grouped coefficients coeffs_organized = {} for group in coeff_config.keys(): coeffs_organized[group]=[] for c in coeff_config[group]: if c=='OPff': c='Off' if c not in coeffs_organized[group] and c.replace('O','c') in new_coeff_list: coeffs_organized[group].append(c.replace('O','c')) for group in coeffs_organized.keys(): if len(coeffs_organized[group])==0: continue L.append('\multirow{%d}{*}{%s}'%(len(coeffs_organized[group]),group)) for isub,coeff in enumerate(coeffs_organized[group]): temp='' temp+=' & '+coeff+" & " for i,k in enumerate(self.fits): if i != len(self.fits)-1: if coeff in new_coeff_list: idx = np.where(new_coeff_list==coeff)[0][0] error = (upper_bound95[k][idx]-lower_bound95[k][idx])/2. if 'O'+coeff[1:] in disjointed_list or 'O'+coeff[1:] in disjointed_list2 and lower_bound68[k+'(2)'][idx]!=0: error2 = (upper_bound95[k+'(2)'][idx]-lower_bound95[k+'(2)'][idx])/2. temp += r"[%0.3f,%0.3f]\newline [%0.3f,%0.3f] & %0.3f $\pm$ %0.3f\newline %0.3f $\pm$ %0.3f] & "%(lower_bound68[k][idx],\ upper_bound68[k][idx],\ lower_bound68[k+'(2)'][idx],\ upper_bound68[k+'(2)'][idx],\ mean_coeff[k][idx],\ error,\ mean_coeff[k+'(2)'][idx],\ error2) else: temp += r"[%0.3f,%0.3f] & %0.3f $\pm$ %0.3f & "%(lower_bound68[k][idx],upper_bound68[k][idx],\ mean_coeff[k][idx],error) else: temp += r" & & " elif i == len(self.fits)-1 and isub!=len(coeffs_organized[group])-1: if coeff in new_coeff_list: idx = np.where(new_coeff_list==coeff)[0][0] error = (upper_bound95[k][idx]-lower_bound95[k][idx])/2. if 'O'+coeff[1:] in disjointed_list or 'O'+coeff[1:] in disjointed_list2 and lower_bound68[k+'(2)'][idx]!=0: error2 = (upper_bound95[k+'(2)'][idx]-lower_bound95[k+'(2)'][idx])/2. temp += r"[%0.3f,%0.3f]\newline [%0.3f,%0.3f] & %0.3f $\pm$ %0.3f\newline %0.3f $\pm$ %0.3f\\ \cline{2-%d}"%(lower_bound68[k][idx],\ upper_bound68[k][idx],\ lower_bound68[k+'(2)'][idx],\ upper_bound68[k+'(2)'][idx],\ mean_coeff[k][idx],\ error,\ mean_coeff[k+'(2)'][idx],\ error2,\ 2+2*len(self.fits)) else: temp += r"[%0.3f,%0.3f] & %0.3f $\pm$ %0.3f \\ \cline{2-%d}"%(lower_bound68[k][idx],upper_bound68[k][idx],\ mean_coeff[k][idx],error,2+2*len(self.fits)) else: temp += r" & \\ \cline{2-%d}"%(2+2*len(self.fits)) elif i==len(self.fits)-1 and isub==len(coeffs_organized[group])-1: if coeff in new_coeff_list: idx = np.where(new_coeff_list==coeff)[0][0] error = (upper_bound95[k][idx]-lower_bound95[k][idx])/2. if 'O'+coeff[1:] in disjointed_list or 'O'+coeff[1:] in disjointed_list2 and lower_bound68[k+'(2)'][idx]!=0: error2 = (upper_bound95[k+'(2)'][idx]-lower_bound95[k+'(2)'][idx])/2. temp += r"[%0.3f,%0.3f]\newline [%0.3f,%0.3f] & %0.3f $\pm$ %0.3f\newline %0.3f $\pm$ %0.3f\\ \hline"%(lower_bound68[k][idx],\ upper_bound68[k][idx],\ lower_bound68[k+'(2)'][idx],\ upper_bound68[k+'(2)'][idx],\ mean_coeff[k][idx],\ error,\ mean_coeff[k+'(2)'][idx],\ error2) else: temp += r"[%0.3f,%0.3f] & %0.3f $\pm$ %0.3f \\ \hline"%(lower_bound68[k][idx],upper_bound68[k][idx],\ mean_coeff[k][idx],error) else: temp += r" & \\ \hline" L.append(temp) L.append(r"\end{tabularx}") L.append(r"\caption{Coefficient comparison.}") L.append(r"\end{table}") L.append('\n') L.append('\n') self.combine_plots(L, 'coefficient_plots', 'Coeffs_')
[docs] def plot_ellipse(self): """ Plot the 2 standard deviation ellipse of two parameters (used for 2 parameter fits) """ mu={} lbls=[] hndls=[] p1={} p2={} coeff_min={} coeff_max={} _, ax = py.subplots(figsize=(6, 6)) colors = py.rcParams['axes.prop_cycle'].by_key()['color'] i=0 for k in self.fits: if k not in mu: mu[k]=[] if '-NS_c' in k: coeff_names = k.split('_')[1] else: coeff_names = self.fits[-2].split('_')[1] coeff_names = coeff_names.split('c')[1:] coeff1 = 'O'+coeff_names[0] coeff2 = 'O'+coeff_names[1] if coeff1 in self.full_labels[k]: if 'SNS_' in k: idx1=np.where(np.array(self.labels[k])==coeff1)[0][0] coeff1_values = np.array(self.coeffs[k][idx1]) else: idx1=np.where(np.array(self.full_labels[k])==coeff1)[0][0] coeff1_values = np.array(self.full_coeffs[k]).T[idx1] mu[k].append(np.mean(coeff1_values,axis=0)) if coeff2 in self.full_labels[k]: idx2=np.where(np.array(self.full_labels[k])==coeff2)[0][0] if 'SNS_' in k: idx2=np.where(np.array(self.labels[k])==coeff2)[0][0] coeff2_values = np.array(self.coeffs[k][idx2]) else: idx2=np.where(np.array(self.full_labels[k])==coeff2)[0][0] coeff2_values = np.array(self.full_coeffs[k]).T[idx2] mu[k].append(np.mean(coeff2_values,axis=0)) else: coeff_min[k] = np.nanpercentile(coeff1_values,2.5) coeff_max[k] = np.nanpercentile(coeff1_values,97.5) vertical=True if coeff1 not in self.full_labels[k]: coeff_min[k] = np.nanpercentile(coeff2_values,2.5) coeff_max[k] = np.nanpercentile(coeff2_values,97.5) horizontal=True if 'SNS_' in k: if vertical: p2[k]=ax.axvspan(coeff_min[k],coeff_max[k],alpha=0.3,facecolor=colors[i],edgecolor=None) p1[k]=ax.axvspan(coeff_min[k],coeff_max[k],facecolor=(0,0,0,0),edgecolor=colors[i]) vertical=False elif horizontal: p2[k]=ax.axhspan(coeff_min[k],coeff_max[k],alpha=0.3,facecolor=colors[i],edgecolor=None) p1[k]=ax.axhspan(coeff_min[k],coeff_max[k],facecolor=(0,0,0,0),edgecolor=colors[i]) horizontal=False else: p2[k]=confidence_ellipse(coeff1_values, coeff2_values, ax,n_std=2,alpha=0.3, facecolor=colors[i], edgecolor=None) p1[k]=confidence_ellipse(coeff1_values, coeff2_values, ax,n_std=2,alpha=1, edgecolor=colors[i]) ax.scatter(mu[k][0], mu[k][1], c=colors[i], s=3) py.xlabel(r'${\rm '+coeff1.replace('O','c')+'}$',fontsize=20) py.ylabel(r'${\rm '+coeff2.replace('O','c')+'}$',fontsize=20) py.tick_params(which='both',direction='in',labelsize=15) if (coeff1.replace('O','c') in k) and ("GLOBAL" in k): lbls.append(r'${\rm All\ Data\ (2D)}$') elif (coeff1.replace('O','c') not in k) and "GLOBAL" in k: lbls.append(r'${\rm All\ Data\ (Marg)}$') else: lbls.append(r'${\rm '+k.split('_')[2]+'}$') hndls.append((p1[k],p2[k])) i+=1 ax.scatter(0, 0, c='k',marker='s',s=10,zorder=10) py.legend(loc='best',handles=hndls,labels=lbls,frameon=False,prop={'size':18}) py.title(r'${\rm 95\%\ Confidence\ Level\ Bounds}$',fontsize=15) py.tight_layout() py.savefig(f'{self.path}/reports/{(self.outputname)}/Ellipse_{coeff1}_{coeff2}.pdf')
[docs] def plot_correlations(self): """ Computes and displays the correlation coefficients between parameters in a heat map """ for k in self.fits: coeff_list = self.full_labels[k] param_data = pd.DataFrame(self.full_coeffs[k]) correlations = param_data.corr() rows_to_keep = [] for i in range(len(correlations)): for j in range(len(correlations[i])): if (coeff_list[i]!='OpWB' and coeff_list[i]!='OpD')\ and self.config[k]['coefficients'][coeff_list[i]]['fixed'] is not False: continue if coeff_list[i]=='OW' or coeff_list[i]=='OB': continue if i!=j and (correlations[i][j]>0.5 or correlations[i][j]<-0.5): if i not in rows_to_keep: rows_to_keep.append(i) correlations = np.array(correlations)[np.array(rows_to_keep),:] correlations = np.array(correlations)[:,np.array(rows_to_keep)] labels = np.array(self.full_labels[k])[np.array(rows_to_keep)] npar=len(labels) fig = py.figure(figsize=(10,10)) ax = fig.add_subplot(111) colors = py.rcParams['axes.prop_cycle'].by_key()['color'] cmap = matplotlib.colors.ListedColormap([colors[0], 'lightgrey', colors[1]]) bounds = [-1.0, -0.5, 0.5, 1.0] norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N) cax = ax.matshow(correlations, vmin=-1, vmax=1, cmap=cmap, norm=norm) divider = make_axes_locatable(ax) cax1 = divider.append_axes("right", size="5%", pad=0.1) fig.colorbar(cax, cax=cax1) for i in range(npar): for j in range(npar): c = correlations[j,i] if (c>0.5 or c<-0.5): ax.text(i, j, str(np.round(c,1)), va='center', ha='center',fontsize=10) #ax.text(i, j, str(np.round(c,1)), va='center', ha='center',fontsize=10) names = [l.replace('O','c') for l in labels] ticks = np.arange(0,npar,1) ax.set_xticks(ticks) ax.set_yticks(ticks) ax.set_xticklabels(names) ax.set_yticklabels(names) ax.tick_params(axis='x',rotation=90,labelsize=15) ax.tick_params(axis='y',labelsize=15) py.tight_layout() py.savefig('reports/%s/Coeffs_Corr_%s.pdf'%(self.outputname,k)) # Combine plots in tex file # ================ Correlations latex file ===================== L = latex_packages() L.append(r"\begin{document}") self.combine_plots(L, 'correlation_plots', 'Coeffs_Corr_', caption=True )
[docs] def plot_data_vs_theory(self): """ Plots data vs theory. Datasets to be plotted should be written in the plot_settings.yaml YAML file. There you should also specify the various plot settings. Note: special treatment is given to plots 14 and 16 (W helicity data and combined ATLAS+CMS Run I signal strength data, respectively) to get them to display properly. If these are moved in plot_settings, they must be moved here as well. Note: In some cases (e.g. fits with mass cuts) this function throws an error that has yet to be resolved. """ mean_theory_exp={} std_theory_exp={} for k in self.fits: datalines={} systypes={} smtheory={} mean_theory_exp[k]={} std_theory_exp[k]={} # Read data and SM theory files data_list=[] for dataset in self.config[k]['datasets']: if dataset in data_list: continue datalines[dataset], systypes[dataset] = self.read_dataset(dataset) data_list.append(dataset) # Construct theory arrays per experiment cnt=0 for i in range(len(self.dataset[k].ExpNames)): set_name=self.dataset[k].ExpNames[i] ndat=self.dataset[k].NdataExp[i] mean_theory_exp[k][set_name] = self.mean_theory[k][cnt:cnt+ndat] std_theory_exp[k][set_name] = self.std_theory[k][cnt:cnt+ndat] if set_name not in smtheory: smtheory[set_name]=self.sm_theory[k][cnt:cnt+ndat] cnt+=ndat # Load plots YAML file yaml_file = f'{self.path}/smefit/analyze/plot_settings.yaml' with open(yaml_file) as f: plots = yaml.safe_load(f) for p in plots.keys(): # Skip datasets that were not included in fit if np.any([dataset not in data_list for dataset in plots[p]['datasets']]): continue # Set figure sizes if 'figsize' in plots[p]: py.figure(figsize=(plots[p]['figsize'][0],plots[p]['figsize'][1])) else: py.figure(figsize=(4,3)) xcnt=0 for dataname in plots[p]['datasets']: # Gather data from data tables x_vals = [] central_data = [] stat_error = [] sys_errors = [] for line in datalines[dataname]: ncols = len(line.split()) if ncols==3: ndata = int(line.split()[-1]) if ncols>3: x_vals.append(float(line.split()[2])) central_data.append(float(line.split()[5])) stat_error.append(float(line.split()[6])) sys_errors.append([float(line.split()[i]) for i in range(7,ncols)]) add_corr_index=[] mult_corr_index=[] add_uncorr_index=[] mult_uncorr_index=[] for i in range(1,len(systypes[dataname])): if systypes[dataname][i].split()[1]=='ADD' and systypes[dataname][i].split()[-1]=='UNCORR': add_uncorr_index.append(int(systypes[dataname][i].split()[0])) elif systypes[dataname][i].split()[1]=='MULT' and systypes[dataname][i].split()[-1]=='UNCORR': mult_uncorr_index.append(int(systypes[dataname][i].split()[0])) elif systypes[dataname][i].split()[1]=='ADD' and systypes[dataname][i].split()[-1]!='UNCORR': add_corr_index.append(int(systypes[dataname][i].split()[0])) elif systypes[dataname][i].split()[1]=='MULT' and systypes[dataname][i].split()[-1]!='UNCORR': mult_corr_index.append(int(systypes[dataname][i].split()[0])) add_uncorr_index = 2*np.array(add_uncorr_index)-2 mult_uncorr_index = 2*np.array(mult_uncorr_index)-1 add_corr_index = 2*np.array(add_corr_index)-2 mult_corr_index = 2*np.array(mult_corr_index)-1 corr_index = np.concatenate([add_corr_index,mult_corr_index],0) # corr_index_sorted = np.argsort(corr_index) shift_data=True # Compute total uncorrelated uncertainty total_uncorr=[] total_error=[] for i in range(ndata): stat_err = 0. sys_err = 0. corr_sys_err = 0. stat_err += stat_error[i]**2 if len(add_uncorr_index)>0: add_uncorr_error = np.array(sys_errors)[i][add_uncorr_index] sys_err += np.sum(add_uncorr_error**2) if len(mult_uncorr_index)>0: mult_uncorr_error = np.array(sys_errors)[i][mult_uncorr_index]*np.array(central_data)[i]/100. sys_err += np.sum(mult_uncorr_error**2) if len(mult_corr_index)>0: mult_corr_error = np.array(sys_errors)[i][mult_corr_index]*np.array(central_data)[i]/100. corr_sys_err+=np.sum(mult_corr_error**2) if len(add_corr_index)>0: add_corr_error = np.array(sys_errors)[i][add_corr_index] corr_sys_err+=np.sum(add_corr_error**2) # If no uncorrelated errors are specified, # do not shift data if (stat_err+sys_err)==0 and p!='plot16': shift_data = False total_uncorr.append(np.sqrt(stat_err+sys_err)) total_error.append(np.sqrt(stat_err+sys_err+corr_sys_err)) total_uncorr = np.array(total_uncorr) total_error = np.array(total_error) # Gather correlataed uncertainties if available corr_errors=[] for i in range(ndata): if len(mult_corr_index)>0: mult_errors = np.array(sys_errors)[i][mult_corr_index]*np.array(central_data)[i]/100. if len(add_corr_index)>0: add_errors = np.array(sys_errors)[i][add_corr_index] corr_errors.append(np.concatenate([mult_errors,add_errors],0)) corr_errors = np.array(corr_errors) # Last two points in Run I Higgs signal strengths do not have correlated errors # Temporarily removing the last two points to compute shifts if p=='plot16': corr_errors=corr_errors[:-2] total_uncorr_temp = total_uncorr total_uncorr=total_uncorr[:-2] central_data_temp = central_data central_data = central_data[:-2] mean_theory_temp={} for k in self.fits: if dataname not in mean_theory_exp[k]: continue mean_theory_temp[k]=mean_theory_exp[k][dataname] mean_theory_exp[k][dataname]=mean_theory_exp[k][dataname][:-2] # Number of correlated errors if len(corr_errors)>0: ncorr=len(corr_errors[0]) else: ncorr=0 # Compute shift to data from correlated errors if present # In multi-fit comparison, shift is taken to be theory of last fit shift=[] for k in self.fits: if dataname in mean_theory_exp[k]: fit_for_shift = k if shift_data and ncorr>0: A = np.diag(np.diag(np.ones((ncorr,ncorr)))) + np.einsum('ik,il,i->kl',corr_errors,corr_errors,1./(total_uncorr**2)) B = np.einsum('ik,i,i->k',corr_errors,central_data-mean_theory_exp[fit_for_shift][dataname],1./(total_uncorr**2)) try: r=np.einsum('kl,l->k',np.linalg.inv(A),B) except: r=np.zeros(len(corr_errors[0])) shift = np.einsum('k,ik->i',r,corr_errors) # Set up x-values, spacing between data and theory points, # and normalization for each plot if 'xshift1' in plots[p]: xshift1 = plots[p]['xshift1'] if isinstance(xshift1, Iterable): xshift1=np.array(xshift1) if 'xshift2' in plots[p]: xshift2 = plots[p]['xshift2'] if isinstance(xshift2, Iterable): xshift2=np.array(xshift2) if 'normed' in plots[p]: if plots[p]['normed']: norm=smtheory[dataname] else: norm=np.ones(ndata) if 'x_values' in plots[p]: if plots[p]['x_values'] is not None: x_vals = np.array(plots[p]['x_values']) if len(plots[p]['datasets'])==len(x_vals): x_vals = np.array([plots[p]['x_values'][xcnt]]) # Exceptions if p=='plot14': x_vals = np.array(x_vals[::2]) if p=='plot14' and dataname.startswith('CMS'): x_vals = x_vals+0.5 if p=='plot16': # Adding last two points back, last two points have zero shift shift_temp = np.zeros(ndata) for i, elem in enumerate(shift): shift_temp[i]=elem central_data = central_data_temp total_uncorr = total_uncorr_temp for k in self.fits: if dataname not in mean_theory_exp[k]: continue mean_theory_exp[k][dataname]=mean_theory_temp[k] shift=shift_temp # Reset color cycle py.gca().set_prop_cycle(None) # Only need labels for one dataset in multi-dataset plots if xcnt==0: # Plot unshifted data print(dataname) py.errorbar(np.array(x_vals),central_data/norm,\ yerr=total_error/norm,fmt='.',markersize=5,\ label=r'${\rm Unshifted\ Data}$') # Plot shifted data if correlated uncertainties exist if len(corr_index)>0 and len(shift)>0: py.errorbar(np.array(x_vals)+xshift1,(central_data-shift)/norm,\ yerr=total_uncorr/norm,fmt='.',markersize=5,\ label=r'${\rm Shifted\ Data}$') else: # Plot dummy point to keep consistent with colors/legend py.errorbar(10000.,10000.,yerr=0.,fmt='.',markersize=5,label=r'${\rm Shifted\ Data}$') # Plot theory for i,k in enumerate(self.fits): if dataname not in mean_theory_exp[k]: continue if 'NHO_vs_HO' in self.outputname: if 'NHO' in k: temp_label =r'${\rm EFT\ Lin}$' elif 'HO' in k: temp_label =r'${\rm EFT\ Quad}$' else: temp_label = self.fit_labels[k] py.errorbar(np.array(x_vals)+xshift2*(i+1),(mean_theory_exp[k][dataname])/norm,\ yerr=std_theory_exp[k][dataname]/norm,fmt='.',markersize=5,\ label=temp_label) #r'${\rm Fit}%d$'%(i+1)) # Plot without labels else: # Plot unshifted data py.errorbar(np.array(x_vals),central_data/norm,\ yerr=total_error/norm,fmt='.',markersize=5) # Plot shifted data if correlated uncertainties exist if len(corr_index)>0 and len(shift)>0: py.errorbar(np.array(x_vals)+xshift1,(central_data-shift)/norm,\ yerr=total_uncorr/norm,fmt='.',markersize=5) else: # Plot dummy point to keep consistent with colors/legend py.errorbar(10000.,10000.,yerr=0.,fmt='.',markersize=5) # Plot theory for i,k in enumerate(self.fits): if dataname not in mean_theory_exp[k]: continue py.errorbar(np.array(x_vals)+xshift2*(i+1),(mean_theory_exp[k][dataname])/norm,\ yerr=std_theory_exp[k][dataname]/norm,fmt='.',markersize=5) xcnt+=1 # Construct plot limits, labels, and titles if 'plot_ones' in plots[p]: if plots[p]['plot_ones']: py.plot(np.linspace(-10000,10000,2),np.ones(2),'k--',alpha=0.7) if 'xlim' in plots[p]: py.xlim(plots[p]['xlim'][0],plots[p]['xlim'][1]) if 'ylim' in plots[p]: py.ylim(plots[p]['ylim'][0],plots[p]['ylim'][1]) if 'xscale' in plots[p]: py.xscale(plots[p]['xscale']) if 'yscale' in plots[p]: py.yscale(plots[p]['yscale']) if 'xlabel' in plots[p]: if plots[p]['xlabel'] is not None: py.xlabel(r'%s'%plots[p]['xlabel'],fontsize=12) if 'ylabel' in plots[p]: if plots[p]['ylabel'] is not None: py.ylabel(r'%s'%plots[p]['ylabel'],fontsize=10) if 'title' in plots[p]: if plots[p]['title'] is not None: py.title(r'%s'%plots[p]['title'],fontsize=10) if 'legend_loc' in plots[p]: legend_loc = plots[p]['legend_loc'] py.tick_params(which='both',direction='in') if 'x_labels' in plots[p]: if plots[p]['x_labels'] is not None and 'rotate_xlabels' in plots[p]: py.xticks(plots[p]['x_values'],plots[p]['x_labels'],rotation=plots[p]['rotate_xlabels']) elif plots[p]['x_labels'] is not None: py.xticks(plots[p]['x_values'],plots[p]['x_labels']) py.legend(loc=legend_loc, frameon=False, prop={'size':10}) py.tight_layout() if 'output' in plots[p]: if plots[p]['output'] is not None: filename=plots[p]['output'] else: filename=plots[p]['datasets'][0] py.savefig(f'{self.path}/reports/%s/DvT%s_%s.pdf'%(self.outputname,p[4:],filename)) # Combine plots in tex file # ================ Data vs Theory latex file ===================== L = latex_packages() L.append(r"\usepackage{multicol}") L.append(r"\newenvironment{Figure}{\par\medskip\noindent\minipage{\linewidth}}{\endminipage\par\medskip}") L.append(r"\begin{document}") L.append(r"\begin{multicols}{2}") for k in sorted(listdir(f'{self.path}/reports/{self.outputname}')): if k.startswith('DvT') is False: continue L.append(r"\begin{Figure}") L.append(r"\centering") L.append(r"\includegraphics[width=0.99\textwidth]{%s/reports/%s/%s}"%(self.path,self.outputname,k)) L.append(r"\end{Figure}") L.append(r"\end{multicols}") self.run_pdflatex(L, 'dvt_plots') self.move_to_meta('DvT*_*')