0806.1487/ms.tex
1: \documentclass[twocolumn,showpacs,preprintnumbers,amsmath,amssymb,nofootinbib,superscriptaddress]{revtex4}
2: %\documentclass[preprint,showpacs,preprintnumbers,amsmath,amssymb,nofootinbib,superscriptaddress]{revtex4}
3: 
4: % Some other (several out of many) possibilities
5: %\documentclass[preprint,aps]{revtex4}
6: %\documentclass[preprint,aps,draft]{revtex4}
7: %\documentclass[prb]{revtex4}% Physical Review B
8: 
9: \usepackage{graphicx}% Include figure files
10: \usepackage{dcolumn}% Align table columns on decimal point
11: \usepackage{bm}% bold math
12: 
13: \usepackage[usenames]{color} % for adding editing notes to draft
14: 
15: %\nofiles
16: 
17: %%%%%%%%%%%%%%%%%%%   begin local macros %%%%%%%%%%%%%%%%%%%%
18: \makeatletter
19: \newenvironment{tablehere}{\def\@captype{table}}{}
20: \newenvironment{figurehere}{\begin{figure}[tb]\epsscale{1}}{\end{figure}}
21: %%% \newenvironment{figurehere}{\def\@captype{figure}}{\vspace{0.3in}}
22: \makeatother
23: \newcommand{\rad}{r}    % comoving radial distance
24: \newcommand{\da}{d_A}   % comoving angular diameter distance
25: 
26: \def\order{{\cal O}}
27: \def\etal{{\it et al.~}}
28: \def\arcsec{{\prime\prime}}
29: \def\VEV#1{\left\langle #1\right\rangle}
30: \def\abso#1{\mid\! #1\!\mid}
31: \def\bftheta{{\mbox{\boldmath $\theta$}}}
32: \def\bfkappa{{\mbox{\boldmath $\kappa$}}}
33: \def\bfell{{\mbox{\boldmath $\ell$}}}
34: \def\Ref{{\bf REF!!!}}
35: \def\bfalpha{{\mbox{$\vec \alpha$}}}
36: \def\vecx{{\mbox{\boldmath $x$}}}
37: \def\hattheta{{\bf \hat \theta}}
38: 
39: \def\d{\delta}
40: \def\dt{\tilde \delta}
41: \def\dD{\delta_{\rm D}}
42: \def\del{\nabla}
43: \def\knl{k_{n\ell}}
44: \newcommand{\fore}{{\rm f}}
45: 
46: \newcommand{\deld}{\delta^{\rm D}}
47: \newcommand{\tableskip}{\\[-6pt]}
48: \newcommand{\sky}{{\rm sky}}
49: \newcommand{\tot}{{\rm tot}}
50: \newcommand{\noise}{{\rm noise}}
51: \newcommand{\Ylm}[1]{Y_{l_#1}^{m_#1}}
52: \newcommand{\Ylmn}{Y_{l}^{m}}
53: \newcommand{\alm}[1]{a_{l_#1 m_#1}}
54: \newcommand{\almn}{a_{l m}}
55: \newcommand{\Dk}{\frac{d^3{\bf k}}{\left( 2\pi \right) ^3}}
56: \renewcommand{\dbltopfraction}{1.0}
57: \renewcommand{\topfraction}{1.0}
58: \renewcommand{\bottomfraction}{1.0}
59: \renewcommand{\textfraction}{0.10}
60: 
61: 
62: \def\be{\begin{equation}}
63: \def\ee{\end{equation}}
64: \def\bea{\begin{eqnarray}}
65: \def\eea{\end{eqnarray}}
66: \def\sm{{\rm M}_\odot}
67: \def\uline#1{$\underline{\smash{\hbox{#1}}}$}
68: 
69: \def\ang{\,{\rm\AA}}
70: \def\flux{\,{\rm erg\,cm^{-2}\,arcsec^{-2}\,\AA^{-1}\,s^{-1}}}
71: \def\GeV{\,{\rm GeV}}
72: \def\TeV{\,{\rm TeV}}
73: \def\gev{\,{\rm GeV}}
74: \def\keV{\,{\rm keV}}
75: \def\MeV{\,{\rm MeV}}
76: \def\sec{\,{\rm sec}}
77: \def\Gyr{\,{\rm Gyr}}
78: \def\yr{\,{\rm yr}}
79: \def\rcm{\,{\rm cm}}
80: \def\pc{\,{\rm pc}}
81: \def\kpc{\,{\rm kpc}}
82: \def\Mpc{\,{\rm Mpc}}
83: \def\mpc{\,{\rm Mpc}}
84: \def\eV{{\,\rm eV}}
85: \def\ev{{\,\rm eV}}
86: \def\erg{{\,\rm erg}}
87: \def\cmm2{{\,\rm cm^{-2}}}
88: \def\cm2{{\,{\rm cm}^2}}
89: \def\cmm3{{\,{\rm cm}^{-3}}}
90: \def\gcmm3{{\,{\rm g\,cm^{-3}}}}
91: \def\kms{\,{\rm km\,s^{-1}}}
92: \def\HO{{100h\,{\rm km\,sec^{-1}\,Mpc^{-1}}}}
93: \def\HOh{{100\,{\rm km\,sec^{-1}\,Mpc^{-1}}}}
94: \def\mpl{{m_{\rm Pl}}}
95: \def\mpp{{m_{\rm Pl,0}}}
96: \def\trh{T_{\rm RH}}
97: \def\g{\tilde g}
98: \def\R{{\cal R}}
99: \def\zl{z_{\rm LSS}}
100: \def\zeq{z_{\rm EQ}}
101: \def\he{$^4$He}
102: \def\VEV#1{\left\langle #1\right\rangle}
103: \def\fun#1#2{\lower3.6pt\vbox{\baselineskip0pt\lineskip.9pt
104:   \ialign{$\mathsurround=0pt#1\hfil##\hfil$\crcr#2\crcr\sim\crcr}}}
105: \def\TBD{{\bf TBD}}
106: \def\muK{\mu{\rm K}}
107: \def\muk{\mu{\rm K}}
108: \def\Oli#1{\noindent{\bf[$\diamondsuit$ #1]}}
109: 
110: \def\edge{{\it EDGE}}
111: \def\planck{{\it Planck}}
112: \def\GHz{\,{\rm GHz}}
113: \def\C{{\cal C}}
114: \hyphenation{an-isot-ropy an-isot-ropies FIRB}
115: 
116: \newcommand{\half}{\ensuremath{\frac{1}{2}\,}}
117: 
118: \newcommand{\erfc}{\ensuremath{\mbox{Erfc}}}
119: 
120: \newcommand{\figpath}{.}
121: \newcommand{\pspath}{.}
122: \newcommand{\bigfigsize}{1.0}
123: 
124: \newcommand{\py}{\ensuremath{p_y}}
125: \newcommand{\by}{\ensuremath{{\bf y}}}
126: \newcommand{\bphi}{\mbox{\boldmath$\phi$}}
127: \newcommand{\bs}{\ensuremath{{\bf s}}}
128: \newcommand{\br}{\ensuremath{{\bf r}}}
129: \newcommand{\bk}{\ensuremath{{\bf k}}}
130: \newcommand{\beps}{\mbox{\boldmath$\epsilon$}}
131: \newcommand{\bmu}{\mbox{\boldmath$\mu$}}
132: \newcommand{\bD}{\mbox{\boldmath$D$}}
133: \newcommand{\bEta}{\mbox{\boldmath$\eta$}}
134: \newcommand{\blambda}{\mbox{\boldmath$\lambda$}}
135: \newcommand{\brho}{\mbox{\boldmath$\rho$}}
136: \newcommand{\btheta}{\mbox{\boldmath$\theta$}}
137: \newcommand{\atanh}{\ensuremath{{\rm tanh}^{-1}}}
138: 
139: \newcommand{\ident}{\ensuremath{\mathbb{I}}}
140: \newcommand{\kvec}{\ensuremath{\vec{k}}}
141: \newcommand{\lmu}{\lambda_{\mu}}
142: \newcommand{\ld}{\lambda_{D}}
143: \newcommand{\rmu}{\rho_{\mu}}
144: \newcommand{\rd}{\rho_{D}}
145: \newcommand{\lw}{\blambda_w}
146: \newcommand{\lv}{\blambda_v}
147: \newcommand{\lphi}{\blambda_{\varphi}}
148: \newcommand{\rw}{\brho_w}
149: \newcommand{\rv}{\brho_v}
150: \newcommand{\rphi}{\brho_{\varphi}}
151: \newcommand{\evmu}{\vec{\epsilon}_{\mu}}
152: \newcommand{\evd}{\vec{\epsilon}_{D}}
153: \newcommand{\lemu}{\lambda_{\epsilon_{\mu}}}
154: \newcommand{\led}{\lambda_{\epsilon_{D}}}
155: \newcommand{\slope}{s}
156: \newcommand{\bomega}{\mbox{\boldmath$\omega$}}
157: 
158: \newcommand{\phibar}{\bar{\varphi}}
159: \newcommand{\Ys}{Y^{*}}
160: \newcommand{\Yst}{\tilde{Y}^{*}}
161: \newcommand{\mus}{\mu^{*}}
162: \newcommand{\must}{\tilde{\mu}^{*}}
163: \newcommand{\Ds}{D^{*}}
164: \newcommand{\Dst}{\tilde{D}^{*}}
165: \newcommand{\phis}{\varphi^{*}}
166: \newcommand{\ws}{w^{*}}
167: \newcommand{\vs}{v^{*}}
168: \newcommand{\rprior}{\pi_{N}}
169: 
170: \newcommand{\Swwy}{\Sigma_{\hat{w}\,w_{y}}}
171: \newcommand{\Slw}{\Sigma_{\lambda_{w}}}
172: \newcommand{\Swy}{\bar{\Sigma}_{w_{y}}}
173: \newcommand{\Swb}{\bar{\Sigma}_{\hat{w}}}
174: \newcommand{\xhat}{\hat{x}}
175: %%%%%%%%%%%%        end local macros      %%%%%%%%%%%%%%%%%%%%%%%%%
176: \newcommand\mnras{{Mon.~Not.~Roy.~Astron.~Soc.}}% 
177:           % Monthly Notices of the RAS 
178: \newcommand\physrep{{Phys.~Rep.}}% 
179:           % Physics Reports 
180: \newcommand\aap{{Astronomy and Astrophysics}} 
181: 
182: %\shortauthors{Schneider, Knox, Heitmann, Habib} 
183: %\shorttitle{Parameter constraints from the non-Gaussiance matter power spectrum}
184: 
185: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
186: \newcommand{\edit}{\tt\color{red}}
187: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
188: \begin{document}
189: \bibliographystyle{apsrev}
190: 
191: \title{Simulations and cosmological inference: \\A statistical model for power spectra means and covariances}% Force line breaks with \\
192: 
193: \author{Michael D. Schneider}
194: \email{schneider@ucdavis.edu}
195: \affiliation{Department of Physics, University of California, One Shields Avenue, Davis, CA 95616, USA.}
196: 
197: \author{Lloyd Knox}
198: \affiliation{Department of Physics, University of California, One Shields Avenue, Davis, CA 95616, USA.}
199: 
200: \author{Salman Habib}
201: \affiliation{T-8, MS B285, Los Alamos National Laboratory, Los Alamos, NM 87545, USA.}
202: 
203: \author{Katrin Heitmann}
204: \affiliation{ISR-1, MS D466, Los Alamos National Laboratory, Los Alamos, NM 87545, USA.}
205: 
206: \author{David Higdon}
207: \affiliation{ CCS-6, MS F600, Los Alamos National Laboratory, Los Alamos, NM 87545, USA.}
208: 
209: \author{Charles Nakhleh}
210: \affiliation{Pulsed Power Sciences, Sandia National Laboratories, Albuquerque, NM 87185, USA.}
211: 
212: \date{\today}
213: 
214: \preprint{LA-UR-08-0730}
215: \pacs{98.80.-k, 95.35.+d, 02.50.-r, 02.50.Tt}
216: 
217: %\author{Michael D. Schneider\altaffilmark{1},
218: %Salman Habib\altaffilmark{3},
219: %Katrin Heitmann\altaffilmark{2}, 
220: %David Higdon\altaffilmark{4},
221: %Lloyd Knox\altaffilmark{1},
222: %Charles Nakhleh\altaffilmark{5}}
223: %\altaffiltext{1}{Department of Physics, University of California, One
224: %  Shields Avenue, Davis,
225: %CA 95616, USA \\ email: schneider@ucdavis.edu}
226: %\altaffiltext{2}{ISR-1, MS D466, Los Alamos National Laboratory, Los Alamos, NM 87545}
227: %\altaffiltext{3}{T-8, MS B285, Los Alamos National Laboratory, Los Alamos, NM 87545}
228: %\altaffiltext{4}{CCS-6, MS F600, Los Alamos National Laboratory, Los Alamos, NM 87545}
229: %\altaffiltext{5}{X-2, MS T087, Los Alamos National Laboratory, Los Alamos, NM 87545}
230: 
231: \begin{abstract}
232: We describe an approximate statistical model for the sample variance distribution of the non-linear matter power spectrum that can be calibrated from limited numbers of simulations.  Our model retains the common assumption of a multivariate Normal distribution for the power spectrum band powers, but takes full account of the (parameter dependent) power spectrum covariance.  The model is calibrated using an extension of the framework in~\textcite{habib07} to train Gaussian processes for the power spectrum mean and covariance given a set of simulation runs over a hypercube in parameter space.  We demonstrate the performance of this machinery by estimating the parameters of a power-law model for the power spectrum.  Within this framework, our calibrated sample variance distribution is robust to errors in the estimated covariance and shows rapid convergence of the posterior parameter constraints with the number of training simulations.
233: \end{abstract}
234: 
235: \keywords{cosmology: theory -- cosmology: parameter estimation}
236: 
237: \maketitle
238: 
239: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
240: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
241: \section{Introduction}
242: 
243: The indirect nature of most cosmological observations usually requires numerical simulations of the data in order to infer constraints on cosmological models.  For parameter inference from the cosmic microwave background (CMB), galaxy and weak lensing surveys, and the Lyman~$\alpha$ forest, the required simulations can be computationally expensive in order to capture the relevant physics, noise sources, and dynamic range.  The computational demands for future observations will only increase as more accurate theoretical predictions are required to match the reduced errors in the data.  In response to this foreseen bottleneck, several tools have recently been under development to reduce computational costs by emulating the output of cosmological simulations for the CMB and galaxy surveys given a training set of simulations~\cite{pico,pico2,cosmonet,cosmonet2,heitmann06,habib07}.  These tools have been aimed at producing fast estimates of the mean simulation output, but often the error distribution for the data also needs to be inferred from simulations.  
244: 
245: Typically, error models are constructed by running many realizations of a forward simulation of the data (or a compressed version of the data) at a fixed point in the model parameter space.  These multiple realizations can be used, for example, to construct covariance matrix estimates for use in inferring cosmological parameter distributions given the data.  If the error distribution is parameter dependent, then many more forward simulations run with varying input parameters could be required~\cite{seljak03}.
246: 
247: We propose a unified framework for combining estimates of both the data mean and covariance matrix from the same set of simulations for cosmological parameter inference.  Our framework uses an efficient algorithm to interpolate between simulations run at sparse locations in parameter space and allows for propagation of interpolation errors into the inferred cosmological parameter constraints.  This is an extension of the method in Ref.~\cite{habib07} in that sample covariance estimates at several points in parameter space are interpolated along with the sample mean estimates previously considered.  By requiring sample covariance matrices to be computed at many points in parameter space, our model might appear to require a large increase in the computational resources.  However, we also outline a general method to jointly constrain the covariance matrices for different parameters with the combined simulation realizations covering the whole parameter space.  We focus our validation tests on the prerequisite step of demonstrating the statistical framework when the covariances are already known.  
248: 
249: Our model also provides a tool to determine whether the parameter dependence of the errors is important in any given application and a way to incorporate this parameter dependence when it is important (which are issues that can never be addressed by using jackknife covariance estimates from the data).  The parameter dependence of the errors is likely unimportant in any application where the parameters are known {\it a priori} to be tightly constrained.  However, it may not be clear in any given application what constitutes ``tight'' constraints for the purposes of this approximation.  When the parameters are not tightly constrained, we expect it will probably be important to model the parameter dependence of the errors whenever performing inference from a reduced statistic of the data (because residual parameter dependence of the data can be absorbed into the error distribution for the reduced statistic).  We focus on the non-linear matter power spectrum in this paper as an example of this type of situation.  Because the non-linear matter distribution is non-Gaussian, the power spectrum is not a sufficient statistic and the variance of the power spectrum receives contributions from the (parameter dependent) connected four-point function.  It has already been shown~\cite{sefusatti06} that the joint covariance of the two-point and three-point functions of the non-linear galaxy distribution has non-trivial and significant parameter dependence.  
250: 
251: 
252: %Obtaining accurate cosmological parameter constraints from large datasets such as the cosmic microwave background (CMB) and galaxy and weak lensing surveys requires careful modeling of noise sources.  Except in special cases, models for the noise in a survey are unknown and must be constructed via Monte Carlo simulations.  Typically, noise models are constructed by running many realizations of a forward simulation of the data (or a compressed version of the data) at a fixed point in the model parameter space.  These multiple realizations can be used, for example, to construct covariance matrix estimates for use in inferring cosmological parameter distributions given the data.  This procedure will be inaccurate, however, if the noise distribution (in addition to the mean model prediction) depends on the model parameters.  If this is the case, many more forward simulations run with varying input parameters could be required to accurately capture the parameter dependence of the noise distribution.  
253: 
254: A simple example where the parameter-dependence of the errors is important is the measurement of the quadrupole of the CMB power spectrum (which has received considerable interest after WMAP reported a value somewhat lower than expectations). The dominant error on the quadrupole actually depends on the value of the quadrupole itself.  So, a naive analysis where one might attempt to construct the error distribution by running Monte Carlo simulations at a fixed point in parameter space would severely bias the inferred value of the quadrupole.   
255: 
256: In fact, the properties of the large-scale CMB are simple enough that it is easy to analytically solve for the error distribution of the CMB quadrupole~(e.g. Ref.~\cite{bond98}) or, using a sampling approach, even
257: calculate the multivariate distribution of a whole set of multipole power amplitudes~\cite{wandelt04}.  However, in most situations it is likely that the only recourse is to learn about the error distribution from simulations.  For example, the CMB power spectrum error distribution can no longer be calculated analytically once systematic errors and foreground modelling are included, yet the parameter dependence of the error distribution is likely to remain important.  This will not be the case in general, and the importance of modelling the noise variation over the parameter space will have to be decided on a case-by-case basis.  
258: 
259: 
260: %In this paper we introduce a framework for bringing together estimation of both the (parameter dependent) model mean and covariance from simulations.  Our model provides an efficient tool to determine whether the parameter dependence of the errors is important in any given application and a framework to incorporate this parameter dependence when it is important.  These are issues that can never be addressed by using jacknife covariance estimates from the data, so we have focused on making use of a limited number of expensive forward simulations.  
261: We explain our framework in the context of performing parameter inference from the non-linear matter power spectrum and have therefore limited the model for the (reduced) data error distribution to a multivariate Normal.  This model could be extended, for example, by considering a mixture of multivariate Normal distributions.  We have otherwise kept a general framework that can be applied to a wide array of applications.  
262: 
263: This paper is organized as follows.  In Section~\ref{sc:DMps} we give some background on the statistical properties of the dark matter power spectrum that serve as motivation for our framework.  In Section~\ref{sc:framework} we describe our model for the power spectrum sample variance distribution and how to calibrate the model using simulations.  We then derive the joint likelihood of the simulation outputs and observed power spectrum for performing parameter estimation.  We test the performance of this framework with a toy model for the power spectrum in Section~\ref{sc:validation}.  In Section~\ref{sc:conclusions}, we summarize our results and outline future directions of this work.  A guide to the notation is given in Appendix~\ref{sc:notation}, a covariance matrix parameterization that fits in our framework is given in Appendix~\ref{sc:covparam}, and details of the likelihood calculation and evaluation are given in Appendices~\ref{sc:emlike}, \ref{sc:proposal}, \ref{sc:priors}, and \ref{sc:covmatexpressions}.
264: 
265: \section{\label{sc:DMps}Dark matter power spectrum}
266: %The statistical properties of the large-scale distribution of matter are potentially highly informative about cosmological models.  
267: %Current measurements of the galaxy~\cite{tegmark04,sanchez06} and weak lensing~\cite{benjamin07,semboloni06a} power spectra, which trace the underlying dark matter distribution, have placed constraints on cosmological parameters both competitive and complementary to other probes such as the cosmic microwave background and supernovae luminosity distance measurements.  Ongoing and planned surveys hope to achieve even better constraints by reducing noise and increasing dynamic range with larger and deeper observations.  Because these datasets are projected to reach errors as low as 1\% for several observable quantities, they will require increasingly accurate theoretical predictions of the dark matter distribution for theoretical uncertainties to remain a sub-dominant component of the inferred parameter constraints.  
268: 
269: The primary difficulty in calculating theoretical predictions of the matter distribution (when gas dynamics are neglected) is accounting for non-linear gravitational evolution of the matter density fluctuations.  The only known way to obtain reasonably accurate predictions is by running N-body numerical simulations (although perturbation theory has had some success over a limited range of length scales~\cite{bernardeau02,matsubara07}).  Because two-point functions are ubiquitous in the analysis of galaxy and weak-lensing data, substantial effort has gone into obtaining accurate predictions of the mean of estimators of the dark matter power spectrum~\cite{heitmann05,heitmann07a}.  On the other hand, the error distributions of these power spectrum estimators are much less developed.  Using N-body simulations, \textcite{meiksin99} and \textcite{scoccimarro99} showed that non-linear evolution leads to strong correlations in the band-averaged power spectrum.  \textcite{cooray00} reproduced this result using the halo model and forecast that the non-linear corrections to the power spectrum covariance led to a $\sim15$\% increase in parameter error bars from a fiducial all-sky weak lensing survey.   Using ray-tracing through N-body simulations, \textcite{semboloni06} have shown similar increases to the weak lensing power spectrum variance and correlations due to non-linear evolution.
270: 
271: On a finite or masked region of the sky the window function further modifies the covariance structure of power spectrum estimators.  \textcite{hamilton05} found that the coupling of Fourier modes due to non-linear evolution induces a significant increase in the power spectrum variance when windows are applied to the dark matter density calculated from N-body simulations.  
272: If ignored, these corrections to the power spectrum covariance could lead to biases and underestimates in inferred cosmological parameter constraints.  Preliminary forecasts have shown that improved modelling of the power spectrum covariance is important in understanding the cosmological information in the non-linear power spectrum~\cite{rimes05b,neyrinck06b}.  Ideally, these effects would be understood by generating mock survey catalogues~\cite{sefusatti06}.  But, this approach quickly becomes computationally prohibitive if we try to run multiple survey simulations for different cosmological models to capture the full parameter dependence of the non-linear dark matter distribution.  We address this problem by extending the methods of Refs.~\cite{habib07}~and~\cite{heitmann06} to build a statistical formulation to accurately model both the power spectrum mean and covariance over parameter space given a fixed number of simulated power spectrum realizations.  We use the scatter between realizations to infer the power spectrum sample variance distribution for a given cosmology, which we then interpolate over the rest of parameter space.  
273: 
274: The non-linear evolution of the dark matter density skews the one-point probability distribution away from its Gaussian initial condition.  As a result, the power spectrum is no longer a sufficient statistic for describing the density field.  An alternative approach to estimating cosmological parameters from the non-linear dark matter distribution could therefore be to model the non-Gaussian one-point distribution directly or to devise alternative summary statistics that capture additional or complementary information to the power spectrum~\cite{taylor01,takada02}.  However, we will not explore this line of inquiry in this paper.
275: 
276: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
277: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
278: \section{\label{sc:framework}Statistical framework}
279: %Because it is not possible to make direct observations of the three-dimensional matter distribution for estimating the matter power spectrum, the power spectrum is instead inferred from observations of biased tracers of the matter distribution (often in projection along the line of sight).  The distribution of observable power spectra (such as those for galaxy or cosmic shear surveys) will therefore depend on the nature of the bias and projection in addition to shot noise contributions from discrete sampling of the matter density field.  
280: 
281: We confine our investigation to the distribution of shell-averaged power spectrum estimators of the form,
282: \begin{equation}\label{eq:psestimator}
283: 	\hat{P}(k_{i}) = \frac{1}{V}\int_{{\rm S}_{i}} \frac{d^{3}k}{V_{{\rm S}_{i}}}\, 
284: 	\delta^{*}(\bk)\delta(\bk),
285: \end{equation}
286: where $\delta(\bk)$ is the Fourier transform of the matter density contrast $\delta(\br) = (\rho(\br)-\bar{\rho})/\bar{\rho}$, $V$ is the survey volume, and S$_{i}$ is a spherical shell in $k$-space with radius centered at $k_{i}$.  The shell averaging exploits the assumed isotropy of the density field and reduces the variance of the power spectrum estimator if $\delta(\br)$ is Gaussian.  On large scales, $\delta(\br)$ is indeed expected to be Gaussian and this reduced variance is a prime motivation for constructing power spectrum estimators of the form given in Eqn.~(\ref{eq:psestimator}). 
287: In the Gaussian case, $\hat{P}$ is a sum of squares of Gaussian variates, and thus Wishart distributed ({\it i.e.} the marginal distributions of each band power are $\chi^{2}$).  The variance of $\hat{P}$ then decreases as one over the number of modes in the shell (as the number of degrees of freedom increases).  However, if $\delta(\br)$ is non-Gaussian, in general there will be a non-zero connected 4-point function contributing to the variance of $\hat{P}$, which does not decrease in amplitude with increasing number of modes in the shell~\cite{meiksin99,scoccimarro99}.  The connected 4-point function also introduces correlations in the power spectrum, which are enhanced by the band-averaging (when the Gaussian contribution to the variance is reduced while the off-diagonal covariance remains constant).  
288: 
289: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
290: \subsection{\label{sc:model}Model for the sample variance distribution}
291: The Central Limit Theorem guarantees that the Normal distribution will be a valid approximation for the distribution of $\hat{P}$ from Eqn.~(\ref{eq:psestimator}) as long as there are a large number of modes in each band power~\cite{szapudi99}.  This approximation will break down on the largest scales of a survey (where only a few modes can be measured), but this could be mitigated by using wider bins.  Alternatively, an exact likelihood could be used if the survey is big enough that the largest scales probe fluctuations in the linear regime.  Therefore, for a given vector of wavenumbers $\kvec=\{ k_1,k_2,\dots,k_{n_{y}}\}$ (where $n_{y}$ is the number of bands), we model the power spectrum sample variance distribution as, 
292: \begin{equation}\label{eq:mvmodel}
293:   y(\kvec,\theta) \sim {\rm N}(\mu(\kvec;\theta),\Sigma_{y}(\theta)).
294: \end{equation}
295: That is, the observed power spectrum $y$ in bands $\kvec$ for cosmological parameters $\theta$, is assumed to be a random sample from a multivariate-Normal distribution with mean vector $\mu(\kvec;\theta)$ and covariance matrix $\Sigma_{y}(\theta)$ (which has dimensions $n_{y}\times n_{y}$).  We allow for an arbitrary covariance matrix, including the strong correlations and parameter dependence generated by non-linear evolution.  In general it is desirable to reduce the number of components of $\Sigma_{y}(\theta)$ whose $\theta$-dependence must be modelled.  We will denote this subset of components as a column-vector, $D(\kvec;\theta)$, so that $\Sigma_{y}=\Sigma_{y}(D(\kvec;\theta))$.  $D(\kvec;\theta)$ could be, for example, the eigenvalue spectrum with $\theta$-independent eigenvectors assumed for $\Sigma_{y}$.  See Appendix~\ref{sc:covparam} for an explicit example of a parameterization of the covariance matrix that makes our framework tractable.  
296: 
297: Note that Eqn.~(\ref{eq:mvmodel}) models the distribution of the power spectrum estimator given the parameters as a Gaussian, which does not necessarily imply that the distribution of the true power spectrum given the estimator is Gaussian\footnote{This implication holds only if the parameters are the true band powers and a uniform prior is assumed for the true band powers.}.  In this sense, the model in Eqn.~(\ref{eq:mvmodel}) is quite general.  
298: 
299: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
300: \subsection{Calibration from simulations}
301: 
302: We use a fixed number of stochastic simulations of $y(\kvec,\theta)$ at several values of $\theta$ to calibrate the model for the sample variance distribution in Eqn.~(\ref{eq:mvmodel}).  The first step is to choose a set of values of $\theta$ that will cover the region of parameter space we wish to explore while using as small a number of simulation runs as possible.  We refer to this choice as the {\it simulation design}.  Second, we need a way to interpolate the model for the sample variance to new regions of parameter space where no simulations have been run.  We call this the {\it simulation emulator}.
303: 
304: \subsubsection{\label{sc:simdesign}Simulation design}
305: 
306: We follow Section~II.B of Ref.~\cite{habib07} to construct the simulation design as an orthogonal array Latin hypercube sample~\cite{leary03,morris95,tang93,welch85,ye00}.   We begin by specifying a hyper-rectangle in parameter space over which we wish to run simulations.  The parameter axes are then rescaled to give a unit hypercube so that all parameters are subsequently defined on the interval $(0,1)$.  We use the {\sc R} package~\cite{R} {\tt lhs}~\cite{Rlhs} to compute the Latin hypercube sample given the number of design points, $n_{d}$.
307: 
308: For a given $\theta$ we assume a single simulation run gives a random realization of $y(\kvec,\theta)$.  We then run $n_{r_{i}}$ realizations at each design point $i=1,\dots,n_{d}$ for a total of $m\equiv \sum_{i=1}^{n_{d}}n_{r_{i}}$ simulation runs, giving output $Y_{ij}=$~$j$th realization of $y(\kvec,\theta_{i})$ with $j=1,\dots,n_{r_{i}}$.  
309: 
310: In what follows, we use the $*$ superscript to denote simulation outputs for the design settings so that $\left\{Y_{ij}\right\}\equiv Y^{*}$.  We will also find it convenient to label the parameters for the sample variance distribution of $Y^{*}$ at the design points as $\mus$ and $\Ds$ (each of length $n_{y}n_{d}\equiv q$).
311: %$\Ds\equiv\text{diag}(\mathsf{D}^{*})$ (each of length $n_{y}n_{d}\equiv q$), and $\phis$ (of length $\half n_{y}(n_{y}-1)n_{d}$).
312: Following Ref.~\cite{habib07} and to simplify later prior specifications, we center $\mus$ and $\Ds$ by the constant vectors $\mu_{c}(\kvec)$ and $D_{c}(\kvec)$ to have zero mean and then re-scale each by a single number ($\mu_{s}$ and $D_{s}$) to give unit variance (over the set of simulation runs),
313: \begin{eqnarray}\label{eq:simdecomposition}
314: 	\tilde{\mu}(\kvec;\theta_{i}^{*}) &\equiv& \left(\mu(\kvec;\theta_{i}^{*})-
315: 	\mu_{c}(\kvec)\right)/\mu_{s}, \nonumber\\
316: 	\ln\left(\tilde{D}(\kvec;\theta_{i}^{*})\right) &\equiv& \left(\ln\left(D(\kvec;\theta_{i}^{*})\right)-
317: 	D_{c}(\kvec)\right)/D_{s},
318: \end{eqnarray}
319: where $\theta_{i}^{*}$ denotes the input settings at the $i$th design point ($i=1,\dots,n_{d}$).  We transform to the logarithm of $D$ because our interpolation method requires support over the entire real line (while $D$ has only positive support if $D$ is the eigenvalue spectrum or is as defined in Eqn.~(\ref{eq:covparam})).  If a different parameterization of the $\theta$-dependence of $\Sigma_{y}$ gives a non-positive $D$, other mappings of $D$ to the real line can be substituted here.  
320: 
321: If the number of realizations at each design point, $n_{r_{i}}$, is sufficiently large, we can construct a simplified simulation emulator by first reducing the simulation design runs to sample mean and covariance estimates at each design point.  This allows us to reduce the computational complexity of the emulator by inferring the emulator parameters directly from the sample means and covariances.  We use this simplified emulator for the examples in Section~\ref{sc:validation} with the added assumption that the sample means and covariances are perfect estimates of the true means and covariances.  The number of realizations at each design point, $n_{r}$, required to make this approximation valid for the covariance can be many times the number of power spectrum bands, $n_{y}$.  More optimized techniques for estimating the power spectrum covariance from simulations might also be helpful in some applications~\cite{pope07}.
322: 
323: 
324: %reduce these to sample mean and covariance estimates, $\tilde{\mu}(\kvec;\theta^{*})$ and $\tilde{\Sigma}_{y}(\theta^{*})$ (where $\theta^{*}$ denotes parameter values in the simulation design).  In this paper, we will neglect the error in the sample means and covariances and will assume that $\tilde{\mu}(\kvec;\theta^{*}) = \mu(\kvec;\theta^{*})$ and $\tilde{\Sigma}_{y}(\theta^{*})=\Sigma_{y}(\theta^{*})$.  The number of realizations at each design point, $n_{r}$, required to make this approximation valid for the covariance can be many times the number of power spectrum bands, $n_{y}$.  More optimized techniques for estimating the power spectrum covariance from simulations might also be helpful in some applications~\cite{pope07}.
325: 
326: 
327: \subsubsection{Simulation emulator}
328: 
329: We can further reduce the number of components to model by performing a principal component (PC) analysis on the scaled means, $\tilde{\mu}(\kvec,\theta^*)$, and variances, $\tilde{D}(\kvec,\theta^{*})$, of the design simulations.  Following Ref.~\cite{habib07}, we perform a singular value decomposition on the $n_{y}\times n_{d}$ matrix of simulation sample means at each design setting, $\left[\tilde{\mu}^*\right]=\mathsf{UBV}^T$ where $\mathsf{U}$ has dimension $n_{y}\times p$ ($p\equiv\text{min}(n_{y},n_{d})$) with $\mathsf{U}^{T}\mathsf{U}=\ident_{p}$, $\mathsf{V}$ has dimension $n_{d}\times p$ with $\mathsf{V}^{T}\mathsf{V}=\ident_{p}$, $\mathsf{V}\mathsf{V}^{T}=\ident_{n_{d}}$, and $\mathsf{B}$ ($p\times p$) is a diagonal matrix of singular values.
330: We then decompose $\tilde{\mu}^{*}$ in the basis vectors, $\Phi_{\mu}=\mathsf{U}$ and weights $w=\mathsf{BV}^{T}$ so that $\Phi_{\mu}^{T}\Phi_{\mu}=\ident_{p}$ (with an analogous decomposition for $[\ln\tilde{D}^*]$)\footnote{Ref.~\cite{habib07} use the alternate weighting $\Phi_{\mu}=\frac{1}{\sqrt{n_{d}}}\mathsf{U}\mathsf{B}$ and $w=\sqrt{n_{d}}\mathsf{V}^{T}$ so that $\frac{1}{n_{d}}w^{T}w=\ident_{n_{d}}$}.  Retaining only the first $p_{\mu}$ and $p_{D}$ columns of $\Phi_{\mu}$ and $\Phi_{D}$,
331: \begin{eqnarray}\label{eq:pcdecomp}
332: 	\tilde{\mu}(\kvec;\theta) &=& \sum_{i=1}^{p_{\mu}} \Phi_{\mu,i}(\kvec)\,w_{i}(\theta) + \evmu, \nonumber\\
333: 	\ln\left(\tilde{D}(\kvec;\theta)\right) &=& \sum_{i=1}^{p_{D}} \Phi_{D,i}(\kvec)\,v_{i}(\theta) + \evd,
334: \end{eqnarray}
335: where $p_{\mu},p_D \le n_{y}$, $\Phi_i$ is the $i$th column of $\Phi$, $w_{i}$ and $v_{i}$ are (parameter dependent) basis weights, and $\evmu,\evd$ are independent and identically distributed (i.i.d.)~Normal variates parameterizing the error in the truncation of the principal component (PC) decomposition.  
336: 
337: The parameter dependence of the likelihood has now been isolated into a set of $p_{\mu}+p_{D}$ basis weights for the power spectrum mean and ``log-variance''.  To find a model that fits all the simulation design runs, we 
338: again follow Ref.~\cite{habib07} and model the basis weights as Gaussian processes (GP) over the prior parameter space,
339: \begin{eqnarray}\label{eq:gpdists}
340: 	w_{i}(\theta) &\sim& {\rm GP}\left(0,\Sigma_{w}(\theta;\lambda_{w,i},\brho_{w,i})\right)
341: 	\qquad i=1,\dots,p_{\mu},\nonumber\\
342: 	v_{i}(\theta) &\sim& {\rm GP}\left(0,\Sigma_{v}(\theta;\lambda_{v,i},\brho_{v,i})\right)
343: 	\qquad i=1,\dots,p_{D},
344: \end{eqnarray}
345: where,
346: \begin{equation}\label{eq:GPcov}
347:   \Sigma_{X}(\theta,\theta';\lambda_{X,i},\brho_{X,i}) = \lambda_{X,i}^{-1}\prod_{\ell=1}^{p_{\theta}} \rho_{X,i\ell}^{4(\theta_{\ell}-\theta_{\ell}')^{2}}
348: \end{equation}
349: gives the covariance of the GP for weight $i$ between parameter values $\theta$ and $\theta'$ with precision $\lambda_{X,i}$ and correlations (over the parameter space) $\brho_{X,i}$.
350: 
351: From Eqns.~(\ref{eq:pcdecomp}) and (\ref{eq:gpdists}) we can now derive the sampling models for the parameters $\must$ and $\Dst$.  Let $\mu^{*}$ and $D^{*}$ denote the length-$q$ ($q\equiv n_{y}n_{d}$) column vectors obtained by concatenating the sample means and variances at each design point.  Further, let $w^{*}$ and $v^{*}$ denote the PC weights for $\must$ and $\ln(\Dst)$ evaluated at the design points.  Then, from the i.i.d.\ Normal model for $\evmu$ and $\evd$ (with precisions $\lemu$ and $\led$),
352: \begin{eqnarray}\label{eq:GPmodeConditionals}
353:   \must|w^*,\lemu &\sim& {\rm
354:     N}(\Phi_{\mu}w^*,\lemu^{-1} \ident_{q}),\nonumber\\
355:   \ln\left(\Dst\right)|v^*,\led &\sim& {\rm
356:     N}(\Phi_{D}v^*,\led^{-1}\ident_{q}).
357: \end{eqnarray}
358: %These informative priors on the GP parameters are critical in obtaining converged posterior estimates~(see the discussion in Ref.~\cite{habib07} and references therein).  (We give explicit expressions for the priors and their hyper-parameters in Appendix~\ref{sc:likelihood}.)  
359: Restricted to the design points, the GP models give Normal priors for $w^{*}$ and $v^{*}$,
360: \begin{eqnarray}\label{eq:despriors}
361: 	w^{*} &\sim& {\rm N}\left(0,\Sigma_{w}^{*}(\blambda_{w},\brho_{w})\right), \nonumber\\
362: 	v^{*} &\sim& {\rm N}\left(0,\Sigma_{v}^{*}(\blambda_{v},\brho_{v})\right),
363: \end{eqnarray}
364: where $\Sigma_{w}^{*}$ and $\Sigma_{v}^{*}$ are the extensions of Eqn.~(\ref{eq:GPcov}) to the design points for each mode amplitude (see Appendix~\ref{sc:covmatexpressions}).  
365: 
366: The marginal distribution for $\must$ is,
367: \begin{eqnarray}\label{eq:wstarmarg}
368: 	&&\pi(\must|\lemu,\blambda_{w},\brho_{w}) = \nonumber\\
369: 	&&\quad\int dw^{*}\, 
370: 	\pi(\must|\lemu,w^{*})\cdot \pi(w^{*}|\blambda_{w},\brho_{w}).
371: \end{eqnarray}
372: We use the intermediate result from~Eqn.~(21) of Ref.~\cite{habib07} along with the definition $\hat{w}\equiv\Phi_{\mu}^{T}\must$ to get, 
373: \begin{eqnarray}
374: 	&&\pi(\must|\lemu,w^{*}) \propto \lemu^{\frac{n_{d}p_{\mu}}{2}}
375: 	\exp\left[-\half\lemu(w^{*}-\hat{w})^{T}(w^{*}-\hat{w})\right]
376: 	\nonumber\\
377: 	&&\times \lemu^{n_{d}(n_{y}-p_{\mu})/2}\exp\left[
378: 	-\half\lemu \tilde{\mu}^{*T}(\ident_{q}-\Phi_{\mu}\Phi_{\mu}^{T})\must\right],
379: 	\label{eq:reddesign}\\
380: 	&&\equiv \pi(w^{*}|\hat{w},\lemu)\cdot
381: 	\rprior(\must|\lemu),\nonumber
382: \end{eqnarray}
383: with an analogous result for $\pi(\ln(\Dst)|\led,v^{*})$.  It is now straightforward to perform the integral in Eqn.~(\ref{eq:wstarmarg}),
384: \begin{eqnarray}
385: 	\pi(\must|\lemu,\blambda_{w},\brho_{w}) &=& 
386: 	\pi(\hat{w}|\lemu,\blambda_{w},\brho_{w})\nonumber\\
387: 	&\times& \rprior(\must|\lemu),
388: \end{eqnarray}
389: where
390: \begin{eqnarray}
391: 	\hat{w}|\lemu,\lw,\rw &\sim& {\rm N}\left(0,\lemu^{-1}\ident + 
392: 	\Sigma_{w}^{*}(\lw,\rw)\right).
393: \end{eqnarray}
394: Similarly for $\ln(\Dst)$,
395: \begin{eqnarray}
396: 	\pi(\ln(\Dst)|\led,\blambda_{v},\brho_{v}) &=& 
397: 	\pi(\hat{v}|\led,\blambda_{v},\brho_{v})\nonumber\\
398: 	&\times& \rprior(\ln(\Dst)|\led),
399: \end{eqnarray}
400: with
401: \begin{eqnarray}
402: 	\hat{v}|\led,\lv,\rv &\sim& {\rm N}\left(0,\led^{-1}\ident + 
403: 	\Sigma_{v}^{*}(\lv,\rv)\right),
404: \end{eqnarray}
405: and $\hat{v}\equiv \Phi_{D}^{T}\ln(\Dst)$.
406: 
407: We calibrate the emulator by using Markov Chain Monte Carlo (MCMC) to draw samples from the posterior of the GP model parameters given the design runs, $\pi(\bomega | \Ys)$ ($\bomega\equiv\left\{\lemu,\lw,\rw,\led,\lv,\rv\right\}$).  For the ``simplified emulator'' described at the end of Section~\ref{sc:simdesign}, this posterior factors so the parameters for the power spectrum mean and variance can be calibrated separately,
408: \begin{eqnarray}
409:   &&\pi(\bomega|\tilde{\mu}^*,\tilde{D}^*) \propto
410:   \label{eq:simpgplike}\\
411:   &&\left[\pi(\must|\lemu,\lw,\rw)\cdot
412:   \pi(\lemu) \cdot \pi(\lw)\cdot \pi(\rw)\right]
413:   \nonumber\\
414:   &\times&\left[\pi(\ln(\Dst)|\led,\lv,\rv)\cdot
415:   \pi(\led) \cdot \pi(\lv)\cdot \pi(\rv)\right].\nonumber
416: \end{eqnarray}
417: By sampling from this posterior, we can propagate the error in the calibration of the models for $\mu$ and $D$ from our limited set of simulation runs.  For the simplified emulator likelihood in Eqn.~(\ref{eq:simpgplike}), the model for the mean is identical to that in Ref.~\cite{habib07}.  Explicit expressions for the full likelihood and priors are given in Appendices~\ref{sc:emlike},~\ref{sc:priors},~and~\ref{sc:covmatexpressions}.  
418: 
419: 
420: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
421: \subsection{Cosmological parameter estimation}
422: 
423: We now consider how to use our simulation-calibrated model for the sample variance distribution to estimate cosmological parameters from an observation of the power spectrum, denoted $y(\kvec)$.  For complete error propagation, our goal is to compute the joint posterior
424: $\pi\left(\theta_{0},\bomega|y,Y^{*}\right)$, or, if using the ``simplified emulator,'' $\pi\left(\theta_{0},\bomega|y,\mu^{*},D^{*}\right)$, where $\theta_{0}$ are the ``true'' parameters that generated the observation $y(\kvec)$.  
425: 
426: First, we decompose the mean and variance of the data error distribution into the same bases as the design runs.  The model for the sample variance distribution in Eqn.~(\ref{eq:mvmodel}) becomes
427: \begin{equation}
428: 	\tilde{y}|w(\theta_{0}),v(\theta_{0}) \sim {\rm N}\left(\Phi_{\mu}w(\theta_{0}),
429: 	W_{y}^{-1}(v(\theta_{0}))\right),
430: \end{equation}
431: where $W_{y}^{-1}(v(\theta_{0}))\equiv \Sigma_{y}\left(\exp\left(D_{s}\Phi_{D}v(\theta_{0})+D_{c}\right)\right)/\mu_{s}^{2}$.  Note that we model the mean and variance of the observations as perfectly described by the PC weights $w(\theta_{0})$ and $v(\theta_{0})$, without the error terms that were included in the decomposition of the simulation means and variances in Eqn.~(\ref{eq:pcdecomp}).  Next, to simplify the expression for marginalizing over $w$, we rewrite this distribution in terms of 
432: \[
433: \hat{w}_y(\theta) \equiv \left(\Phi_{\mu}^TW_{y}(\theta)\Phi_{\mu}\right)^{-1} \Phi_{\mu}^T W_{y}(\theta) \tilde{y}
434: \]
435: in analogy with Eqn.~(\ref{eq:reddesign}).  However, because $W_{y}$ depends on $\theta_{0}$, we must be careful to preserve all the normalization factors.  The exact relation is:
436: \begin{widetext}
437: \begin{eqnarray}
438: 	L(y|w,v) &=& \left[(2\pi)^{n_{y}}\left|W_{y}^{-1}\right|\right]^{-1/2} \,
439: 	\exp\left\{-\half\left(\tilde{y}-\Phi_{\mu}w\right)^{T}W_{y}\left(\tilde{y}-\Phi_{\mu}w\right)\right\} 
440: 	\nonumber\\
441: 	&=& \left[(2\pi)^{p_{\mu}}\left|\Phi_{\mu}^{T}W_{y}\Phi_{\mu}\right|^{-1}\right]^{-1/2}\,
442: 	\exp\left\{-\half\left(w-\hat{w}_{y}\right)^{T}\Phi_{\mu}^{T}W_{y}\Phi_{\mu}
443: 	\left(w-\hat{w}_{y}\right)\right\}\nonumber\\*
444: 	&\times& (2\pi)^{-(n_{y}-p_{\mu})/2}\left|W_{y}\right|^{1/2}
445: 	\left|\Phi_{\mu}^{T}W_{y}\Phi_{\mu}\right|^{-1/2}\,
446: 	\exp\left\{-\half\left(\tilde{y}-\Phi_{\mu}\hat{w}_{y}\right)^{T}W_{y}
447: 	\left(\tilde{y}-\Phi_{\mu}\hat{w}_{y}\right)\right\}\nonumber\\
448: 	&\equiv& L(\hat{w}_{y}|w,v)\cdot \rprior(y|v).
449: \end{eqnarray}
450: \end{widetext}
451: The first line of the final result is a properly normalized Gaussian distribution in $w$, while the second line is independent of $w$.  The priors on the PC weights for the data are,
452: \begin{eqnarray}
453: 	w(\theta_{0}) &\sim& {\rm N}\left(0,\Sigma_{\lambda_{w}}\right)\,\text{and}\nonumber\\
454: 	v(\theta_{0}) &\sim& {\rm N}\left(0,\Sigma_{\lambda_{v}}\right),
455: \end{eqnarray}
456: where,
457: \begin{eqnarray}
458: 	\Sigma_{\lambda_{w}} &=& {\rm diag}\left(\lambda_{w_{i}}^{-1}\right)\qquad
459: 	(p_{\mu}\times p_{\mu})\,\text{and} \nonumber\\
460: 	\Sigma_{\lambda_{v}} &=& {\rm diag}\left(\lambda_{v_{i}}^{-1}\right)\qquad
461: 	(p_{D}\times p_{D})\nonumber.
462: \end{eqnarray} 
463: 
464: \begin{widetext}
465: The joint likelihood for the data and simulation outputs can be constructed by multiplying the individual likelihoods and marginalizing over the variables for the mean and covariance (weighted by their prior distributions),
466: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
467: % The commented expressions below include the phi dependence
468: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
469: %\begin{eqnarray}
470: %	L(y,\Ys|\theta_{0},\bomega) &=&\int\int\int d\mus\,d\Ds\,d\phis\,
471: %	L(\Ys|\mus,\Ds,\phis)\nonumber\\
472: %	&\times&\,\int\int\int d\bar{w}\,d\bar{v}\,d\varphi_{0}\, 
473: %	 L(y|w_{0},v_{0},\phi_{0})\nonumber\\
474: %	&\times&\pi(\mus|\ws,\lemu)\cdot \pi(\Ds|\vs,\led)
475: %	\cdot\pi(\varphi^{}{*},\varphi_{0}|\phibar,\nu_{\varphi},
476: %	\lphi,\rphi)\nonumber\\
477: %	&\times&\,\pi(\bar{w}|\theta_{0},\lw,\rw)
478: %	\cdot\pi(\bar{v}|\theta_{0},\lv,\rv),
479: %\end{eqnarray}
480: %where $\bar{w}=\left\{\ws,w_{0}\right\}$ and $\bar{v}=\left\{\vs,v_{0}\right\}$.  The integrals over $\bar{w}$ and $\vs$ can be performed analytically, giving,  
481: %\begin{eqnarray}\label{eq:finallike}
482: %	L(y,\Ys|\theta_{0},\bomega) &=&\int\int\int d\mus\,d\Ds\,d\phis\,
483: %	\int dv_{0}\int d\varphi_{0}\,L(\Ys|\mus,\Ds,\phis)\nonumber\\
484: %	&\times&\, \pi(\hat{w}_{y},\hat{w}|v_{0},\phi_{0},\theta_{0},\bomega)\cdot 
485: %	\pi_{N}(\mus|\lemu)\cdot \pi_{N}(y|v_{0},\phi_{0})\nonumber\\
486: %	&\times&\, \pi(v_{0},\hat{v}|\theta_{0},\bomega) \cdot 
487: %	\pi_{N}(\Ds|\led)\cdot\pi(\varphi_{0}|\phis,\theta_{0},\bomega)\cdot
488: %	\pi(\phis|\bomega),
489: %\end{eqnarray}	
490: \begin{eqnarray}
491: 	L(y,\Ys|\theta_{0},\bomega) &=&\int\int d\mus\,d\Ds\,
492: 	L(\Ys|\mus,\Ds)\,\int\int dw^{*}\,dw_{0}\,\int\int dv^{*}\,dv_{0}\, 
493: 	 L(y|w_{0},v_{0})\nonumber\\
494: 	&\times&\pi(\mus|\ws,\lemu)\cdot \pi(\Ds|\vs,\led)
495: 	\cdot\pi(w^{*},w_{0}|\theta_{0},\lw,\rw)
496: 	\cdot\pi(v^{*},v_{0}|\theta_{0},\lv,\rv).\nonumber
497: \end{eqnarray}
498: The integrals over $w^{*},w_{0}$ and $\vs$ can be performed analytically, giving,  
499: \begin{eqnarray}\label{eq:finallike}
500: 	L(y,\Ys|\theta_{0},\bomega) &=&\int\int d\mus\,d\Ds\,
501: 	\int dv_{0}\,L(\Ys|\mus,\Ds)\\
502: 	&\times&\, \pi(\hat{w}_{y},\hat{w}|v_{0},\theta_{0},\bomega)\cdot 
503: 	\pi_{N}(\mus|\lemu)\cdot \pi_{N}(y|v_{0})\cdot
504: 	\pi(v_{0},\hat{v}|\theta_{0},\bomega) \cdot 
505: 	\pi_{N}(\Ds|\led),\nonumber
506: \end{eqnarray}	
507: where,
508: \begin{equation}\label{eq:wjointlike}
509:   \left(
510:     \begin{array}{c}
511:       \hat{w} \\
512:       \hat{w}_y
513:     \end{array} \right)
514:   \sim {\rm N}\left( \left(\begin{array}{c} 0 \\ 0 \end{array}\right), 
515:     \left[ \left(\begin{array}{cc}
516:         \lemu^{-1}\ident_{n_dp_{\mu}} & 0 \\
517:         0 &
518:         (\Phi_{\mu}^TW_{y}\Phi_{\mu})^{-1} \end{array}\right)
519:     + \left(\begin{array}{cc}
520:         \Sigma_{\hat{w}} & \Sigma_{\hat{w}\,w_y} \\
521:         \Sigma_{\hat{w}\,w_y}^T & \Sigma_{\lambda_{w}}
522:       \end{array}\right)\right]\right),
523:     %+ \Sigma_{\hat{w},\hat{w}_y}(\theta,\lw,\rw)\right] \right),
524: \end{equation}
525: \begin{equation}\label{eq:vjointlike}
526:   \left(
527:     \begin{array}{c}
528:       \hat{v} \\
529:       v(\theta_{0})
530:     \end{array} \right)
531:   \sim {\rm N}\left( \left(\begin{array}{c} 0 \\ 0 \end{array}\right), 
532:     \left[ \left(\begin{array}{cc}
533:         \led^{-1}\ident_{n_dp_{D}} & 0 \\
534:         0 & 0 \end{array}\right)
535:     + \left(\begin{array}{cc}
536:         \Sigma_{\hat{v}}  & \Sigma_{\hat{v}\,v} \\
537:         \Sigma_{\hat{v}\,v}^T & \Sigma_{\lambda_{v}}
538:       \end{array}\right)\right]\right).
539:     %+ \Sigma_{\hat{v},v}(\theta,\lv,\rv)\right] \right),
540: \end{equation}  
541: Eqn.~(\ref{eq:finallike}) is simplified further in Appendix~\ref{sc:emlike} and explicit expressions for the covariance matrices are given in Appendix~\ref{sc:covmatexpressions}.
542: 
543: For the simplified emulator (that is conditioned directly on the sample means and variances from the design runs), the integrals over $\mus$ and $D^{*}$ in Eqn.~(\ref{eq:finallike}) can be dropped.  The joint likelihood for the data and the simulation runs in this case is,
544: \begin{eqnarray}\label{eq:simpemlike}
545:   L(y,\mus,D^{*}|\theta_0,\bomega)
546:   = &\int& d^{p_D}v_{0}\,
547: 	\pi(\hat{w}_{y},\hat{w}|v_{0},\theta_{0},\bomega)\cdot 
548: 	\pi_{N}(\mus|\lemu)\cdot \pi_{N}(y|v_{0}) \nonumber\\
549: 	&\times&\pi(v_{0},\hat{v}|\theta_{0},\bomega) \cdot \pi_{N}(\Ds|\led).
550: \end{eqnarray}
551: We use this likelihood distribution in an MCMC algorithm to simultaneously constrain $\theta_{0}$ and the GP parameters.  The details of the likelihood evaluation, the prior distributions on the parameters, and the proposal distributions for our Metropolis-Hastings updates are given in the Appendices.
552: 
553: \end{widetext}
554: 
555: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
556: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
557: \section{\label{sc:validation}Validation tests}
558: In this Section we use a toy power-law model for the power spectrum to test the performance of our statistical framework.  We work with a toy model both to speed the computation time involved and to separate issues with the GP calibration from issues with modelling more complicated power spectra and their covariance structures.  Our statistical framework is kept completely general, however, so more sophisticated simulations can be added without further modification.
559: 
560: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
561: \subsection{\label{sc:toymodel}Power-law power spectrum model}
562: We use a two-parameter model for the power spectrum,
563: \begin{equation}\label{eq:toymodel}
564:   P(k_i) = A\, k_i^{-\slope},
565: \end{equation}
566: characterized by the amplitude, $A$, and slope, $\slope$.  To give sufficient information to distinguish constraints on $A$ and $\slope$, we use $n_{y}=32$ bands in $k$ with $k_{1}=2\pi/450$ and $\Delta k=8k_{1}$.  We set ``true'' values of $A=200$ and $\slope=0.5$, which roughly match the amplitude and shape of the matter power spectrum inside a 450~Mpc/$h$ cubic volume.  
567: 
568: To match our model for the power spectrum distribution, we assume $\hat{P}(\vec{k})$ is multivariate Normal distributed with covariance 
569: \begin{equation}\label{eq:toycov}
570:   C = {\rm diag}\left( \frac{2P^2(\vec{k})}{4\pi}\right).
571: \end{equation}
572: This is the standard prediction for the covariance of power of a Gaussian random field with $\sim 4\pi$ modes contributing to the power estimate in each $k$-band.  In practice, the number of modes available in each $k$-band increases with the volume of the shell in $k$-space.  However, we assume the same number of modes are used in each band as a way to increase the variance for later validation purposes.  In this model, the decomposition of the covariance as described in Section~\ref{sc:model} is trivial and we set $D(\vec{k};\theta)={\rm diag}(C)$. 
573: 
574: % ``simulation'' = compute pop. mean and covariance
575: Our ``simulations'' for this model simply involve computing the true power spectrum mean and covariance from Eqns.~(\ref{eq:toymodel}) and (\ref{eq:toycov}).  We neglect the error in sample mean and covariance estimates unless explicitly stated.  In the principal component decompositions, we retain 7 modes in the mean and 2 modes in the log-variance.  We found that our method is numerically stable to retaining modes with small weights (i.e. more modes than necessary in the decomposition), although the MCMC sampling of these weights can be inefficient.  The GP model will automatically determine which weights are active in which directions of parameter space (see Fig.~\ref{fg:rhobox}).  We just have to make sure to use enough modes in the basis decomposition so that we do not lose important features in the response.
576: 
577: 
578: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
579: \subsection{Results}
580: 
581: The marginal posterior distributions for the GP correlation parameters $\brho_{w}$ and $\brho_{v}$ are summarized in Fig.~\ref{fg:rhobox}.  The boxes are centered on the medians and extend to the first and third quartiles, while the bars indicate the extent of samples in the tails of the distribution.  A $\rho=1$ indicates a linear interpolation of the surface (perfect correlation) in the given direction of parameter space for the given PC weight, while a $\rho=0$ indicates a rapidly varying surface.  From Eqns.~(\ref{eq:pcdecomp}), (\ref{eq:toymodel}), and (\ref{eq:toycov}), we can see that the parameter dependence of the PC weights is,
582: \begin{eqnarray}
583: 	w_{i} &=& \phi_{\mu,i}^{T}\,\mu(\kvec;\theta) \sim A\,\left(\phi_{\mu,i}^{T}\kvec^{-\slope}\right),\\
584: 	v_{i} &=& \phi_{D,i}^{T}\,\ln(D(\kvec;\theta) )\sim 
585: 	2\ln(A)\,\left(\phi_{D,i}^{T}\ident\right) - 
586: 	 2\slope\,\left(\phi_{D,i}^{T}\ln\kvec\right).\nonumber
587: \end{eqnarray}
588: So $w_{i}(\theta)$ is linear in $A$ for fixed $\slope$ and $v_{i}(\theta)$ is linear in $\slope$ for fixed $A$.  This dependence is accurately reflected in the posteriors in Fig.~\ref{fg:rhobox} where $\rho_{w_{i},A}$ and $\rho_{v_{i},\slope}$ are tightly distributed near 1 for all the modes.  Although we retained 7 modes in the decomposition of the mean, $\mu(\kvec)$, only the 5 modes plotted in Fig.~\ref{fg:rhobox} showed active posterior distributions.
589: \begin{figure}[hb]
590: 	\centerline{
591: 		\scalebox{0.6}{\includegraphics{fig1.pdf}}
592: 	}	
593: 	\caption{\label{fg:rhobox}Boxplots of marginal posterior realizations of the GP correlation parameters for the PC weights of the mean (blue, circles) and covariance (red, triangles) of the power spectrum.  The points indicate the medians of the marginal posterior realizations while the boxes extend from the 1st to the 3rd quartiles.  The bars (frequently called ``whiskers'') indicate the extent of the tails of the distribution and extend to the most extreme sample point that is no more than 1.5 times the box length away from the box.}
594: \end{figure}	
595: 
596: In Fig.~\ref{fg:paramposts} we show a comparison of the marginal parameter posteriors computed using the calibrated power spectrum distribution with the exact result (computed using standard Metropolis-Hastings MCMC).  The top panels show the results for a 30-point simulation design while the middle panels show the same results for a 7-point design.  The 30-point design results are nearly indistinguishable from the exact result, indicating the design points have sufficiently sampled the variation in the mean and covariance response surfaces.  The 7-point design results, however, show noticeable deviations from the exact result.  
597: 
598: The dotted blue lines in the middle panels show the posteriors obtained by fixing the parameters in the covariance to the ``true'' values (so the parameter dependence of the covariance is neglected).   We can see that the 7-point design posteriors are much closer to the exact result than to the fixed-covariance result.  We interpret this as indicating that the parameter dependence of the covariance is still captured, but with more noise than in the 30-point design. The ``bump'' in the tail of the marginal posterior for $\slope$ in the middle panel of Fig.~\ref{fg:paramposts} is an artifact of the interpolation error in this sparse design.  The ``bump'' occurs in a region of parameter space where the GP models attempt to extrapolate from the nearest design point to the edge of our parameter prior region.  However, the 7 points in the design only loosely constrain the GP parameters so the extrapolation is not well-defined.  We have confirmed that a different 7-point design realization can remove the ``bump'' in the $\slope$ posterior, but only at the expense of larger errors elsewhere in the joint posterior.    Figure~\ref{fg:vipost} shows the marginal posteriors for the variance PC weights for the 30-point and 7-point designs.  This gives a clear illustration of how the posterior distributions broaden (although asymmetrically) as the number of design points is reduced.
599: 
600: The bottom panels of Fig.~\ref{fg:paramposts} show the marginal parameter posteriors when a noisy estimate of the sample covariance is used in the design instead of the perfectly known population covariance.  We used $n_{r}=32$ realizations to estimate the variance at each design point.
601: %, which equals the number of bands, $n_{y}$, in the variance.  
602: %The threshold $n_{r}=n_{y}$ is where the number of realizations equals the number of degrees of freedom (for our diagonal covariance model) and is about where we expect the sample variance estimates to transition from being noisy ($n_{r}<n_{y}$) to well-determined ($n_{r}\gg n_{y}$).  
603: While deviations from the exact posteriors can be seen, the match with the exact result is quite close compared to the width of the posterior distributions.
604: \begin{figure}[ht]
605: 	\centerline{
606: 		\scalebox{0.4}{\includegraphics{fig2.pdf}}
607: 	}
608: 	\caption{\label{fg:paramposts}Marginal posteriors of the ``cosmological parameters.''  Black (dashed) is the exact result while red (solid) is the result from our model.  Top: 30 point design.  Middle: 7 point design.  The blue (dotted) lines show the posteriors obtained neglecting the parameter dependence of the covariance.  Bottom: 30 point design using the sample covariance estimated from $n_{r}=32$ realizations at each design point.}
609: \end{figure}
610: \begin{figure}[ht]
611: 	\centerline{
612: 		\scalebox{0.35}{\includegraphics{fig3.pdf}}
613: 	}
614: 	\caption{\label{fg:vipost} Marginal posteriors of the principal component weights for the power spectrum ``variance'' at the true cosmology, $v(\theta_{0})$.  The black (dashed) curves show the posteriors for the 30-point design, while the red (solid) curves show the 7-point design posteriors.}
615: \end{figure}
616: 
617: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
618: \subsection{Challenges for practical implementation}
619: Several complexities may arise in applying our method to the analysis of actual galaxy or weak lensing surveys.  A significant challenge for the simplified emulator demonstrated here will likely be the computation of converged covariance matrix estimates at each simulation design point.  
620: %Additional computational requirements may appear in finding appropriate parameterizations, $D(\theta)$, of the cosmological parameter dependence of the covariance, $\Sigma_{y}(D(\theta))$.  
621: However, the only costs incurred with more design points or band-powers in the observed power spectrum are the increased time for computing the Cholesky factorizations of the covariance matrices in the likelihood (see Appendix~\ref{sc:covmatexpressions}).
622: 
623: For estimating the covariance of the 3-dimensional matter power spectrum from N-body simulations, it was found in Ref.~\cite{meiksin99} that several hundred simulation realizations were needed to obtain converged estimates of the covariance for 20 bands in wavenumber.  If the 128-point design used in Ref.~\cite{habib07} for computing the mean power spectrum is also sufficient sampling for the covariance, then our simplified emulator could possibly require as many as $\sim 128\times 200 = 25600$ runs of an N-body code to calibrate the sample variance distribution of the 3-D dark matter power spectrum.  However, we expect a sparser sampling of the covariance would suffice in several directions of the 5-dimensional parameter space used in Ref.~\cite{habib07}.  In addition, once the parameterization of (the $N\times N$) $\Sigma_{y}$ is chosen, our formulation is only concerned with modelling a few of the $N(N+1)/2$ degrees of freedom in the covariance.  It may be possible to estimate the degrees of freedom of interest with substantially fewer power spectrum realizations than are needed to determine the entire covariance matrix.  And, combined with the smoothness assumptions in the GP models, the degrees of freedom in the covariance matrix might be jointly constrained across the simulation design with many fewer realizations than are needed to constrain the covariance at just one point in parameter space.  Finally, because estimates of the mean power spectrum also require several simulation realizations, the parameterized covariance could possibly be constrained without any additional simulation runs.  The techniques proposed in Refs.~\cite{hamilton05}~and~\cite{pope07} for estimating the power spectrum covariance with limited numbers of N-body simulations could also potentially be useful for our framework.  However, more work may need to be done to accurately capture the effects of the survey window with these methods.  
624: 
625: % discuss revised statistical formulation to avoid explicit computation of covariance matrices
626: As detailed in Eqns.~(\ref{eq:finallike}) and (\ref{eq:finallike2}), these difficulties with estimating covariance matrices from simulations may be avoided by conditioning the emulator on the individual power spectrum realizations at each design point.  The potential challenge in this case is performing the Monte Carlo integral over the $(n_{y}\,\sum_{i}n_{r_{i}})$ $\Ds$ components in Eqn.~(\ref{eq:finallike2}).  This is not necessarily a computational obstacle if an appropriate proposal distribution for the Metropolis MCMC algorithm can be found.  Note that according to Eqn.~(\ref{eq:pcdecomp}), $(n_{y}-p_{D})$ components of $\Ds$ at each design point are i.i.d.\ Gaussian random variates, which should be easy to sample in an MCMC.  That leaves only $(p_{D}\sum_{i}n_{r_{i}})$ correlated components of $\Ds$ to sample.  We found that the prior on $v(\theta)$ is an excellent proposal distribution for computing the integral in Eqn.~(\ref{eq:vjointlike}), and this form may scale easily to more dimensions.
627: %These difficulties with estimating covariance matrices from simulations might be avoided by revising our statistical formulation.  Instead of conditioning the sample variance model in Eqn.~(\ref{eq:mvmodel}) on the sample mean and covariance estimates, we could condition directly on the individual power spectrum realizations at each design point.  Jointly conditioning on every simulation run means we need to compute a multivariate likelihood for an $n_{d}n_{r}n_{y}$-length vector, which could be $\sim 100\cdot10\cdot10= 10^{4}$ in practice.  In addition, the Monte Carlo marginalization over the covariance weights in Eqn.~(\ref{eq:vmarg}) would have to be extended to integrate over the basis weights for each design point; giving an $(n_{d}+1)p_{D}$-dimensional Monte Carlo integral to perform (this would be of order a few hundred dimensions in practice).  This is not necessarily a computational obstacle if an appropriate proposal distribution for the Metropolis MCMC algorithm can be found.  We found that the prior on $v(\theta)$ is an excellent proposal distribution for computing the integral in Eqn.~(\ref{eq:vmarg}), and this may scale easily to more dimensions.
628: 
629: Our toy model in Section~\ref{sc:toymodel} avoided the potentially complicated issue of parameterizing the cosmological parameter dependence of the power spectrum covariance.  While it is straightforward to calculate an eigenvalue spectrum, more general parameterizations will likely be needed for practical application of our method.  
630: There is a large literature on parameterizing covariance matrices~\cite{manly87,boik02,daniels06,pourahmadi07} that can be applied to this problem, but the choice of parameterization may be a significant complication beyond the toy model studied here.  
631: We describe how the parameterization of Ref.~\cite{pourahmadi07} can fit into our framework in Appendix~\ref{sc:covparam}, but this remains untested in a numerical example.  
632: Because our statistical formulation is insensitive to the choice of parameterization, the only other practical difficulty might come from increased computation time in repeatedly constructing and deconstructing $\Sigma_{y}(D(\kvec;\theta))$.  This will have to be addressed on a case-by-case basis.
633: 
634: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
635: \section{\label{sc:conclusions}Conclusions}
636: 
637: We have demonstrated an extension to the statistical model of Ref.~\cite{habib07} to estimate cosmological parameters from the power spectrum using a sample variance distribution calibrated from simulations.  This framework allows modelling of arbitrary, parameter-dependent power spectrum covariance matrices given 
638: several realizations of the power spectrum at a fixed number of points in parameter space.  We have focused on modelling the covariance of a multivariate Normal model for the estimated power spectrum in order to capture the correlations induced by filtering a Gaussian CMB or galaxy map or from non-linear gravitational evolution in the matter power spectrum.  
639: 
640: We tested the calibration of our model from simulations using a toy power-law model for the power spectrum.  In order to focus our tests, we used a simplified emulator that is conditioned on sample means and variances of the simulated power spectra rather than on the individual power spectrum realizations.  For this model, our calibration procedure converges quickly and is quite robust to reducing the number of simulation design points.  We expect that the requirement of computing converged sample covariance estimates at each design point is likely to be a strain on the simulation resources of actual galaxy and weak lensing survey analyses. Therefore, we have described a general formulation of the emulator that allows for constraining parameterized covariance matrices jointly with the other emulator parameters.  Again for our toy model, we have shown that while noisy covariance estimates bias the parameter constraints, the shift is small compared to the width of the parameter posterior distributions.
641: 
642: Our final goal with this work is to develop practical tools to aid in the estimation of cosmological parameters from future measurements of galaxy and cosmic shear power spectra.  As a next step we plan to demonstrate our calibration algorithm using N-body simulations of the dark matter density.  With N-body simulations, our framework provides the means to understand in which regimes modelling of non-linear evolution is important for estimating parameters and, as a related question, how much cosmological information can be extracted from non-linear scales in the dark matter distribution~\cite{taylor01,rimes05b,neyrinck06b}.  The non-trivial effects of the survey window on the power spectrum covariance discussed in Refs.~\cite{hamilton05} and~\cite{sefusatti06} could potentially lead to biases in inferred parameter constraints without the careful modelling allowed by our framework.  In particular, the scaling of the ``beat-coupling'' effect described in Ref.~\cite{hamilton05} with the fundamental modes in a survey implies extra parameter-dependence in the small-scale power spectrum covariance that could be significant in estimating cosmological parameters.  An emulator for N-body simulations will also provide valuable tests of the full emulator formulation presented here that conditions the GP models on the scatter between power spectrum realizations directly.  In this formulation (and with a parameterization as in Appendix~\ref{sc:covparam}), it may be possible to model the parameter-dependent power spectrum covariance without any more simulations than are needed to accurately estimate the mean power.
643: 
644: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
645: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
646: \begin{widetext}
647: \appendix
648: \section{\label{sc:notation} Notation}
649: See Table~\ref{tb:notationkey} for the key to the notation used in the paper.
650:  \begin{table}[hb]
651:    \begin{center}
652:      \begin{tabular}{clc}
653:        \hline \hline 
654:        Symbol & Description & Value\\
655:        \hline
656:        $n_y$  & number of band powers in $k$ & 32 \\
657:        $p_{\theta}$ & dimensionality of the parameter space & 2 \\
658:        $n_r$  & number of simulation runs at each design point  & NA\\
659:        $n_d$  & number of design points & 30,7\\
660:        $p_{\mu}$ & number of modes in decomposition of $\mu(\kvec;\theta)$ & 7\\
661:        $p_{D}$ & number of modes in decomposition of $\log(D(\kvec;\theta))$ & 2\\
662:        $\theta$ & cosmological parameters &\\
663:        $y(\kvec)$ & observed power spectrum &\\
664:        $\lemu$ & precision for the error in the PC decomposition of the mean\\
665:        $\led$ & precision for the error in the PC decomposition of the covariance\\
666:        $\lw=\left\{\lambda_{w,1},\dots,\lambda_{w,p_{\mu}}\right\}$ & precision of the GP models for the power spectra means &\\
667:        $\lv=\left\{\lambda_{v,1},\dots,\lambda_{v,p_{D}}\right\}$ & precision of the GP models for the power spectra variances &\\       
668:        $\rw=\left\{\rho_{w,1},\dots,\rho_{w,p_{\mu}p_{\theta}}\right\}$ & correlations of the GP models for the power spectra means &\\       
669:        $\rv=\left\{\rho_{v,1},\dots,\rho_{v,p_{D}p_{\theta}}\right\}$ & correlations of the GP models for the power spectra variances &\\              
670:        \hline \hline
671:      \end{tabular}
672:      \caption{\label{tb:notationkey}Key to the notation used in the paper.  The ``Value'' column indicates the values assigned in the validation tests of Section~\ref{sc:validation}.}
673:    \end{center}
674:  \end{table}
675:  
676:  
677: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
678: \section{\label{sc:covparam}Covariance matrix parameterization}
679: 
680: We require a covariance matrix parameterization that is general enough to be applied to a wide array of applications while remaining computationally tractable within our framework.  We focus on the generalized Cholesky decomposition described in Ref.~\cite{pourahmadi07}, although other choices may certainly be viable or even preferable for some applications.  For given $\theta$, we decompose the $n_{y}\times n_{y}$ covariance matrix $\Sigma_{y}$ as,
681: \begin{equation}\label{eq:covparam}
682: 	\mathsf{T}(\theta)\,\Sigma_{y}(\theta)\, \mathsf{T}^{T}(\theta) = \mathsf{D}(\theta) \quad \text{or} \quad \Sigma_{y}^{-1} = \mathsf{T}^{T}\,\mathsf{D}^{-1}\, \mathsf{T},
683: \end{equation}
684: where $\mathsf{D}$ is a diagonal matrix of strictly positive ``variances'' and $\mathsf{T}$ is a lower triangular matrix with ones on the diagonal and unconstrained off-diagonal elements 
685: \[
686: \varphi_{ij}\equiv - \mathsf{T}_{ij}\qquad 2\le i\le n_{y}, \quad j = 1,\dots,i-1.
687: \]
688: The fact that the $\varphi_{ij}$ are unconstrained makes this a computationally convenient parameterization.  In addition, because the decomposition of the inverse covariance is quadratic in the $\varphi_{ij}$, the conjugate prior for the $\varphi_{ij}$ is a Gaussian.  This will be very convenient when we specify our interpolation method below.  A conjugate Gaussian prior allows us to impose prior structure on $\Sigma_{y}$ via the mean and covariance of $\varphi_{ij}$.  Considered as a single column vector for given $\theta$,
689: \begin{equation}\label{eq:phiprior}
690: 	\varphi \sim \text{N}\left(\bar{\varphi},C_{\varphi}\right).
691: \end{equation}
692: Note that $\bar{\varphi}$ and $C_{\varphi}$ are independent of $\theta$ so that we can ``shrink'' the covariance matrix estimates towards a parameter-independent $\mathsf{T}$.  The prior mean, $\bar{\varphi}$, can be constructed from the generalized Cholesky decomposition of the average sample covariance matrix from the simulation runs, 
693: \[
694: 	\hat{S}_{y} \equiv \frac{1}{n_{d}} \sum_{i=1}^{n_{d}} \tilde{S}_{y,i},
695: \]
696: where $\tilde{S}_{y,i}$ is the sample covariance matrix at the $i$th design point.  If there are not enough simulation runs to get good estimates of $\tilde{S}_{y}$, the sample covariance of the combined simulation runs could be used instead,
697: \[
698: 	\tilde{S}_{\text{design}} = \frac{1}{m} \sum_{i=1}^{m} (y_{i}-\mu_{i})(y_{i}-\mu_{i})^{T}.
699: \]
700: The prior covariance, $C_{\varphi}$, could be diagonal with separate variances for each $\varphi_{ij}$ when little is known a priori about the structure of the $\Sigma_{y,i}$.  A slightly more informative prior is the generalized inverse Wishart prior~\cite{brown94} with scale matrix $\hat{S}_{y}$ or $\tilde{S}_{\text{design}}$.  In this case, $C_{\varphi}$ takes a block diagonal structure as described in Eqns.~(12--17) of Ref.~\cite{daniels02}.
701: 
702: The number of components to model can be reduced by expanding $\varphi$ in a set of basis functions (or covariates) so that 
703: \begin{equation}\label{eq:phidecomp}
704: 	\varphi_{ij} = \sum_{k=1}^{p_{\varphi}} Z_{ij}^{k}\gamma_{k}\quad 
705: 	p_{\varphi} \le \half n_{y}(n_{y}-1).
706: \end{equation}
707: This decomposition preserves the quadratic dependence of the log-likelihood on the variables, so a conjugate Gaussian prior can be specified on the $\gamma_{k}$.
708: 
709: In analogy with Eqn.~(\ref{eq:gpdists}), we model the individual $\varphi_{i}$'s as GPs, with the same covariance structure as in Eqn.~(\ref{eq:GPcov}),
710: \begin{eqnarray}
711: 	\varphi_{i}(\theta) \sim &\text{GP}\left(\phibar_{i},\Sigma_{\varphi}(\theta;\lambda_{\varphi,i},\brho_{\varphi,i})\right) 
712: 	& i=1,\dots,\frac{n_{y}(n_{y}-1)}{2}.
713: \end{eqnarray}
714: Note that if the decomposition in Eqn.~(\ref{eq:phidecomp}) is used, then $\gamma_{i}$ can be substituted for $\varphi_{i}$ above.  Restricted to the design points, the prior for $\varphi$ becomes,
715: \begin{eqnarray}
716: 	\phis &\sim& {\rm N}\left(\phibar,\Sigma_{\varphi}^{*}(\blambda_{\varphi},
717: 	\brho_{\varphi})\right),
718: \end{eqnarray}
719: where $\phis$ has length $\half n_{y}(n_{y}-1)n_{d}$.
720: 
721: The sampling distribution for $\phis$ is just the product of the GP prior on $\phis$ times the prior in Eqn.~(\ref{eq:phiprior}), which gives an unnormalized Gaussian distribution for $\phis$,
722: \begin{eqnarray}\label{eq:phisamp}
723: 	&&\pi(\phis|\blambda_{\varphi},\brho_{\varphi}) = 
724: 	\left|C_{\varphi}\otimes\ident_{n_{d}}\right|^{-1/2}\,
725: 	\left|\Sigma_{\varphi}^{*}\right|^{-1/2}
726: 	\\
727: 	&&\times 
728: 	\exp\left\{ -\half(\phis-\phibar)^{T}
729: 	\left[ \Sigma_{\varphi}^{*-1}+
730: 	\left(C_{\varphi}\otimes\ident_{n_{d}}\right)^{-1} \right]
731: 	(\phis-\phibar)\right\}.\nonumber
732: \end{eqnarray}
733: 
734: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
735: \section{\label{sc:emlike}Full emulator likelihood}
736: The expression for the joint likelihood of the data and simulation runs in Eqn.~(\ref{eq:finallike}) can be simplified further by performing the integral over $\mus$.   
737: If we collect all the $\mus$-dependent terms in the integrand of Eqn.~(\ref{eq:finallike}), we can write the conditional distribution for $\mus$ as,
738: \begin{eqnarray}\label{eq:musintegrand}
739: 	\pi(\mus|\Ys,\Ds,\phis,\hat{w}_{y},v_{0},\varphi_{0},\bomega) = 
740: 	L(\Ys|\mus,\Ds,\phis)\cdot \pi(\hat{w}|\hat{w}_{y},v_{0},\varphi_{0},\theta_{0},\bomega)\cdot \rprior(\mus|\lemu).
741: \end{eqnarray}
742: For this section, we have included the covariance matrix parameterization from Appendix~\ref{sc:covparam}, which accounts for the extra $\varphi$ factors above.  
743: %Because each term is Gaussian in $\mus$, the full conditional distribution is also Gaussian in $\mus$ and can therefore be sampled directly.  We just need to find the mean and covariance of the product of distributions above.  Considering just the arguments of the exponentials we have,
744: 
745: Using Eqn.~(\ref{eq:mvmodel}), we can write an explicit expression for the likelihood of the simulation design outputs,
746: \begin{eqnarray}\label{eq:simlikeexp}
747: 	\ln\left(L(\Ys|\mus,\Ds,\phis)\right) &=& -\frac{\mu_{s}^{2}}{2}
748: 	\sum_{i=1}^{n_{d}}\sum_{j=1}^{n_{r_{i}}} \left(\Yst_{ij}-\must_{i}\right)^{T} 
749: 	\Sigma_{y}^{-1}\left[\Ds_{i},\phis_{i}\right] \left(\Yst_{ij}-\must_{i}\right)\\
750: 	 &-&\half\sum_{i=1}^{n_{d}}\,n_{r_{i}}
751: 	\sum_{j=1}^{n_{y}}\left(D_{s}\ln\left(\Dst_{ij}\right)+D_{c,j}\right) 
752: 	+ \text{constant}
753: 	\nonumber\\
754: 	&=& -\frac{\mu_{s}^{2}}{2} \sum_{i=1}^{n_{d}}\left[
755: 	\left(\must_{i} - \bar{Y}^{*}_{i}\right)^{T}n_{r_{i}}\Sigma_{y,i}^{-1}
756: 	\left(\must_{i} - \bar{Y}^{*}_{i}\right)
757: 	- \bar{Y}^{*T}_{i}n_{r_{i}}\Sigma_{y,i}^{-1}\bar{Y}^{*}_{i} 
758: 	+ \sum_{j=1}^{n_{r_{i}}}\tilde{Y}^{*T}_{ij}\Sigma_{y,i}^{-1}\Yst_{ij}
759: 	\right],\nonumber
760: \end{eqnarray}
761: where $\bar{Y}^{*}_{i}=\frac{1}{n_{r_{i}}} \sum_{j=1}^{n_{r_{i}}} \Yst_{ij}$ is the sample mean at each design point.
762: 
763: \begin{eqnarray}
764: 	-2\,\ln\left(\pi(\hat{w}|\hat{w}_{y},v_{0},
765: 	\varphi_{0},\theta_{0},\bomega)\right) 
766: 	&=& \left(\hat{w}-\Swwy\Swy^{-1}\hat{w}_{y}\right)^{T} 
767: 	\left(\Swb-\Swwy\Swy^{-1}\Swwy^{T}\right)^{-1}
768: 	\left(\hat{w}-\Swwy\Swy^{-1}\hat{w}_{y}\right)
769: 	\nonumber\\
770: 	&& + 
771: 	\ln\left| \Swb-\Swwy\Swy^{-1}\Swwy^{T} \right|
772: 	+ \text{constant}
773: 	\nonumber\\
774: 	&\equiv& \left(\hat{w}-z_{y}\right)^{T} S_{w_{y}}^{-1}
775: 	\left(\hat{w}-z_{y}\right) + \ln\left|S_{w_{y}}\right| + \text{constant},
776: \end{eqnarray}
777: where $\Swy\equiv \Slw + \left(\Phi_{\mu}^{T}W_{y}\Phi_{\mu}\right)^{-1}$ and $\Swb\equiv \lemu^{-1}\ident_{n_{d}p_{\mu}} + \Sigma_{\hat{w}}$, and
778: \begin{eqnarray}
779: 	-2\,\ln\left(\rprior(\mus|\lemu)\right) 
780: 	&=& \lemu \mu^{*T}\left(\ident_{q}-\Phi_{\mu}\Phi_{\mu}^{T}\right)\mus 
781: 	-n_{d}(n_{y}-p_{\mu})\ln\left(\lemu\right) + \text{constant}.
782: \end{eqnarray}
783: %Combining these with Eqn.~\ref{eq:simlikeexp} the final expression is,
784: %\begin{eqnarray}
785: %	-2\,\ln\left(\pi(\mus|\Ys,\Ds,\phis,\hat{w}_{y},
786: %	v_{0},\varphi_{0},\bomega)\right) 
787: %	&=& \left(\mus - A_{\mu}^{-1}(F+G)\right)^{T} A_{\mu} 
788: %	\left(\mus - A_{\mu}^{-1}(F+G)\right) \nonumber\\
789: %	&&+
790: %	(F+G)^{T}A_{\mu}^{-1}(F+G) +\, \text{constant}
791: %\end{eqnarray}
792: %where
793: %\begin{eqnarray}
794: %	\left(A_{\mu}\right)_{ij} &=& \left(\Sigma_{\mu}^{-1}\right)_{ij} 
795: %	+ n_{r_{i}}\Sigma_{y,i}^{-1}\delta_{ij},
796: %	\qquad i,j=1,\dots,n_{d}\nonumber\\
797: %	\Sigma_{\mu}^{-1} &\equiv& 
798: %	\lemu\left(\ident_{q}-\Phi_{\mu}\Phi_{\mu}^{T}\right) 
799: %	+\Phi_{\mu}\left(\Swb-\Swwy\Swy^{-1}\Swwy^{T}\right)^{-1}\Phi_{\mu}^{T}
800: %	\nonumber\\
801: %	F &=& \sum_{i=1}^{n_{d}}n_{r_{i}}\Sigma_{y,i}^{-1}\tilde{Y}_{i}
802: %	\nonumber\\
803: %	G &=& \Phi_{\mu}\left(\Swb-\Swwy\Swy^{-1}\Swwy^{T}\right)^{-1} 
804: %	\Swwy\Slw^{-1}\hat{w}_{y},
805: %\end{eqnarray}
806: %%%%%%%%%%%%%%%%%%%%%
807: 
808: %Now that we have completed the square in $\mus$, we perform the 
809: %integral in Eqn.~\ref{eq:musintegrand} using the standard Gaussian formula, which gives a factor of $\left| A_{\mu}\right|^{-1/2}$.  
810: %The existing normalization factors are:
811: %\begin{equation}
812: %	\left|\Swb-\Swwy\Swy^{-1}\Swwy^{T}\right|^{-1/2}\,
813: %	\lemu^{n_{d}(n_{y}-p_{\mu})/2}\,
814: %	\left[\prod_{i=1}^{n_{d}}\prod_{j=1}^{n_{y}}\exp\left(
815: %	D_{s}\ln\left(\Dst_{ij}\right)+D_{c,j}\right)\right]^{-1/2}.
816: %\end{equation}
817: %Finally, the term left over after completing the square in the exponent is:
818: %\begin{equation}
819: %	\exp\left[-\half(F+G)^{T}A_{\mu}^{-1}(F+G)\right].
820: %\end{equation}
821: 
822: %\begin{eqnarray}
823: %	A_{y} &\equiv& \Phi_{\mu}^{T}\Sigma_{y}^{-1}\Phi_{\mu} \nonumber\\
824: %	N &\equiv& \tilde{\Phi}_{\mu}^{T}\Sigma_{y}^{-1}\Phi_{\mu} \nonumber\\
825: %	M &\equiv& A_{y}+S_{w_{y}}^{-1}\nonumber\\
826: %	z_{y} &\equiv& \Swwy\Swy^{-1}\hat{w}_{y} \nonumber
827: %\end{eqnarray}
828: 
829: 
830: The final expression for the joint likelihood of the data and simulation runs becomes,
831: \begin{align}\label{eq:finallike2}
832: 	L(y,\Ys|\theta_{0},\bomega) &= \int\int d\Ds\,d\phis\, \int dv_{0}\,d\varphi_{0}\,\,
833: 	L(\Ys|\Ds,\phis,\hat{w}_{y},v_{0},\varphi_{0},\theta_{0},\bomega)\\
834: 	&\quad\times\pi(\hat{w}_{y}|v_{0},\varphi_{0})\cdot
835: 	\rprior(y|v_{0},\varphi_{0})\cdot
836: 	\pi(v_{0},\hat{v}|\theta_{0},\bomega)\cdot\rprior(\Ds|\led)\cdot
837: 	\pi(\varphi_{0},\phis|\theta_{0},\bomega),\nonumber
838: \end{align}
839: with,
840: \begin{eqnarray}
841: 	-2\ln\left(L(\Ys|\Ds,\phis,\hat{w}_{y},v_{0},\varphi_{0},\theta_{0},\bomega)\right)
842: 	&=& 
843: %	\ln\left|\Sigma_{y}\right| + \ln\left|S_{w_{y}}\right|
844: %	-n_{d}(n_{y}-p_{\mu})\ln(\lemu)
845: 	\ln\left|	C_{y} + \Sigma_{\mu_{p}}\right|
846: 	%+\ln\left|A_{y}+S_{w_{y}}^{-1}\right| + \ln\left|NM^{-1}N^{T}\right|
847: 	+(x^{*}-z)^{T}\left(C_{y}+\Sigma_{\mu_{p}}\right)^{-1}(x^{*}-z)
848: 	\nonumber\\
849: 	&&+\, \mu_{s}^{2}\sum_{i=1}^{n_{d}}\left[
850: 	\sum_{j=1}^{n_{r_{i}}}\left(\tilde{Y}^{*T}_{ij}\Sigma_{y,i}^{-1}\Yst_{ij}\right)
851: 	-\bar{Y}^{*T}_{i}n_{r_{i}}\Sigma_{y,i}^{-1}\bar{Y}^{*}_{i} \right],
852: %	\xhat^{T} A_{y}\xhat + z_{y}^{T}S_{w_{y}}^{-1}z_{y}
853: %	- \left(A_{y}\xhat + S_{w_{y}}^{-1}z_{y}\right)^{T}M^{-1}
854: %	\left(A_{y}\xhat + S_{w_{y}}^{-1}z_{y}\right)
855: %	\nonumber\\
856: %	&& -\, 
857: %	\left(\xhat - M^{-1}\left(A_{y}\xhat + S_{w_{y}}^{-1}z_{y}\right)\right)^{T}
858: %	N^{T}\left(NM^{-1}N^{T}\right)^{-1}N
859: %	\left(\xhat - M^{-1}\left(A_{y}\xhat + S_{w_{y}}^{-1}z_{y}\right)\right)
860: 	\nonumber\\
861: %	\ln\left| A_{\mu}\right| + 
862: %	\ln\left|\Swb-\Swwy\Swy^{-1}\Swwy^{T}\right|\notag\\
863: %	&&\qquad-\, n_{d}(n_{y}-p_{\mu})\ln(\lemu)+
864: %	\sum_{i=1}^{n_{d}}\sum_{j=1}^{n_{y}}\left(
865: %	D_{s}\ln\left(\Dst_{ij}\right)+D_{c,j}\right)\notag\\ 
866: %	&&\qquad\quad
867: %	+\left[(F+G)^{T}A_{\mu}^{-1}(F+G)\right]
868: %	- \hat{w}_{y}^{T}\Swy^{-1}\Swwy^{T}S_{\hat{w}}^{-1}\Swwy\Swy^{-1}
869: %	\hat{w}_{y}\notag\\
870: %	&&\qquad\qquad
871: %	- \sum_{i=1}^{n_{d}}\sum_{j=1}^{n_{r_{i}}}\,
872: %	\tilde{Y}_{ij}^{*T} W_{y_{i}} \Yst_{ij}
873: %	+ \text{const.},\notag\\	
874: 	-2\ln\left(\pi(\hat{w}_{y}|v_{0},\varphi_{0})\right) &=& 
875: 	\ln\left|\bar{\Sigma}_{w_{y}}\right| + 
876: 	\hat{w}_{y}^{T}\bar{\Sigma}_{w_{y}}^{-1}\hat{w}_{y} 
877: 	+ \text{constant},\nonumber\\
878: 	-2\ln\left(\rprior(y|v_{0},\varphi_{0})\right) &=&
879: 	-\ln\left|W_{y}\right|
880: 	-\ln\left|\Phi_{\mu}^{T}W_{y}\Phi_{\mu}\right| + 
881: 	\left(\tilde{y}-\Phi_{\mu}\hat{w}_{y}\right)^{T}W_{y}
882: 	\left(\tilde{y}-\Phi_{\mu}\hat{w}_{y}\right)\nonumber\\
883: 	&+& \text{constant},\nonumber\\
884: 	-2\ln\left(\pi(v_{0},\hat{v}|\theta_{0},\bomega)\right) &=&
885: 	\ln\left|S_{\hat{v}}\right| + 
886: 	\left(v_{0}-\Sigma_{\hat{v}v}^{T}\bar{\Sigma}_{\hat{v}}^{-1}\hat{v}\right)^{T}
887: 	S_{\hat{v}}^{-1}
888: 	\left(v_{0}-\Sigma_{\hat{v}v}^{T}\bar{\Sigma}_{\hat{v}}^{-1}\hat{v}\right)
889: 	\notag\\
890: 	&&\qquad + \ln\left|\bar{\Sigma}_{\hat{v}}\right| 
891: 	+ \hat{v}^{T}\bar{\Sigma}_{\hat{v}}^{-1}\hat{v},
892: 	\notag\\
893: 	-2\ln\left(\rprior(\Ds|\led)\right) &=& 
894: 	-\,n_{d}(n_{y}-p_{D})\ln(\led)
895: 	+ \led \ln\left(\tilde{D}^{*T}\right) \left(I_{q}-\Phi_{D}^{*}\Phi_{D}^{*T}
896: 	\right)\ln\left(\Dst\right),\notag\\
897: 	-2\ln\left(\pi(\phis,\varphi_{0}|\theta_{0},\bomega)\right) &=&
898: 	\text{extension of Eqn.~(\ref{eq:phisamp})},\notag
899: \end{eqnarray}
900: where $S_{\hat{v}}$ is the Schur complement of $\bar{\Sigma}_{\hat{v}}$ in the joint covariance for $\hat{v},v_{0}$,
901: %\[
902: %	\ln\left|\Sigma_{y}\right| = \sum_{i=1}^{n_{d}}n_{r_{i}}\sum_{j=1}^{n_{y}}\left(
903: %	D_{s}\ln\left(\Dst_{ij}\right)+D_{c,j}\right)
904: %\].
905: \begin{eqnarray}
906: 	C_{y,i}^{-1}\equiv \mu_{s}^{2}\,n_{r_{i}}
907: 	\Phi_{\mu}^{T}\Sigma_{y,i}^{-1}\Phi_{\mu},
908: 	\qquad
909: 	\Sigma_{\mu_{p}} \equiv\left(
910: 	\begin{array}{cc}
911: 		S_{w_{y}} & 0 \\
912: 		0 & \lemu^{-1}\ident_{n_{d}(n_{y}-p_{\mu})}
913: 	\end{array}
914: 	\right),
915: \end{eqnarray}
916: $x^{*}_{i}\equiv\Phi_{\mu}^{T}\bar{Y}^{*}_{i}$ for $i=1,\dots,n_{d}$, and,
917: \[
918: 	z \equiv \left(
919: 	\begin{array}{c}
920: 		\Swwy\Swy^{-1}\hat{w}_{y}\\
921: 		0
922: 	\end{array}
923: 	\right) \qquad (\text{dimensions:}\, n_{d}n_{y}\times 1).
924: \]
925: 
926: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
927: \section{\label{sc:proposal} Proposal distributions for Metropolis MCMC updates}
928: %\subsection{Metropolis updates on $v_{0}$}
929: We use the prior on $v(\theta)$ as a proposal distribution for the Metropolis updates in performing the Monte Carlo integral in Eqns.~(\ref{eq:finallike}) or (\ref{eq:finallike2}).  We rewrite the joint prior in Eqn.~(\ref{eq:vjointlike}) as
930: \begin{equation}
931: 	\pi(v,\hat{v}|\theta_0,\led,\lv,\rv) = \pi(v|\hat{v},\theta_{0},\led,\lv,\rv)\cdot 
932: 	\pi(\hat{v}|\led,\lv,\rv),
933: \end{equation}
934: where, using Eqn.~(\ref{eq:vjointlike}) and the conditional Normal rule,
935: \begin{eqnarray}
936: 	v|\hat{v},\theta_{0},\led,\lv,\rv &\sim& {\rm N}\left( \Sigma_{\hat{v}v}^{T}
937: 	\bar{\Sigma}_{\hat{v}}^{-1}\hat{v}, \Sigma_{\lambda_{v}} - 
938: 	\Sigma_{\hat{v}v}^{T}\bar{\Sigma}_{\hat{v}}^{-1}\Sigma_{\hat{v}v}\right), \nonumber\\
939: 	\hat{v}|\led,\lv,\rv &\sim& {\rm N}\left(0,\bar{\Sigma}_{\hat{v}}\right),
940: \end{eqnarray}
941: and we have defined the shorthand, $\bar{\Sigma}_{\hat{v}}\equiv \led^{-1}\ident + \Sigma_{\hat{v}}$.  
942:  
943:  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
944:  \section{\label{sc:priors} Priors for the emulator hyperparameters}
945:  The full joint posterior of the cosmological and GP parameters is,
946:  \begin{eqnarray}
947:  	\pi\left( \theta_{0},\lemu,\lw,\rw,\led,\lv,\rv| y,\tilde{\mu}^{*},\tilde{D}^{*}\right)
948: 	&\propto& L\left( y,\tilde{\mu}^{*},\tilde{D}^{*}| 
949: 	\theta_{0},\lemu,\lw,\rw,\led,\lv,\rv\right) \nonumber\\*
950: 	&\times& \pi\left(\lemu\right)\pi(\lw)\pi(\rw)
951: 	\pi\left(\led\right)\pi(\lv)\pi(\rv)\pi(\theta_{0}),\nonumber
952:  \end{eqnarray}
953:  where the likelihood is given in Eqns.~(\ref{eq:finallike2}) or (\ref{eq:simpemlike}), and, following Ref.~\cite{habib07},
954:  \begin{eqnarray}
955:  	\pi\left(\lemu\right) &\propto& \lemu^{a_{\mu}-1} e^{-b_{\mu}\lemu}, 
956: 	\nonumber\\
957: 	\pi(\lw) &\propto& \prod_{i=1}^{p_{\mu}} \lambda_{w_i}^{a_w-1} 
958: 	e^{-b_w\lambda_{w_i}}, 
959: 	\nonumber\\
960: 	\pi(\rw) &\propto& \prod_{i=1}^{p_{\mu}}
961: 	\prod_{j=1}^{p_{\theta}} \rho_{w_{ij}}^{a_{\rho_w}-1} 
962: 	(1-\rho_{w_{ij}})^{b_{\rho_w}-1}, 
963: 	\nonumber\\
964: 	\pi\left(\led\right) &\propto& \led^{a_{D}-1} e^{-b_{D}\led}, 
965: 	\nonumber\\
966: 	\pi(\lv) &\propto& \prod_{i=1}^{p_{D}} 
967: 	\lambda_{v_i}^{a_v-1} e^{-b_v\lambda_{v_i}},
968: 	\nonumber\\
969: 	\pi(\rv) &\propto& \prod_{i=1}^{p_{D}} \prod_{j=1}^{p_{\theta}}
970: 	\rho_{v_{ij}}^{a_{\rho_v}-1} (1-\rho_{v_{ij}})^{b_{\rho_v}-1},\,\text{and}
971: 	\nonumber\\
972: 	\pi(\theta_{0}) &=& {\rm uniform}(0,1)\, 
973: 	\text{for each}\, \theta_{0,i}\quad
974: 	i=1,\dots,p_{\theta},
975:  \end{eqnarray}
976:  with
977: % \begin{eqnarray}
978: %   a_{\mu}' &\equiv& a_{\mu}+\half n_{d}\left(n_y-p_{\mu}\right), \nonumber\\
979: %   b_{\mu}' &\equiv& b_{\mu}+\half \mu^{*T}\left(
980: %     I-\Phi_{\mu}\Phi_{\mu}^T \right)\mu^*, \nonumber\\
981: %   a_{D}' &\equiv& a_{D}+\half n_{d}\left(n_y-p_{D}\right), \nonumber\\
982: %   b_{D}' &\equiv& b_{D}+\half D^{*T}\left(
983: %     I-\Phi_{D}\Phi_{D}^T \right)D^*,
984: % \end{eqnarray}
985: $a_{\mu}=a_{D}=1$, $b_{\mu}=b_{D}=0.0001$, $a_{w}=a_{v}=5$, $b_{w}=b_{v}=5$, 
986:  $a_{\rho_{w}}=a_{\rho_{v}}=1$, $b_{\rho_{w}}=b_{\rho_{v}}=0.2$.
987:  
988: % The posterior for the GP parameters given the reduced simulation outputs is,
989: % \begin{align} \label{eq:simppost}
990: %   \pi\left( \lemu,\led,\lambda_w,\rho_w,\lambda_v,\rho_v
991: %     |\hat{w},\hat{v}\right) &\propto 
992: % %   \int dw\,dv\,
993: % %   L(\hat{w}|w,\lambda_{\mu})\cdot L(\hat{v}|v,\lambda_D) \cdot
994: % %   \pi(w|\lambda_w,\rho_w)\cdot
995: % %   \pi(v|\lambda_v,\rho_v) \nonumber\\
996: % %   &\qquad \cdot \pi(\lambda_{\mu})\cdot \pi(\lambda_D)\cdot
997: % %   \pi(\lambda_w)\cdot \pi(\rho_w)\cdot
998: % %   \pi(\lambda_v) \cdot\pi(\rho_v) \nonumber\\\nonumber\\
999: %   \left|\lemu^{-1}I + \Sigma_w\right|^{-1/2}
1000: %   \exp\left[-\half \hat{w}\left(
1001: %       \lemu^{-1}I+\Sigma_w\right)^{-1} \hat{w}\right] \nonumber\\
1002: %   &\qquad\times \left|\led^{-1}I + \Sigma_v\right|^{-1/2}
1003: %   \exp\left[-\half \hat{v}\left(
1004: %       \led^{-1}I+\Sigma_v\right)^{-1} \hat{v}\right] \notag\\
1005: %   &\qquad\times \lemu^{a_{\mu}'-1}
1006: %   e^{-b_{\mu}'\lemu}\, \led^{a_{D}'-1}
1007: %   e^{-b_{D}'\led}\, \notag\\
1008: %   &\qquad\times\prod_{i=1}^{p_{\mu}} \lambda_{w_i}^{a_w-1}
1009: %   e^{-b_w\lambda_{w_i}}\, \prod_{i=1}^{p_{\mu}}
1010: %   \prod_{j=1}^{p_{\theta}} \rho_{w_{ij}}^{a_{\rho_w}-1} (1-\rho_{w_{ij}})^{b_{\rho_w}-1} \notag\\
1011: %   &\qquad\times\prod_{i=1}^{p_{D}} \lambda_{v_i}^{a_v-1}
1012: %   e^{-b_v\lambda_{v_i}}\, \prod_{i=1}^{p_{D}} \prod_{j=1}^{p_{\theta}}
1013: %   \rho_{v_{ij}}^{a_{\rho_v}-1} (1-\rho_{v_{ij}})^{b_{\rho_v}-1}
1014: % \end{align}
1015: % with 
1016: % \begin{align}
1017: %   a_{\mu}' &\equiv a_{\mu}+\half m\left(n_y-p_{\mu}\right) \notag\\
1018: %   b_{\mu}' &\equiv b_{\mu}+\half \mu^{*T}\left(
1019: %     I-\Phi_{\mu}\Phi_{\mu}^T \right)\mu^* \notag\\
1020: %   a_{D}' &\equiv a_{D}+\half m\left(n_y-p_{D}\right) \notag\\
1021: %   b_{D}' &\equiv b_{D}+\half D^{*T}\left(
1022: %     I-\Phi_{D}\Phi_{D}^T \right)D^* 
1023: % \end{align}
1024: % Prior hyperparameters: $a_{\mu},b_{\mu},a_D,b_D,
1025: % a_w,b_w,a_{\rho_w},b_{\rho_w}, a_v,b_v, a_{\rho_v},b_{\rho_v}$.
1026: 
1027: \section{\label{sc:covmatexpressions}Explicit expressions for covariance matrices}
1028: To evaluate the distributions in Eqns.~(\ref{eq:wjointlike}) and (\ref{eq:vjointlike}), we use,
1029:  \begin{eqnarray}
1030: 	\Sigma_{\lambda_{w}} &=& {\rm diag}\left(\lambda_{w_{i}}^{-1}\right) \qquad
1031: 	(\text{dimensions:}\,p_{\mu}\times p_{\mu}), \nonumber\\
1032: 	\Sigma_{\lambda_{v}} &=& {\rm diag}\left(\lambda_{v_{i}}^{-1}\right) \qquad
1033: 	(\text{dimensions:}\,p_{D}\times p_{D}),\nonumber
1034: \end{eqnarray}
1035: \begin{equation}
1036: 	\Sigma_{\hat{w}} = \left(
1037: 	\begin{array}{ccc}
1038: 	\Lambda_{w_{1}} & 0 & 0 \\
1039: 	0 & \ddots & 0 \\
1040: 	0 & 0 & \Lambda_{w_{p_{\mu}}}
1041: 	\end{array}\right) \qquad (\text{dimensions:}\,(n_{d}\,p_{\mu})\times(n_{d}\,p_{\mu})),
1042: \end{equation}
1043: following Eqn.~(15) of Ref.~\cite{habib07}, with,
1044: \begin{equation}
1045: 	\Lambda_{w_{i}} = \lambda_{w_{i}}^{-1} R(\theta^{*};\rho_{w_{i}}) \qquad
1046: 	(\text{dimensions:}\,n_{d}\times n_{d}),
1047: \end{equation}
1048: \begin{equation}
1049: 	\Sigma_{\hat{w}\, w_{y}} = \left(
1050: 	\begin{array}{ccc}
1051: 	\lambda_{w_{1}}^{-1}R(\theta,\theta^{*};\rho_{w_{1}}) & 0 & 0 \\
1052: 	0 & \ddots & 0 \\
1053: 	0 & 0 & \lambda_{w_{p_{\mu}}}^{-1}R(\theta,\theta^{*};\rho_{w_{p_{\mu}}})
1054: 	\end{array}\right)
1055: 	\qquad (\text{dimensions:}\,(n_{d}\,p_{\mu})\times p_{\mu}),
1056: \end{equation}
1057: and $R(\theta,\theta^{*};\rho_{w_{i}})$ is an $n_{d}\times 1$ correlation sub-matrix.  Analogous expressions hold for $\Sigma_{\hat{v}}$ and $\Sigma_{\hat{v}v}$.
1058: 
1059: We invert the full covariance matrices in  Eqns.~(\ref{eq:wjointlike}) and (\ref{eq:vjointlike}) using the block-inverse formula,
1060: \begin{equation}
1061: 	\left(\begin{array}{cc}
1062: 	A & B \\
1063: 	B^{T} & D
1064: 	\end{array}\right)^{-1}
1065: 	= \left(\begin{array}{cc}
1066: 	(A-BD^{-1}B^{T})^{-1} & -A^{-1}B(D-B^{T}A^{-1}B)^{-1} \\
1067: 	-(D-B^{T}A^{-1}B)^{-1}B^{T}A^{-1} & (D-B^{T}A^{-1}B)^{-1}
1068: 	\end{array}\right).
1069: \end{equation}
1070: For the $w$ likelihood, $A=\lambda_{\epsilon_{\mu}}^{-1}\ident_{n_{d}\,p_{\mu}} + \Sigma_{\hat{w}}$, $B = \Sigma_{\hat{w}\,w_{y}}$, $D = \Sigma_{\lambda_{w}} + (\Phi_{\mu}^TW_{y}\Phi_{\mu})^{-1}$.  For the $v$ likelihood, $A=\lambda_{\epsilon_{D}}^{-1} \ident_{n_{d}p_{D}} + \Sigma_{\hat{v}}$, $B=\Sigma_{\hat{v}\,v}$, $D=\Sigma_{\lambda_{v}}$.
1071: 
1072: \end{widetext}
1073: 
1074: \begin{acknowledgments}
1075: We would like to thank Roman Scoccimarro, Benjamin Wandelt, and Martin White for useful discussions.  S.H., K.H., and D.H. acknowledge support from the LANL
1076: LDRD program.  The computations for this paper were carried out using the Scythe Statistical Library~\cite{scythestatlib}.  This work was supported in part at UC Davis by NSF Grant AST-0709498.
1077: \end{acknowledgments}
1078: 
1079: \bibliography{compexp}
1080: \end{document}
1081: