1: %\documentstyle[aps,twocolumn,prl,tighten,flushrt]{revtex} %eqsecnum,
2: \documentstyle[aps,twocolumn,tighten]{revtex}
3: %\documentstyle[preprint,aps,prl]{revtex}
4:
5: %\documentclass{revtex}
6: %\usepackage{aps}
7: %\usepackage{twocolumn}
8: %\usepackage{tighten}
9:
% ---- Figure-placement helpers -------------------------------------------
% \btt{name}: opens a \tt group, typesets a math-mode backslash, then the
% argument (used to display a control-sequence name).
10: \def\btt#1{{\tt$\backslash$#1}}
% Load the plain-TeX epsf macros; \epsfbox and \epsfxsize below come from it.
11: \input epsf
% \plotone{file}: centre the EPS file, with its width set to 1.0\columnwidth.
12: \def\plotone#1{\centering \leavevmode
13: \epsfxsize= 1.0\columnwidth \epsfbox{#1}}
% \plottwo{file}: body is currently identical to \plotone.
14: \def\plottwo#1{\centering \leavevmode
15: \epsfxsize= 1.0\columnwidth \epsfbox{#1}}
% \plotfiddle{file}{height}{angle}{hscale}{vscale}{hoffset}{voffset}:
% emits a \vbox of the given height (holding a zero-width strut of the same
% height) and then a psfile \special; note that the \special lists the
% arguments in reverse order (#7 voffset ... #3 angle).
16: \def\plotfiddle#1#2#3#4#5#6#7{\centering \leavevmode
17: \vbox to#2{\rule{0pt}{#2}}
18: \special{psfile=#1 voffset=#7 hoffset=#6 vscale=#5 hscale=#4 angle=#3}}
19: %\def\plotrotate#1{\centering \leavevmode
20: %\epsfxsize= 0.8\columnwidth \epsfbox{#1 {angle=-90}}}
% \plotrotate{file}: hands the string "file angle=-90" to \epsfbox, with no
% \leavevmode and no explicit \epsfxsize (both are commented out).
% NOTE(review): whether \epsfbox honours a trailing "angle=" is not
% established in this file -- confirm before relying on the rotation.
21: \def\plotrotate#1{\centering
22: %\leavevmode
23: %\epsfxsize= 1.7\columnwidth
24: \epsfbox{#1 angle=-90}}
25:
% \bff expands to nothing (a disabled font switch).
26: \def\bff{}
27: %\newcommand\apj[3]{ {\it Astrophys. J.} {\bf #1}, #2 (19#3) }
% Journal-name abbreviations: plain text, no font change, no trailing space.
% The argument-taking citation helpers around them are commented out.
28: \def\apjl{Astrophys. J. Lett.}
29: %\newcommand\prd[3]{ {\it Physical Review D} {\bf #1}, #2 (19#3) }
30: %\newcommand\prl[3]{ {\it Physical Review Letters} {\bf #1}, #2 (19#3) }
31: %\newcommand\np[3]{ {\it Nucl.~Phys.} {\bf #1}, #2 (19#3) }
32: \def\mnras{Mon.Not.Roy.As.Soc.}
33: \def\araa{Annu. Rev. Astron. Astrophys.}
34: \def\aj{Astron. J.}
35: \def\asap{Astron. Astrophys.}
36:
37: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
38: %%%%%%%%%%%%%%%%%%% begin local macros %%%%%%%%%%%%%%%%%%%%%%
39: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Shorthands that open/close the equation and eqnarray environments.
40: \def\be{\begin{equation}}
41: \def\ee{\end{equation}}
42: \def\bea{\begin{eqnarray}}
43: \def\eea{\end{eqnarray}}
% \sm: upright M with subscript \odot (solar mass); math mode only.
44: \def\sm{{\rm M}_\odot}
% \uline{text}: underlines text; supplies its own $...$, so use it in text
% mode. \smash hides the height and depth of the boxed text from \underline.
45: \def\uline#1{$\underline{\smash{\hbox{#1}}}$}
46:
% \muk: micro-kelvin, \mu followed by an upright K; math mode only.
47: \def\muk{\mu{\rm K}}
48:
% ---- Unit macros ---------------------------------------------------------
% Each expands to a thin space (\,) followed by the unit in roman type, so
% it is written directly after the number.
% \kms, \kpc, \mpc and \yr are redefined further down in this preamble (MST
% section); those later definitions are the ones in force in the document.
49: \def\ang{\,{\rm\AA}}
50: \def\flux{\,{\rm erg\,cm^{-2}\,arcsec^{-2}\,\AA^{-1}\,s^{-1}}}
51: \def\GeV{\,{\rm GeV}}
52: \def\TeV{\,{\rm TeV}}
53: \def\gev{\,{\rm GeV}}
54: \def\keV{\,{\rm keV}}
55: \def\MeV{\,{\rm MeV}}
% NOTE: \sec is also LaTeX's secant operator; this definition replaces it.
56: \def\sec{\,{\rm sec}}
57: \def\Gyr{\,{\rm Gyr}}
58: \def\yr{\,{\rm yr}}
59: \def\rcm{\,{\rm cm}}
60: \def\pc{\,{\rm pc}}
61: \def\kpc{\,{\rm kpc}}
62: \def\Mpc{\,{\rm Mpc}}
63: \def\mpc{\,{\rm Mpc}}
64: \def\eV{{\,\rm eV}}
65: \def\ev{{\,\rm eV}}
66: \def\erg{{\,\rm erg}}
% Digits cannot be part of a control-sequence name, so the next four lines
% define \cmm, \cm, \cmm and \gcmm with the digit as a mandatory delimiter.
% NOTE: \cmm2 and \cmm3 therefore define the same macro \cmm; the second
% definition replaces the first, leaving only the \cmm3 form usable.
67: \def\cmm2{{\,\rm cm^{-2}}}
68: \def\cm2{{\,{\rm cm}^2}}
69: \def\cmm3{{\,{\rm cm}^{-3}}}
70: \def\gcmm3{{\,{\rm g\,cm^{-3}}}}
71: \def\kms{\,{\rm km\,s^{-1}}}
% \HO: the Hubble-constant unit string 100h km sec^-1 Mpc^-1.
72: \def\HO{{100h\,{\rm km\,sec^{-1}\,Mpc^{-1}}}}
% ---- Notation shorthands (math mode unless they carry their own $) -------
% \mpl is redefined later in this preamble as m_{\cal P}.
73: \def\mpl{{m_{\rm Pl}}}
74: \def\mpp{{m_{\rm Pl,0}}}
75: \def\trh{T_{\rm RH}}
76: \def\g{\tilde g}
77: \def\R{{\cal R}}
78: \def\zl{z_{\rm LSS}}
79: \def\zeq{z_{\rm EQ}}
80: \def\he{$^4$He}
% \VEV{x}: x between auto-sized angle brackets.
81: \def\VEV#1{\left\langle #1\right\rangle}
% \fun: two-argument helper meant to be called through \mathpalette (#1 is
% the math style, #2 the symbol); it stacks #2 above \sim. \la and \ga later
% in this preamble use it, and an identical copy of this definition follows
% them.
82: \def\fun#1#2{\lower3.6pt\vbox{\baselineskip0pt\lineskip.9pt
83: \ialign{$\mathsurround=0pt#1\hfil##\hfil$\crcr#2\crcr\sim\crcr}}}
84: \def\C{{\cal C}}
85: \def\TBD{{\bf TBD}}
86: \def\muK{\mu {\rm K}}
% \hyi, \hii: "H I" / "H II" with a scriptstyle roman numeral; text mode.
% Both end in a tie (~), so a space is already included after them.
87: \def\hyi{H\thinspace{$\scriptstyle{\rm I}$}~}
88: \def\hii{H\thinspace{$\scriptstyle{\rm II}$}~}
89:
90: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
91: %%%%%%%%%%%% MST local macros %%%%%%%%%%%%%%%%%%%%%%%%%%%
92: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \figcapsize: switches to \footnotesize and sets \baselineskip to 0.1cm.
93: \def\figcapsize{\footnotesize \baselineskip=0.1cm}
% NOTE: \vec is redefined as the font switch \bf, so {\vec k} (used by \vk,
% \vr, ... below) gives a bold letter, not LaTeX's arrow accent.
94: \def\vec{\bf}
% \reff{n}: superscript "n)" reference mark; supplies its own $...$.
95: \def\reff#1{$^{{#1})}$}
96: %\def\lq{\char 140}
% Unit macros, second set: these use a tie (~) inside the \rm group instead
% of a thin space. \kms, \kpc, \mpc and \yr replace the earlier definitions
% of the same names.
97: \def\cc{{\rm~cm}^{-3}}
98: \def\hmpc{{\, {\rm h}^{-1}~\rm Mpc}}
99: \def\hkpc{{\, {\rm h}^{-1}~\rm kpc}}
100: \def\kms{{\rm~km~s^{-1}}}
101: \def\kev{{\rm~keV}}
102: \def\km{{\rm~km}}
103: \def\kpc{{\rm~kpc}}
104: \def\mpc{{\rm~Mpc}}
105: \def\msun{{\,M_\odot}}
106: \def\hmsun{{\, {\rm h}^{-1}\,M_\odot}}
107: \def\lsun{{\,L_\odot}}
108: \def\yr{{\rm~yr}}
109: \def\td{\Upsilon_d}
110: \def\Kel{{\rm~K}}
% \deg: degree sign as a superscript \circ; math mode only. This replaces
% LaTeX's \deg operator.
111: \def\deg{^\circ}
112: %\def\'{^{\prime}}
% Bracket helpers: average, bra, ket, absolute value (fixed-size delimiters).
113: \def\avrg#1{{\langle #1 \rangle}}
114: \def\bra#1{{\langle #1 \vert}}
115: \def\ket#1{{\vert #1 \rangle}}
116: \def\abs#1{{\vert #1 \vert}}
117: \def\real{\Re}
118: \def\imag{\Im}
% Vector shorthands built on the redefined \vec (i.e. bold letters).
% \vr and \vv are each defined twice with identical bodies.
119: \def\vk{{\vec k}}
120: \def\vr{{\vec r}}
121: \def\vv{{\vec v}}
122: \def\vr{{\vec r}}
123: \def\vv{{\vec v}}
124: \def\vx{{\vec x}}
125: \def\vq{{\vec q}}
% Bold-letter shorthands written directly with \bf.
126: \def\bv{{\bf v}}
127: \def\bs{{\bf s}}
128: \def\bk{{\bf k}}
129: \def\br{{\bf r}}
130: \def\brpk{{\bf r}_{pk}}
131: \def\bx{{\bf x}}
132: \def\bxpk{{\bf x}_{pk}}
133: \def\bR{{\bf R}}
134: \def\bV{{\bf V}}
135: \def\bX{{\bf X}}
136: \def\bq{{\bf q}}
137: \def\bsb{{\bf s}_{\rm b}}
138: \def\bsf{{\bf s}_{\rm f}}
% Hatted letters.
139: \def\hk{{\hat k}}
140: \def\hr{{\hat r}}
141: \def\hx{{\hat x}}
142: \def\hq{{\hat q}}
% NOTE: \c is LaTeX's cedilla accent; after this line it prints a
% calligraphic C instead. NOTE(review): check that no author name or
% bibliography entry relies on the accent.
143: \def\c{{\cal C}}
144: \def\X{{\cal K}}
145: \def\eps{\varepsilon}
146: \def\pomega{\varpi}
% \lbar: \lambda overprinted with \mathchar'26 (presumably lambda-bar --
% confirm the glyph in the output).
147: \def\lbar{{\mathchar'26\mskip-9mu\lambda}}
148: \def\vs{\vskip 16pt}
149: \def\oneskip{\vskip 16pt}
% Text abbreviations: each expansion already ends in a space (and \eg, \ie
% in ", "), so no punctuation or space should follow the macro.
150: \def\eg{{e.g., }}
151: \def\ie{{i.e., }}
152: \def\etal{{\it et al. }}
153: \def\et{{et al. }}
154: \def\etc{{etc. }}
155: \def\via{{via }}
156: \def\half{{\textstyle{1\over2}}}
157: \def\Rth{{R_{TH}}}
158: \def\Rf{{R_{ G}}}
159: \def\Rs{{R_{s}}}
160: \def\Rpk{{R_{pk}}}
% Defines \p with the mandatory delimiter "3m"; write it as \p3m.
161: \def\p3m{P$^3$M}
162: \def\zbox{z_{\rm box}}
163: \def\Tr{{\rm Tr}\,}
164: \def\rC{{\rm C}}
% Replaces the earlier \mpl (m_{\rm Pl}); this version is the one in force.
165: \def\mpl{m_{\cal P}}
166: \def\hz{{\hat z}}
% \la / \ga: relations "<" and ">" stacked above \sim, built with \fun via
% \mathpalette.
167: \def\la{\mathrel{\mathpalette\fun <}}
168: \def\ga{\mathrel{\mathpalette\fun >}}
% Same body as the earlier definition of \fun in this preamble.
169: \def\fun#1#2{\lower3.6pt\vbox{\baselineskip0pt\lineskip.9pt
170: \ialign{$\mathsurround=0pt#1\hfil##\hfil$\crcr#2\crcr\sim\crcr}}}
171: %%%%%%%%%
% \boldsymbol is defined here as the plain font switch \bf; \boldcdot and
% \boldnabla below are built on it.
172: \def\boldsymbol{\bf}
173:
% \boldcdot: \cdot inside a \boldsymbol (= \bf) group, with binary-operator
% spacing. \boldnabla: \nabla inside a \boldsymbol group.
174: \def\boldcdot{\mathbin{{\boldsymbol\cdot}}}
175: \def\boldnabla{{\boldsymbol\nabla}}
% \BF selects the font cmmib10, loaded at its design size.
176: \font\BF=cmmib10
177: \def\gam{\hat{\gamma}}
% Single-letter vector macros: the letter set in \BF inside an \hbox.
% NOTE: \u, \v and \d (and \k, \r in LaTeX2e) are text-accent commands;
% these definitions replace them. NOTE(review): check that no accented name
% in the text or bibliography relies on them.
178: \def\k{{\hbox{\BF k}}}
179: \def\x{{\hbox{\BF x}}}
180: \def\r{{\hbox{\BF r}}}
181: \def\u{{\hbox{\BF u}}}
182: \def\v{{\hbox{\BF v}}}
183: \def\d{\delta}
% \dD: \delta with roman subscript D.
184: \def\dD{\delta_{\rm D}}
% \lexp / \rexp: left and right angle brackets wrapped in \mathop.
185: \newcommand{\lexp}{\mathop{\langle}}
186: \newcommand{\rexp}{\mathop{\rangle}}
% \order{x}: calligraphic O followed by (x).
187: \newcommand{\order}[1]{{\cal O}(#1)}
188: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
189: %%%%%%%%%%%% end local macros %%%%%%%%%%%%%%%%%%%%%%%%%%%
190: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
191:
192: \begin{document}
193: \twocolumn[\hsize\textwidth\columnwidth\hsize\csname @twocolumnfalse\endcsname
194: \draft
195: \title{CMB Power Spectrum Estimation via Hierarchical Decomposition}
196: \author{Olivier\ Dor\'e$^{1}$, Lloyd\ Knox$^{2}$,
197: and Alan\ Peel$^{2}$}
198: \address{$^1$ Institut d'Astrophysique de Paris,
199: 98bis Boulevard Arago, F-75014 Paris, FRANCE; dore@iap.fr}
200: \address{$^2$ Department of Physics, One Shields Avenue,
201: University of California, Davis, California 95616, USA;
202: \\ lknox@ucdavis.edu, apeel@bubba.ucdavis.edu}
203: \date{\today}
204: \maketitle
205:
206: \begin{abstract}
207:
208: We have developed a fast, accurate and generally applicable method for
209: inferring the power spectrum and its uncertainties from maps of the cosmic
210: microwave background (CMB) in the presence of inhomogeneous and correlated
211: noise. For maps with $10^4$ to $10^5$ pixels, we apply an exact power
212: spectrum estimation algorithm to submaps of the data at various
213: resolutions, and then combine the results in an optimal manner. To
214: analyze larger maps efficiently one must resort to sub--optimal
215: combinations in which cross--map power spectrum error correlations are
216: only calculated approximately. We expect such approximations to work well
217: in general, and in particular for the megapixel maps to come from the next
218: generation of satellite missions.
219:
220: \end{abstract}
221: \pacs{98.70.Vc}
222: %\narrowtext
223: ]
224: \section{Introduction}
225:
226: The anisotropy of the Cosmic Microwave Background (CMB) is proving to be a
227: powerful cosmological probe \cite{jaffe00}. Many cosmological parameters,
228: and the primordial power spectra of density and gravity--wave
229: perturbations, can be inferred from the statistical properties of the
230: CMB---in particular from its angular power spectrum \cite{forecast}.
231: Unfortunately, exact methods for calculating the power spectrum and its
232: uncertainties from real observations are very expensive
233: computationally~\cite{bcjk}. Supercomputers are required for analysis of
234: current datasets and even they will not be sufficient for the next
235: generation of experiments \cite{borrill99}. Here we introduce an
236: approximate method for reducing a CMB map to a power spectrum and its
237: uncertainties.
238:
239: Generally applicable exact methods for finding the angular power spectrum,
240: $C_l$, that maximize the likelihood have operation counts proportional to
241: $N^3$ where $N$ is the number of pixels in the map. Our approach to
242: overcoming this $N^3$ scaling involves a hierarchical decomposition of the
243: map into a set of submaps. That is, we subdivide the original
244: (``primary'') map into non--overlapping regions, each with a manageable
245: number of pixels, in order to estimate the power spectrum from each of
246: these submaps using an exact algorithm. To study the larger angular scale
247: fluctuations we coarsen the primary map and if the number of these coarse
248: pixels is still too large, we again divide into submaps. To go to yet
249: larger angular scales, we coarsen the map further, and so on. Then we
250: calculate the expected correlations between the power spectrum estimates
251: from all different submaps at all different resolution levels in order to
252: optimally average them together. A similar multi--grid technique was
253: recently developed for the reduction of time--ordered CMB data to
254: maps~\cite{dore01}.
255:
256: Several other approaches to overcoming the $N^3$ scaling have been tried.
257: These include the ``pseudo--$C_l$'' method of \cite{wandelt}, and the
258: ``correlation--function'' approach of \cite{szapudi00}. We expect these
259: methods to work well in the case of homogeneous noise, but to be
260: significantly sub--optimal for the levels of inhomogeneity expected in
261: planned observational programs. None of these methods has been shown to
262: deal properly with correlated noise. Minor modifications of the
263: correlation--function approach may make this path very attractive, though
264: a remaining issue is the importance of noise correlations between pixels.
265:
266: The $N^3$ scaling has been overcome also by a special--purpose exact
267: method that is expected to be applicable to the maps generated by NASA's
268: Microwave Anisotropy Probe ({\it MAP\/})
269: satellite\footnote{\texttt{http://map.gsfc.nasa.gov/}}. This method
270: \cite{oh99} assumes the noise is not correlated from one pixel to another
271: and that the noise level variations are roughly azimuthally symmetric.
272: Some of the techniques used in \cite{oh99} may eventually find their place
273: in more generally applicable (and yet still exact) power spectrum
274: estimation algorithms, though the feasibility is not yet clear. Another
275: special--purpose exact algorithm is that of \cite{wandelt01}, which is
276: applicable to experiments that scan on rings. The main idea is to analyze
277: ring sets instead of maps since both the noise and signal covariance
278: structures are simple on the rings, whereas the noise structure can be
279: complicated in the map space. Although some of its critical hypotheses
280: have not been tested yet on realistic data, the ring--set approach might
281: still be of practical importance since it may provide a useful
282: zeroth--order solution for experiments that nearly scan on rings.
283:
284: In section II we describe our method in detail. In section III we present
285: the results of an application to a map with ten thousand pixels---comparable
286: to the size of maps coming from long--duration balloon (LDB)
287: flights. In section IV we show results from a map four times larger and
288: discuss prospects for application of our method to even larger maps such
289: as those expected from {\it MAP\/} and {\it
290: Planck\/}\footnote{\texttt{http://astro.estec.esa.nl/SA-general/Projects/Planck/}}.
291: In section V we compare with other methods. In section VI we conclude.
292:
293: \section{Method}
294:
295: Here we first describe our method in the simplest conceptual terms, and
296: then go on to discuss subtleties which complicate our implementation.
297:
298: \subsection{From the Likelihood Function to the Quadratic Estimator}
299:
300: We describe here the use of a quadratic estimator to find the maximum of
301: the likelihood function, and the shape of the likelihood function near
302: that maximum, as described in \cite{bjk98}. Time--ordered data from
303: observation of the CMB are usually reduced to a set of pixelized maps
304: $\Delta_i$, $i=1,\ldots,N$, which can be decomposed into the sum of a
305: signal and a noise contribution, $\Delta = s + n$. Assuming that both
306: the noise and the signal are normally--distributed, their statistical
307: properties are fully characterized by the covariance matrices $S = \langle
308: s s^T \rangle$ and $N = \langle n n^T\rangle$. Assuming furthermore that
309: the noise and signal are not correlated with each other, we can define
310: \be
311: C \equiv \langle\Delta \Delta^T\rangle = S + N \; .
312: \ee
313:
314: The observed sky signal is assumed to be the realization of an isotropic
315: Gaussian random field whose power spectrum $C_l$ is the quantity we
316: want to measure. Thus we are interested in the likelihood function
317: ${\mathcal L}(\Delta \mid C_l)$ which is given by
318: \be
319: -2 \ln {\mathcal L}(\Delta \mid C_l) = \ln\
320: \mathrm{det}\ C + \Delta^TC^{-1}\Delta \; .
321: \ee
322: In particular we are interested in the location of the maximum of this
323: function (which is the most likely $C_l$) and the curvature at the
324: maximum, $-\partial^2\ln{\mathcal L}/\partial C_l \partial C_{l'}$ (which
325: is approximately the inverse of the covariance matrix for $C_l$). Note
326: that $C$ depends on $C_l$ since
327: \be
328: S_{ii'} = \sum_{l} {2l +1 \over 4\pi} C_l {\mathcal W}_{ii'}(l)
329: \ee
330: where $\mathcal{W}$ is the covariance window function of the experiment.
331:
332: Given an initial estimate of $C_l$ (hereafter, the input $C_l$) one can
333: reach the likelihood maximum as follows. By Taylor--expanding $\ln
334: {\mathcal L}$ to second order in $\delta C_l$ around $C_l$, and replacing
335: $-\partial^2\ln{\mathcal L}/\partial C_l \partial C_{l'}$ with its
336: expectation value one can find an expression for $\delta C_l$ such that
337: $C_l + \delta C_l$ maximizes the likelihood:
338: \be
339: \label{eqn:quadest}
340: \delta C_l = \sum_{l'} {1\over 2}F^{-1}_{ll'}{\rm Tr}\left[\left(\Delta
341: \Delta^{\rm T} - C\right)\left(C^{-1}{\partial C \over \partial C_{l'}}
342: C^{-1}\right)\right]
343: \ee
344: and
345: \be
346: \label{eqn:fish}
347: F_{ll'}
348: %\equiv \langle -{\partial^2 \ln{\mathcal L} \over \partial C_l
349: %\partial C_{l'}} \rangle
350: = {1\over 2}{\rm Tr}\left[C^{-1}
351: {\partial C \over \partial C_l}
352: C^{-1}{\partial C \over \partial C_{l'}}\right]
353: \ee
354: is the Fisher matrix \cite{tegmark97}.
355:
356: Equation~\ref{eqn:quadest} is a quadratic function of the data and hence
357: the expression ``quadratic estimator''. Note that we have suppressed the
358: pixel indices in the various vectors and matrices. Since $\ln{\mathcal
359: L}$ is not equal to its second--order Taylor expansion (i.e., ${\cal L}$
360: is not a Gaussian in $C_l$), some iteration is generally required to reach
361: the likelihood maximum.
362:
363: \subsection{Hierarchical Decomposition and Recombination}
364:
365: Now let us consider multiple maps and use Greek indices to label them.
366: Estimates of $\delta C_l$ from map $\alpha$, $\delta C^\alpha_l$, are
367: correlated with those from map $\beta$ with correlation matrix:
368: \bea
369: \label{eqn:correl}
370: \langle \delta C^\alpha_l \delta C^\beta_{l'} \rangle & \equiv &
371: {\mathcal F}^{-1}_{\alpha l,\beta l'} \\
372: & = & \sum_{l'',l'''} (F_\alpha^{-1})_{ll''}(F_\beta^{-1})_{l'l'''}
373: \times \nonumber \\
374: & & \ \ \ {1\over2}{\rm Tr}
375: \left[A_{\alpha,l''}C_{\alpha\beta}A_{\beta,l'''}C_{\beta\alpha}\right]\nonumber
376: \eea
377: where
378: \be
379: \label{eqn:defA}
380: A_{\alpha,l} \equiv C_{\alpha\alpha}^{-1}{\partial C_{\alpha\alpha} \over
381: \partial C_l}C_{\alpha\alpha}^{-1}\; .
382: \ee
383: Note that if $\alpha =\beta$ then Eq.~\ref{eqn:correl} simplifies to the
384: usual result:
385: \be
386: \langle \delta C_l \delta C_{l'} \rangle = F_{ll'}^{-1}\; .
387: \ee
388:
389: Given this result, we know how to combine the various $\delta C_l$
390: estimates from each submap into a final $\delta C_l$ estimate from all the
391: submaps in a minimum--variance (optimal) manner. The minimum--variance
392: $\delta C_{l'}$ satisfies
393: \be
394: \label{eqn:combine}
395: \sum_{l'}\left(\ \sum_{\alpha \beta}\ {\mathcal F}_{\alpha l,\beta l'}\
396: \right)\ \delta C_{l'}\ =\ \sum_{\alpha \beta l'}\ {\mathcal F}_{\alpha
397: l,\beta l'}\ \delta C^\beta_{l'}
398: \ee
399: and has a weight matrix (inverse of covariance matrix):
400: \be
401: F_{ll'} = \sum_{\alpha \beta} {\mathcal F}_{\alpha l,\beta l'}\; .
402: \ee
403:
404: Although for simplicity we have written these expressions for estimating
405: individual $C_l$'s, issues of signal--to--noise and spectral resolution
406: usually lead us to estimate the power spectrum in bands of $\ell$, where
407: the shape of $C_l$ inside the bands is assumed. The usual assumption
408: (which we use in our applications) is that $l(l+1) C_l/(2\pi)$ is constant
409: inside the band.
410:
411: Our treatment of the correlations of the $\delta C_l$'s between pairs of
412: maps has been general. The maps may be spatially separate or overlapping;
413: they may have equivalent or different pixel sizes. Thus we have worked
414: out the most general solution to optimally combine the power spectra of
415: submaps which are the result of hierarchical decomposition (HD) of a
416: primary map.
417:
418: \subsection{Spectral Resolution}
419:
420: Even with optimal combining of the power spectrum estimates from the
421: various submaps, the HD procedure results in a sub--optimal estimation of
422: the power spectrum. Fortunately, in the cases we study, the sub--optimal
423: results are quite close to the optimal results. Departure from the
424: optimal results is almost entirely due to the degraded spectral resolution
425: of the high--resolution submaps. This loss of spectral resolution is the
426: primary drawback of the HD approach.
427:
428: The spectral resolution is most severely degraded at the highest
429: resolution levels where the submaps have the smallest spatial extents.
430: For any map of linear extent, $L$, it is difficult to distinguish the mode
431: $P_l(\cos\theta)$ from $P_{l+\delta l}(\cos\theta)$ where $\delta l \la
432: \pi/L$ \cite{endnote5}. If one wishes to achieve a spectral resolution of
433: $\delta l$ for a square map with linear pixel size $r_p$ then this map
434: must have $n$ pixels where
435: \be
436: n \simeq 2.6 \times 10^3 \left( {30 \over \delta l} {7' \over r_p}
437: \right)^2\; .
438: \ee
439: Fortunately $\delta l = 30$ and $r_p = 7'$ are expected to be adequate for
440: LDB--type maps and $2.6 \times 10^3$ pixels is a small enough submap size
441: to allow for reasonable computation times (as shown below).
442:
443: \subsection{Scaling}
444:
445: We now calculate how computation time scales with total number of pixels
446: in the full--resolution primary map, $N$, and the number of
447: multipole--moment bands, $N_b$. For simplicity we assume that all submaps
448: have the same number of pixels, $n$, and that we estimate the power
449: spectrum for each submap in the same number of bands. Estimating the power
450: spectrum and Fisher matrix for each submap takes on the order of
451: $N_b^2n^3$ operations so at the finest level we have on the order of
452: $N_b^2(N/n) n^3 = N_b^2N n^2$ operations. In a systematic coarsening
453: (such as the one below defined by combining four pixels at one resolution
454: into one larger pixel for the next coarser level), most of the submaps are
455: at the finest resolution and therefore analysis and combining of these
456: finest submaps dominates the demands on memory and CPU time.
457:
458: For large enough $N$, the dominant computational step will be in
459: calculating the correlations between submaps. The matrix multiplication
460: in Eq.~\ref{eqn:correl} takes on the order of $n^3$ operations, so
461: performing it for every pair of submaps and pair of bands takes on the
462: order of $N_b^2N^2 n$ operations.
463:
464: The procedure can in principle be parallelized for the efficient use of
465: $n_{\rm proc}$ processors, where $n_{\rm proc}$ ranges anywhere from $N_b$
466: to $\sim (N/n)^2 N_b^2$. The crucial use of parallelization comes in the
467: dominant combining stage, which scales as $(N/n)^2 N_b^2$, and involves
468: the combination of $\sim {1\over 2}(N/n)^2$ pairs of submaps. This type
469: of independent pair loading can be efficiently shared on any number of
470: processors lower than ${1\over 2}(N/n)^2$. For LDB--type missions one
471: might have $N_b \sim 10$ and $(N/n)^2 N_b^2 \sim 4\times 10^4$ and
472: approximately $200$ pairs of submaps to combine (if we indiscriminately
473: retain all submap-submap correlations (see Section IV)). For
474: supercomputers with $n_{\rm proc} \la 10^2$, every processor can be
475: efficiently used.
476:
477: \subsection{The Noise Matrix}
478:
479: Our approach assumes that we begin with a pixelized map and its
480: corresponding noise covariance matrix. Map--making procedures usually
481: produce a weight matrix, which is the inverse of the noise matrix.
482: Inverting an arbitrary weight matrix takes on the order of $N^3$
483: operations. Fortunately, this inversion only needs to be done once and is
484: feasible for LDB--size maps.
485:
486: For larger maps, treatment of the weight matrix by general matrix
487: inversion algorithms is impossible. Fast methods are being developed
488: \cite{borrill01} which rely on the origin of the map weight matrix in the
489: weight matrix of the time--ordered data. That is, the map weight matrix
490: is $A^TN^{-1}A$ where $N^{-1}_{tt'}$ is (here) the time--stream weight
491: matrix for a stationary noise process, and $A_{ti}$ is the pointing matrix
492: element that is one if at time--sample $t$ the telescope is sampling map
493: pixel $i$ and zero otherwise. This special structure allows for each
494: iteration of a conjugate gradient solution to be performed much faster
495: than for an arbitrary matrix.
496:
497: Another possibility (suggested in \cite{szapudi00}) is to calculate the
498: noise covariance matrix by Monte Carlo methods. In other words, one would
499: make repeated simulations of the map noise and average those together to
500: get any desired elements of the noise matrix. In addition to possible
501: speed advantages, this approach also has storage advantages since one
502: probably needs fewer than $N/2$ realizations to have a sufficiently
503: accurate estimate of the noise. One may still need thousands of
504: realizations of the noise---\eg 20,000 realizations are required for the
505: noise matrix elements to be accurate to within 1\% of the diagonal.
506:
507: \subsection{Coarsening}
508:
509: The amount of work to be done depends on the choice of number of
510: resolution levels, which is governed by how many pixels are combined to
511: form one pixel at the next--coarsest level. Greater coarsening between
512: levels leads to fewer required operations, but at the expense of greater
513: loss of information. Since the cost in computing time is slight for using
514: the most modest coarsening possible while maintaining (roughly) square
515: pixels, we always coarsen by averaging four pixels into one. This
516: coarsening is also easily implemented in the HEALPix pixelization scheme,
517: which we use \cite{GoHi98}.
518:
519: In general, one can create a coarse submap $\Delta$ from a fine submap
520: $\delta$ as follows:
521: \be
522: \label{eqn:coarsen}
523: \Delta = W^{-1} \alpha w \delta
524: \ee
525: where $\alpha_{ci}$ is one for all fine pixels $i$ in coarse pixel $c$ and
526: zero otherwise, $w$ is some weighting of the fine pixels and $W = \alpha w
527: \alpha^T$. The coarse--fine and coarse--coarse noise covariance matrices
528: are given by:
529: \bea
530: \label{eqn:finecoarsenoise}
531: \langle \Delta \delta^T \rangle_{\rm noise} &=& W^{-1}\alpha w N \\
532: &=& W^{-1}\alpha \ \ ({\rm if \ }w=N^{-1}) \nonumber \\
533: &=& {\alpha N\over 4} \ \ ({\rm if \ }w=I) \nonumber
534: \eea
535: and:
536: \bea
537: \label{eqn:coarsecoarsenoise}
538: \langle \Delta\Delta^T \rangle_{\rm noise} &=&
539: W^{-1}\alpha wNw^T\alpha^T W^{-1} \\
540: &=& W^{-1} \ \ ({\rm if}\ w=N^{-1}) \nonumber \\
541: &=& {\alpha N\alpha^T\over 16} \ \ ({\rm if}\ w=I)\; . \nonumber
542: \eea
543:
544: For optimal coarsening $w=N^{-1}$ and for uniform averaging, $w=I$. We
545: assume that we are coarsening four pixels into one and therefore that
546: $\alpha \alpha^T = 4I$. We see that uniform averaging leads to noise
547: covariance matrices that are easy to calculate. For optimal averaging we
548: need to invert $W$ which is substantially less challenging than inverting
549: $N^{-1}$ to get $N$ since it has 1/16 the number of elements. The same
550: technique used for calculating $N$ by exploiting the origin of $N^{-1}$ in
551: time--ordered data (as explained in the previous subsection) can be used
552: to get $W^{-1}$ \cite{jaffe01}.
553:
554: Coarsening will usually result in pixel sizes that are large compared to
555: the angular resolution of the instrument and therefore pixelization
556: effects must be taken into account. Our treatment of the effect of
557: pixelization on the signal correlation function is approximate, \ie we use
558: a pixel window which is the average of the evaluated power spectrum for
559: every individual pixel. To prevent these approximations from creating
560: errors in the final power spectrum, we ignore information from multipole
561: moments greater than some critical value where the approximation
562: introduces significant error. Pixelization effects are discussed in more
563: detail in the Application section.
564:
565: \subsection{Iteration}
566:
567: A single application of the quadratic estimator of Eq.~\ref{eqn:quadest}
568: might not result in a $C_l$ that is sufficiently close to the likelihood
569: maximum. This will be the case if the input $C_l$ is too far from the
570: likelihood maximum. Fortunately, iterative application of
571: Eq.~\ref{eqn:quadest} has been shown to converge quite rapidly
572: \cite{bjk98}.
573:
574: When using the hierarchical decomposition approach, it is important that
575: the iteration be done {\it globally}. That is, within each iteration, the
576: power spectrum from each submap should be estimated using the same input
577: $C_l$. If iteration is performed within the submaps, the combined result
578: will suffer from cosmic bias~\cite{bjk00}, which results from the fact that
579: uncertainties in $C_l$ are not normally--distributed. For a
580: normally--distributed variable, the curvature of the log of the likelihood
581: function is independent of location in the parameter space (because the
582: likelihood is a Gaussian). However, for $C_l$, this curvature does depend
583: on location. For larger values of $C_l$ the curvature is smaller (\ie the
584: variance is larger). Thus, upward fluctuations should result in larger
585: variances than downward fluctuations and so if one combines them together
586: assuming Gaussianity, the net result is a downward bias due to the
587: over--weighting of the downward fluctuations.
588:
589: The combination procedure of Eq.~\ref{eqn:combine} implicitly assumes the
590: estimates are normally distributed. We avoid the cosmic bias that might
591: result from this assumption by weighting the downward and upward
592: fluctuations equally. That is, we make sure to calculate ${\mathcal
593: F}_{\alpha l,\beta l'}$ from the same $C_l$ for all submaps. Thus any
594: desired iteration, \eg motivated by a large correction from the input
595: $C_l$, should be done globally.
596:
597: Since the uncertainty in $C_l$ is non--Gaussian, specifying the $C_l$ that
598: maximizes the likelihood function, and $\langle \delta C_l \delta C_{l'}
599: \rangle$, does not completely characterize the uncertainty. The
600: uncertainty can be approximately characterized by use of the ``offset
601: log--normal form'' \cite{bjk00}. That is, error in the quantity $Z_l
602: \equiv \ln\left(C_l + x_l\right)$ {\it is} approximately
603: normally--distributed. The offset, $x_l$, is a measure of the noise
604: contribution to the uncertainty, as opposed to the sample--variance
605: contribution to the uncertainty. It can be calculated as outlined in
606: \cite{bjk00}.
607:
608: \section{Application}
609:
610: First we discuss the specifications for the simulated maps we used. Then
611: we compare the results of HD with those of the exact method.
612:
613: \subsection{Simulation Map Details}
614:
615: We have applied our method using a Fortran code, which we have named {\it
616: Madcumba}, to two different simulated maps, hereafter simulations $A$ and
617: $B$. In both cases, the angular--power spectrum used was that of a {\it
618: COBE}--normalized adiabatic, scale--invariant ``lambda'' cold dark matter
619: ($\Lambda$CDM) model with $\Omega_\Lambda = 0.6$, $\Omega_b = 0.05$,
620: $\Omega_{\rm cdm}=0.35$ and $H_0 = 75\ {\rm km\,sec^{-1}\,Mpc^{-1}}$ and
621: was generated by the publicly available code CMBfast \cite{seljak96}.
622: The simulated signal maps were generated using the \texttt{synfast}
623: routine in the publicly available HEALPix package \cite{GoHi98}, at
624: HEALPix $N_{\rm side} = 256$ (level 8, where $N_{\rm side} = 2^{\rm
625: level}$), in which the pixel solid angle is around $(13.7')^2$, assuming a
626: circular beam with full--width at half--maximum of $20'$. Finally, pixel
627: noise taken from a Gaussian distribution with zero correlations between
628: pixels was added to the maps. The only significant differences between our
629: two simulations are size and noise characteristics.
630:
631: The simulation $A$ map has $10^4$ pixels, is square in shape, and has a
632: homogeneous noise variance of $(20\ \mu K)^2$ in each pixel. Its
633: relatively small size allows for the power spectrum to be estimated by the
634: exact method (\ie without dividing into submaps) using the MADCAP
635: package~\cite{borrill99}. This is compared to our calculation via HD into
636: four equal--area square 2500 pixel submaps at full resolution and one
637: coarse 2500 pixel submap at HEALPix $N_{\rm side} = 128$ (level 7) which
638: covers the same area as the primary map.
639:
640: The simulation $B$ map is also square in shape and has $4\times 10^4$
641: pixels with a noise variance that is cosine--modulated throughout the map,
642: varying from $(20\ \mu K)^2$ to $9 \times (20\ \mu K)^2$. Here, we
643: decompose the primary map into sixteen submaps at full resolution, four
644: submaps at the next coarser resolution and one coarsest resolution submap
645: which covers the same area as the primary map but, by being two levels
646: coarser, contains $1/16^{\rm th}$ as many pixels. Thus, as with simulation
647: $A$, we use $n=2500$ pixel submaps.
648:
649: \subsection{Comparison with Exact Method}
650:
651: The top panel of Fig.~\ref{fig:pow_smallmap} shows estimates of the powers
652: from the individual submaps in simulation $A$. The bottom panel shows
653: both the result of optimally combining them and the exact results obtained
654: directly from the primary map. The solid line in both panels is the
655: original power spectrum for the simulations. The differences between the
656: power estimates are less than 20\% of the standard error from the exact
657: method.
658:
659: Not only do the power spectrum estimates agree quite well, but so do the
660: estimates of the uncertainties. The error bars in
661: Fig.~\ref{fig:pow_smallmap} are the square roots of the diagonal elements
662: of the respective Fisher matrices. In Fig.~\ref{fig:fish_smallmap} one
663: can see how well entire rows of the exact and HD Fisher matrices agree.
664:
665: Clearly, the bigger the submaps at the finest resolution, the better this
666: approach works. For a fixed length scale of interest, larger submaps
667: contain a greater fraction of corresponding pixel pairs, and therefore
668: achieve better spectral resolution ($\delta \ell$). Unfortunately, the
669: compute--time, when dominated by the combine procedure, scales as $n$ and
670: therefore as $1/\delta \ell^2$ (or possibly $n^2$ but with a much smaller
671: pre--factor (see section IV)). Thus, choice of $n$ can be critical. We
672: studied how our information loss varies with $n$ by comparing the error
673: bars from the HD procedure to the full analysis for $n=2500$ (the case
674: above), $n=1600$, and $n=900$. The results are shown in
675: Fig.~\ref{fig:sigmab}. Note that for the $n=2500$ case all the error bars
676: are increased over the exact case by less than 10\%. These larger error
677: bars are consistent with the less than 20\% differences (in units of
678: variance of exact results) between the power estimates.
679:
680: \begin{figure}
681: \plotone{fig1.eps} \caption{Simulation $A$ Results. Top panel: Power
682: spectrum estimates from four individual full--resolution 2500 pixel
683: submaps (triangles) and one coarse 2500 pixel submap. Bottom panel:
684: Power spectrum estimates from optimally combining the top--panel results
685: (solid circles) and from the exact calculation (open circles). Note that
686: in both panels, points are slightly shifted horizontally for
687: clarity.\label{fig:pow_smallmap}}
688: \end{figure}
689:
690: The upward trend in error ratio with increasing band number is an effect
691: of decreasing spectral resolution. To understand this, we examine
692: Fig.~\ref{fig:weight} which shows the ratio of the HD over the exact
693: method of the band contributions to the total weight, $W_b$, where:
694: \be
695: W_b \equiv \sum_{b'} F_{bb'}
696: \ee
697: and the total weight of an experiment is $W \equiv \sum_{b} W_b$. For this
698: analysis we switch to a finer binning of 25 bands, each with width $\delta
699: l = 30$.
700:
701: Note first the short--dashed line which is four times the ratio of $W_b$
702: for one full resolution submap over the one for the primary map. If the
703: four submaps were uncorrelated, we would expect this ratio to be $\sim 1$.
704: However, since the submaps are correlated, this ratio is greater than 1.
705: We see that submap--submap correlations are more important at lower $\ell$
706: than higher $\ell$ values.
707:
708: \begin{figure}
709: \plotone{fig2.eps}
710: \caption{\label{fig:fish_smallmap}Three rows of the Fisher matrix
711: calculated exactly (solid lines) and also via the combination (HD)
712: procedure (dashed lines) for simulation $A$.}
713: \end{figure}
714:
715: \begin{figure}
716: \plotone{fig3.eps}
717: \caption{Error bars from HD divided by error bars for the exact analysis.
Each case represents a primary map with $4n$ pixels divided into four
$n$--pixel full resolution submaps and one coarsened $n$--pixel map where
720: $n = 30 \times 30$ (triangles), $40 \times 40$ (squares) or $50 \times 50$
721: (hexagons).
722: \label{fig:sigmab}}
723: \end{figure}
724:
725: Though individual elements of the Fisher matrix may be larger for a
726: sub--optimal method than an optimal one, we know that the contribution
from a given band to the total weight {\it cannot} be larger. Thus, the
728: best we could hope for is that the ratio of $W_b$ for the HD method over
729: the exact method is near unity. We see from Fig.~\ref{fig:weight} that it
730: is everywhere greater than 0.97. Thus the fact that the combine procedure
731: gives at most 10\% larger error bars (20\% larger variances) in
Fig.~\ref{fig:sigmab} cannot be due to any reduction in the total weight
733: (which we see is negligible), but must be due to how each $W_b$ is
734: distributed among the $F_{bb'}$. In particular, it is the lower spectral
735: resolution of the smaller submaps which results in the $W_b$ being more
736: spread out within a Fisher matrix row and less concentrated in the
737: diagonal element $F_{bb}$ as is clear from the 7th row plotted in
738: Fig.~\ref{fig:fish_smallmap}.
739:
740: \begin{figure}
741: \plotone{fig4.eps}
742: \caption{$W_b/W_b^{\rm exact}$ where $W_b = \sum_{b'}F_{bb'}$. The $W_b$'s
743: are from analysis of the simulation $A$ map, but into finer bins of width
744: $\delta l = 30$. The short--dashed line is $4W_b/W_b^{\rm exact}$ where
745: $W_b$ is just from analysis of one of the four full--resolution submaps;
746: the long--dashed line is $W_b/W_b^{\rm exact}$ where $W_b$ is from
747: analysis of the coarse resolution submap; the solid line is $W_b/W_b^{\rm
748: exact}$ where $W_b$ is from combining information from all five submaps.
749: \label{fig:weight}}
750: \end{figure}
751:
752: A plot of $F_{bb}$ ratios (similar to the $W_b$ ratio plot of
753: Fig.~\ref{fig:weight}) shows that the cost of this weight redistribution
754: within a Fisher matrix row is a decrease in the diagonal Fisher elements
(in the $\ell=250$ to $\ell=600$ range) to 80--85\% of the exact ones.
756: Not only is $F_{bb}$ suppressed then, but the larger off--diagonal
757: elements also lead to larger diagonal elements of $F^{-1}$. With broader
758: bands (such as those used for Fig.~\ref{fig:sigmab}), the error--bar
759: increase due to degraded spectral resolution is not as severe. The effect
760: of the larger off--diagonal elements propagates from band--to--band and is
761: least significant at the lower bands which are benefiting from the full
762: spectral resolution of the coarse submap.
763:
764: Also in Fig.~\ref{fig:weight} one can see that the pixelization effects
765: can be fairly severe. This is unfortunate since we only treat the
766: pixelization influence on the signal--correlation matrix, $S$,
767: approximately. Our treatment is that provided with the HEALPix package,
768: which assumes that the correlation between two pixels only depends on the
769: angular distance between them and not on their orientation. This is an
770: approximation for two reasons: the pixels are anisotropic, and their
771: shapes depend on their location. The validity of the approximate
772: window--function can vary from submap to submap if the submaps are not
773: large enough to have a representative sampling of all pixel shapes. This
774: is another reason to use large submaps. We take each cross--level pixel
775: window function to be the geometric mean of the two auto--level pixel
776: window functions.
777:
778: Because our treatment of pixelization effects is approximate, we throw out
779: information from coarse submaps at a conservatively low $\ell$ value. In
780: simulation $A$, for example, powers from the coarse resolution submap were
781: only considered for $\ell < 225$. In the final combined results, the
782: higher bands only use information from the four fine resolution submaps.
783: We eliminate the influence of the coarse submap on the higher bands by
784: inserting very large numbers into diagonal elements of the $({\cal
785: F}^{-1})_{\alpha l,\alpha' l'}$ matrix. This marginalization technique is
786: described in Appendix A of \cite{bjk98} and can be understood as
787: artificially adding some noise to these particular bands so as to give
788: them very low weight.
789:
The upturn in Fig.~\ref{fig:sigmab} after $\ell = 225$ where the coarse
791: submap information is no longer used indicates that there may be an
792: advantage to keeping the coarse submap information to yet higher $\ell$.
793: This would require a more accurate treatment of the pixel effect on the
794: signal correlation function and its derivatives with respect to $C_l$.
795: One way to do this, which would be fairly easy to implement and not cause
796: significant speed reduction, would be to avoid using pixel window
797: functions by calculating coarsened signal matrices directly from finer
798: ones. For example, if the fine signal matrix is $s$ then the
799: next--coarser signal matrix, $S$, must be \be S= { {\alpha s
800: \alpha^T}\over 16} \ee where $\alpha_{ci}$ is one for all fine pixels $i$
801: in coarse pixel $c$ and zero otherwise. Once again, we are summing four
802: pixels into one. The only approximations here come from approximations
803: made in calculating $s$. If these approximations were acceptable for the
804: finer level, they will certainly be adequate for the coarser level.
805: Keeping the coarse level information out to higher bands may be very
806: important for extension to megapixel maps because it is the only other way
807: to improve spectral resolution besides increasing $n$ for the
808: highest--resolution submaps.
809:
810: \section{Analysis of General Megapixel Maps}
811:
812: The map from simulation $A$ has homogeneous white noise. Below we will
discuss results from HD analysis of the map from simulation $B$ in which the
814: noise is inhomogeneous but still uncorrelated. Yet we believe HD will
815: work well on realistic maps with correlated noise. In this section we
816: briefly make the case for the success of HD in the presence of correlated
817: noise and then move on to discuss how HD can be made to work for primary
818: maps with 100 to 1000 times more pixels than the simulation $A$ map. We
819: will see that further approximations are necessary, but that they are
820: likely to work well.
821:
822: %HD can handle correlated noise
823: Even though our applications of HD have only been on simulated maps with
824: uncorrelated noise, we believe that HD will work well on realistic maps
825: with correlated noise. This is easiest to see for correlations on length
826: scales smaller than the size of the smallest submaps. Longer--range noise
827: correlations will not be treated accurately in the analysis of the
828: smallest submaps. But this does not matter because the effect will only
829: be on lower--$\ell$ bands where the smallest submaps do not have much
830: weight. The affected bands will be those determined by coarser and larger
831: submaps that will once again be large compared to the correlation length.
832: Thus the prospects for HD on maps with correlated noise are quite good.
833:
834: %HD needs further approximations to be practical for megapixel maps
835: Applying HD as we have described it to megapixel maps is prohibitively
836: expensive in terms of the demand on computing resources. A rough scaling
837: argument is sufficient to demonstrate this point. In the megapixel regime,
838: we are strongly dominated by the calculation of all the elements of ${\cal
839: F}^{-1}_{\alpha l,\alpha' l'}$. The number of elements in this matrix is
840: $\sim (N/n)^2 N_b^2$. On an SGI Origin 2000, the calculation of a single
element of ${\cal F}^{-1}_{\alpha l,\alpha' l'}$ takes $188\,(n/2500)^3$ sec
on a single MIPS R12000 300 MHz processor where $n$ is the
843: number of pixels in a submap. Thus the wall--clock time is
844: \be
845: \label{eqn:time1}
846: t \sim 1\ {\rm year} \left({500 \over n_{\rm
847: proc}}\right) \left({N\over 3 \times 10^6}\right)^2\left({n\over
848: 2500}\right) \left({N_b^2\over 1000}\right)
849: \ee
850: where we have assumed the efficient use of $n_{\rm proc}$ processors
851: \cite{endnote4}. Thus the need to avoid exact calculation of every
852: element of ${\cal F}^{-1}_{\alpha l,\alpha' l'}$ is apparent.
853:
854: To make the case for the likely success of fast approximations to ${\cal
855: F}^{-1}_{\alpha l,\alpha' l'}$ we turn to the results from simulation $B$.
856: In Fig.~\ref{fig:pow_bigmap}, we plot four power spectra: one is the
857: result of optimally combining the individual power spectra; one is the
858: power spectrum of the coarsest submap; the other two are the result of a
859: {\it simple} averaging of the power spectra for the submaps within a given
860: resolution level as if they were independent. Again, the solid line
861: represents the original input power spectrum.
862:
863: We find the {\it very} good agreement between simple averaging and the
864: exact combination (for the highest bands) to be very encouraging because
865: it is strong evidence that signal correlations between non--overlapping
866: submaps are not very important. We certainly see they are not important in
867: the highest bands which are influenced only by submaps with no spatial
868: overlap (since the submaps are all at the same resolution level). If any
869: given band is only influenced by at most two or three levels and we only
need to calculate correlations between overlapping submaps then the vast
871: majority of submap pairs can be ignored. Even if some cannot be ignored,
872: their relative insignificance means that there are probably crude
873: approximations to them that will work well.
874:
875: \begin{figure}
876: \plotone{fig5.eps}
877: \caption{Results from HD of the 200 by 200 pixel simulation $B$ map. There
are sixteen submaps at the finest resolution level (level 8, $13.7'$
pixels), four at the medium level (level 7, $27.5'$ pixels) and one at
the coarsest level (level 6, $55'$ pixels). Triangles and squares
represent the result of doing a naively weighted average of the power
spectrum estimates, \ie neglecting correlations, from the sixteen fine
submaps and the four medium--level submaps, respectively. Pentagons represent
884: results for the one coarsest submap. Filled circles show the results of
885: the optimal combination of all submaps in which all power--spectrum
886: correlations are computed exactly. As in Fig.~\ref{fig:pow_smallmap},
887: points are shifted horizontally for clarity.\label{fig:pow_bigmap}}
888: \end{figure}
889: \noindent
890:
891: Calculating only the correlations between overlapping submaps at adjacent
892: resolution levels takes time
893: \be
894: \label{eqn:time2}
895: t = 78\ {\rm hours} \left({200 \over n_{\rm
896: proc}}\right) \left({N\over 10^7}\right)\left({n\over
897: 5000}\right)^2 \left({N_b\over 40}\right)\left({\Delta N_b \over
898: 3}\right)
\ee
where for each of the $N_b$ bands only the nearest $\Delta N_b$ bands are
902: considered \cite{endnote3}. Calculating correlations between overlapping
903: submap pairs whose resolution levels differ by 2 will, at most, double the
904: time. Correlations between non--overlapping map pairs may be significant
905: but can probably be treated approximately in an insignificant amount of
906: time. Development and study of these approximations is probably necessary
907: for practical application of HD to megapixel maps.
908:
909: We also see from Fig.~\ref{fig:pow_bigmap} that even when there is a mix
910: of resolution levels influencing a band, using just one of those levels
911: provides a rough approximation. A fairly good ``quick--and--dirty''
912: power--spectrum estimator is the coarsest submap's power spectrum for band
913: 1, the coarse submaps' power spectrum for bands 2 and 3, and the finest
914: submaps' power spectrum for bands 4 to 8. Such an estimator has its
915: applications, for example, finding a $C_l$ that is close enough to optimal
916: that one only needs a single iteration of the exact HD procedure.
917:
918: The scaling of $t$ with $N$ in Eq.~\ref{eqn:time2} is linear if $n$ is
919: fixed. But if we fix spectral resolution and the area of the primary map,
920: then $n \propto N$ and therefore $t \propto N^3$ once again! Or, at fixed
921: $N$ and primary map area, $t \propto (1/\delta l)^4$. Our fiducial choice
922: above of $n=5000$ corresponds for {\it Planck} with $N=10^7$ and $r_p =
3.5'$ to $\delta l=45$. This may be sufficient since physical models have
924: fairly smooth power--spectra. We see that the degree to which degraded
925: spectral resolution affects our ability to discriminate between different
926: models is a crucial issue for the applicability of HD to {\it Planck}.
927: We remind the reader that spectral resolution is the only thing that is
928: significantly compromised with HD; Fig.~\ref{fig:weight} shows the total
929: weight from each band is within a few percent of optimal.
930:
931: \section{Comparison with other methods}
932:
933: The HD method has many advantages over other fast, approximate methods.
934: Perhaps the {\it chief} advantage is its ability to handle maps with
935: correlated noise. Its main disadvantage is spectral resolution. To
936: understand better these competitive advantages/disadvantages it is worth
937: spending some time discussing these other methods---especially since we
938: will see they are somewhat complementary and hence a hybrid approach may
939: be useful.
940:
941: This discussion of other methods is facilitated by writing down the
942: following generalization of Eq.~\ref{eqn:quadest}:
943: \be
944: \label{eqn:quadestW}
945: C_l = \sum_{l'} {1\over 2}F^{-1}_{ll'}{\rm Tr}\left[W\left(\Delta
946: \Delta^{\rm T} - N\right)W{\partial C \over \partial C_{l'}}
947: \right]
948: \ee
949: and Eq.~\ref{eqn:fish}:
950: \be
951: \label{eqn:fishW}
952: F_{ll'} = {1\over 2}{\rm Tr}\left[W{\partial C \over \partial C_l}
953: W {\partial C \over \partial C_{l'}}\right].
954: \ee
955: These equations specify a general unbiased quadratic estimator, with pixel
956: pair--weighting determined by $W$. The $F_{ll'}$ matrix is derived by
957: demanding that the estimator be unbiased ($\langle C_l^{\rm estimate}
958: \rangle = C_l$). In general, its inverse is not equal to $\langle \delta
959: C_l \delta C_{l'} \rangle$ which is instead given by
960: \be
961: \label{eqn:realfishW}
962: {\cal F}^{-1}_{ll'} = {1\over 2} F^{-1}_{ll''}F^{-1}_{l'l'''}
963: {\rm Tr}\left[A_{l''}CA_{l'''}C\right]
964: \ee
965: where $A_l \equiv W{\partial C \over \partial C_l }W$, similar to
966: Eq.~\ref{eqn:defA}.
967:
968: For the minimum--variance estimator, $W = C^{-1}$. The
969: ``correlation--function'' approach (CF) of \cite{szapudi00} uses the
970: simpler $W=I$ in pixel space \cite{endnote1}. Spherical--harmonic
971: transforming the map and averaging $|a_{lm}|^2$'s over $m$ uses $W=I$ in
972: spherical--harmonic space. The multi--scale method we have just described
973: above likewise corresponds to a choice of $W$, although this $W$ is not
974: easily written down.
975:
976: It is worth pointing out that the estimator for CF requires on the order
977: of $N_b^2N^2$ operations where $N_b$ is the number of $\ell$-bands. One
978: can get rid of the $N_b^2$ factor by rewriting it as an estimator for
979: $C(\theta)$ in fine bins of $\theta$ and then Legendre--transforming the
980: result, as was done in \cite{szapudi00}. Further computational
981: accelerations are possible by use of KD--tree search techniques which use
982: coarse--graining at large distances \cite{colombi01,moore01}. In
983: addition, fast spherical harmonic transforms lead to great time--savings
984: in harmonic methods.
985:
986: However, the simplicity of these other choices for $W$ does have
987: drawbacks. Specifically, high--noise areas and low--noise areas make
988: equal contribution to the estimator. To date, the success of these
989: methods has only been demonstrated on simulations with homogeneous white
990: noise. The first obvious improvement to CF is to replace
$W_{ij}=\delta_{ij}$ with $W_{ij} = \delta_{ij}/\sigma_i^2$ (in pixel
992: space) as suggested in \cite{szapudi00}.
993:
994: What is less obvious is how to weight pixel pairs in the presence of
995: correlated noise. This is where further development of the CF approach is
996: most needed. One possible route to pursue is band--diagonal choices of
997: $W_{ij}$ which capture the spatially--local noise correlations.
998: Computation with band--diagonal $W$'s can still be quite fast; they are
999: still order of $N^2$ as long as the bandwidth is less than $\sqrt{N}$.
1000: Perhaps longer--range correlations could be included in some hybrid scheme
1001: of HD and CF. Here CF (with band--diagonal $W_{ij}$) would be used on the
1002: primary map and then HD would be used to calculate lower--$\ell$ values
1003: which may have been affected by long--range noise correlations. This
1004: hybrid scheme also has the advantage of complementing HD where its
1005: spectral resolution is lowest \cite{endnote2}.
1006:
1007: Although the calculation of $C_l$ is fast with simple choices for $W$, the
1008: calculation of the error covariance matrix (Eq.~\ref{eqn:realfishW}) is
1009: slow; i.e. the number of operations scales with $N^3$ because of the
1010: matrix multiplications. One option is to estimate the errors by
1011: Monte--Carlo methods \cite{szapudi00}. Another is to combine the CF and
1012: HD approaches in yet another way: use CF as a means to produce an input
1013: power spectrum sufficiently close to the optimal one that only a single
1014: iteration of HD is required.
1015:
1016: \section{Conclusions}
1017:
1018: We have concentrated on developing a fast and reliable method for
1019: calculating power spectra and their uncertainties from maps with $N =
1020: 10^4$ to $10^5$ pixels. Methods that work in this regime are of immediate
1021: practical importance. Our tests show very good agreement with exact
1022: methods at the lower end of our $N$ range where the exact analysis is
1023: feasible on a supercomputer. The HD method is the only existing method
1024: for calculating a power spectrum and its uncertainties from general,
1025: inhomogeneous correlated noise patterns with maps of this size in
1026: reasonable amounts of time \cite{hivon01}.
1027:
1028: We have not tested our method on maps with correlated noise. But since
1029: noise--correlations are taken into account exactly within each submap, we
1030: expect our method to handle correlated noise effectively, unlike the other
1031: fast methods mentioned above. These expectations will be put to the test
1032: soon as HD is applied to existing datasets from LDB flights, such as {\it
1033: Archeops\/}\footnote{\texttt{http://www-crtbt.polycnrs-gre.fr/archeops/}}
1034: and {\it TopHat\/}\footnote{\texttt{http://topweb.gsfc.nasa.gov/}}.
1035:
1036: The local nature of the method has some advantages for controlling
1037: contamination of the final power spectrum result. In the extreme, one can
1038: simply cull submaps with the largest foreground contamination. Less
1039: drastically, one could down--weight the power spectrum determinations from
1040: submaps according to the suspected level of contamination.
1041:
1042: To summarize, we have developed and investigated an HD method of
1043: power--spectrum estimation. We have demonstrated that for LDB--size maps
1044: HD is sufficiently fast and insignificantly sub--optimal. Its main
1045: advantages over other fast methods are its generality (including its
1046: ability to handle correlated noise) and the fact that the power spectrum
1047: uncertainties are calculated directly. Application to larger maps will
1048: rely on further approximations which we expect to work well but require
1049: further investigation. The main disadvantage to HD is the degraded
1050: spectral resolution at the smallest angular scales. The impact of this
1051: degradation on parameter--determination also warrants further
1052: investigation. The combination of HD with other methods may be fruitful.
1053:
1054: {\it Madcumba}, a Fortran 90 implementation of the HD procedure, will be
1055: made available for public use. Comments and questions should be directed
to O. Dor\'e at \texttt{dore@iap.fr}.
1057:
1058: \acknowledgements
1059: %\vskip 0.2in
1060: %\noindent
O.D. is grateful to the UC Davis Cosmology group for its warm hospitality.
L.K. is grateful to IAP for the same. We benefited from conversations with
1063: J. R. Bond, J. Borrill, F. Bouchet, A. Jaffe, R. Stompor, P. Koev, D.
1064: Vibert and R. Teyssier and the computer resources of S. Colombi and NERSC.
1065:
1066: \begin{thebibliography}{ucsc}
1067:
\bibitem{jaffe00} E.g., A. Jaffe et al., Phys. Rev. Lett. {\bf 86}, 3475--3479
1069: (2000).
1070:
1071: \bibitem{forecast} L. Knox,
1072: %``Determination of Inflationary Observables from CMB Anisotropy Experiments'',
1073: Phys. Rev. D48, 3502 (1995);
G. Jungman, M. Kamionkowski, A. Kosowsky, and D. Spergel, Phys.
Rev. D {\bf 54}, 1332 (1996); J. R. Bond, G. Efstathiou, and M. Tegmark,
Mon. Not. Roy. Astron. Soc. {\bf 291}, L33 (1997);
1077: D. Eisenstein, W. Hu
1078: and M. Tegmark,
1079: %``Cosmic Complementarity: H 0 and Omega M from
1080: %Combining Cosmic Microwave Background Experiments and Redshift Surveys'',
Astrophys. J. {\bf 504}, L57 (1998).
1082:
1083: \bibitem{bcjk} J.~R. Bond, R. Crittenden, A.~H. Jaffe and L. Knox,
1084: %``Computing Challenges of the Cosmic Microwave Background'',
1085: Computing in Science and Engineering, vol. 1, no. 2, 21 (1999).
1086:
1087: \bibitem{borrill99} J. Borrill, Phys. Rev. D {\bf 59}, 027302 (1999).
1088:
1089: \bibitem{dore01} O. Dor\'e, R. Teyssier, F.R. Bouchet, D. Vibert,
1090: astro-ph/0101112, see also http://ulysse.iap.fr/download/mapcumba
1091:
1092: \bibitem{wandelt} B.D. Wandelt, E. Hivon \& K. G{\'o}rski,
1093: astro-ph/0008111;
1094: %The Pseudo-$C_l$ method: Cosmic microwave background anisotropy
1095: %power spectrum statistics for high precision cosmology
1096: %Submitted to Physical Review D in January 2000
1097: astro-ph/9808292
1098:
1099: \bibitem{szapudi00} I. Szapudi, S. Prunet, D. Pogosyan, A. Szalay and J.R. Bond,
1100: astro-ph/0010256
1101: % Title: Fast CMB Analyses via Correlation Functions
1102:
1103: \bibitem{oh99} S.P. Oh, D.N. Spergel and G. Hinshaw,
1104: Astrophys. J. {\bf 510}, 551 (1999).
1105:
1106: \bibitem{wandelt01} B. Wandelt 2001, Proceedings of
MPA/MPE/ESO Conference ``Mining the Sky'', July 31 -- August 4, 2000,
1108: Garching, Germany, astro-ph/0012333, astro-ph/0012416
1109:
1110: \bibitem{bjk98} J.R. Bond, A.H. Jaffe \& L. Knox, Phys. Rev. D {\bf 57},
1111: 2117 (1998).
1112:
1113: \bibitem{tegmark97} These equations were independently derived as
1114: the optimal, unbiased quadratic estimator in
1115: M. Tegmark, Phys. Rev. D {\bf 55}, 5895 (1997).
1116: %``How to
1117: %measure CMB power spectra without losing information'', Phys. Rev. D
1118:
1119: \bibitem{endnote5} This can also be understood as the usual
1120: problem of localizing simultaneously in position and momentum:
M. Tegmark, Mon. Not. Roy. Astron. Soc. {\bf 280}, 299--308 (1996).
1122:
1123: \bibitem{borrill01} J. Borrill and P. Koev 2001, in preparation.
1124:
1125: \bibitem{GoHi98} G{\'o}rski E.K., Hivon E., Wandelt B.D.
1126: in proceedings of the MPA/ESO Garching Conference 1998, eds Banday
1127: A.J., Sheth K. and L. Da Costa and
\texttt{http://www.eso.org/\~{}kgorski/healpix/}
1129:
1130: \bibitem{jaffe01} A. Jaffe, private communication.
1131:
\bibitem{bjk00} J.R. Bond, A.H. Jaffe \& L. Knox, Astrophys. J.
1133: {\bf 533}, 19 (2000).
1134:
\bibitem{seljak96} U. Seljak \& M. Zaldarriaga, Astrophys. J. {\bf 469}, 437 (1996).
1136:
1137: \bibitem{endnote4}We
1138: tested the scaling with $n_{\rm proc}$ by running {\it Madcumba} using up
1139: to
1140: $77$ processors.
1141:
1142: \bibitem{endnote3}We remind the reader that application of HD to
1143: megapixel and larger maps requires some way to calculate $N$ for the
1144: sub--maps from the time--ordered data. This could be accomplished
1145: by the method we briefly described in subsection IID, which will
1146: be described in more detail in \cite{borrill01}.
1147:
1148: \bibitem{endnote1} That
1149: the correlation--function approach can be regarded as a quadratic
1150: estimator with sub--optimal weighting was emphasized in \cite{szapudi00}.
1151:
1152: \bibitem{colombi01} S. Colombi {\it et al.} 2001, in preparation.
1153:
1154: \bibitem{moore01} A. Moore {\it et al.} 2001, Fast Algorithms and
1155: Efficient Statistics: N-point Correlation Functions, Proceedings of
MPA/MPE/ESO Conference ``Mining the Sky'', July 31 -- August 4, 2000,
1157: Garching, Germany, astro-ph/0012333
1158:
1159: \bibitem{endnote2} The speed--up with KD--tree search techniques
1160: will also lead to some spectral resolution degradation.
1161:
1162: \bibitem{hivon01} A possible exception is
a Monte--Carlo pseudo-$C_l$ method to be described in
1164: E. Hivon {\it et al.} 2001, in preparation.
1165:
1166: \end{thebibliography}
1167: \end{document}
1168:
1169:
1170: