0104:astro-ph0104443/ms.tex

1: %\documentstyle[aps,twocolumn,prl,tighten,flushrt]{revtex} %eqsecnum,

2: \documentstyle[aps,twocolumn,tighten]{revtex}

3: %\documentstyle[preprint,aps,prl]{revtex}

4:

5: %\documentclass{revtex}

6: %\usepackage{aps}

7: %\usepackage{twocolumn}

8: %\usepackage{tighten}

9:

10: \def\btt#1{{\tt$\backslash$#1}}

11: \input epsf

12: \def\plotone#1{\centering \leavevmode

13: \epsfxsize= 1.0\columnwidth \epsfbox{#1}}

14: \def\plottwo#1{\centering \leavevmode

15: \epsfxsize= 1.0\columnwidth \epsfbox{#1}}

16: \def\plotfiddle#1#2#3#4#5#6#7{\centering \leavevmode

17: \vbox to#2{\rule{0pt}{#2}}

18: \special{psfile=#1 voffset=#7 hoffset=#6 vscale=#5 hscale=#4 angle=#3}}

19: %\def\plotrotate#1{\centering \leavevmode

20: %\epsfxsize= 0.8\columnwidth  \epsfbox{#1  {angle=-90}}}

21: \def\plotrotate#1{\centering

22: %\leavevmode

23: %\epsfxsize= 1.7\columnwidth

24: \epsfbox{#1 angle=-90}}

25:

26: \def\bff{}

27: %\newcommand\apj[3]{ {\it Astrophys. J.} {\bf #1}, #2 (19#3) }

28: \def\apjl{Astrophys. J. Lett.}

29: %\newcommand\prd[3]{ {\it Physical Review D} {\bf #1}, #2 (19#3) }

30: %\newcommand\prl[3]{ {\it Physical Review Letters} {\bf #1}, #2 (19#3) }

31: %\newcommand\np[3]{ {\it Nucl.~Phys.} {\bf #1}, #2 (19#3) }

32: \def\mnras{Mon.Not.Roy.As.Soc.}

33: \def\araa{Annu. Rev. Astron. Astrophys.}

34: \def\aj{Astron. J.}

35: \def\asap{Astron. Astrophys.}

36:

37: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

38: %%%%%%%%%%%%%%%%%%%   begin local macros %%%%%%%%%%%%%%%%%%%%%%

39: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

40: \def\be{\begin{equation}}

41: \def\ee{\end{equation}}

42: \def\bea{\begin{eqnarray}}

43: \def\eea{\end{eqnarray}}

44: \def\sm{{\rm M}_\odot}

45: \def\uline#1{$\underline{\smash{\hbox{#1}}}$}

46:

47: \def\muk{\mu{\rm K}}

48:

49: \def\ang{\,{\rm\AA}}

50: \def\flux{\,{\rm erg\,cm^{-2}\,arcsec^{-2}\,\AA^{-1}\,s^{-1}}}

51: \def\GeV{\,{\rm GeV}}

52: \def\TeV{\,{\rm TeV}}

53: \def\gev{\,{\rm GeV}}

54: \def\keV{\,{\rm keV}}

55: \def\MeV{\,{\rm MeV}}

56: \def\sec{\,{\rm sec}}

57: \def\Gyr{\,{\rm Gyr}}

58: \def\yr{\,{\rm yr}}

59: \def\rcm{\,{\rm cm}}

60: \def\pc{\,{\rm pc}}

61: \def\kpc{\,{\rm kpc}}

62: \def\Mpc{\,{\rm Mpc}}

63: \def\mpc{\,{\rm Mpc}}

64: \def\eV{{\,\rm eV}}

65: \def\ev{{\,\rm eV}}

66: \def\erg{{\,\rm erg}}

67: \def\cmm2{{\,\rm cm^{-2}}}

68: \def\cm2{{\,{\rm cm}^2}}

69: \def\cmm3{{\,{\rm cm}^{-3}}}

70: \def\gcmm3{{\,{\rm g\,cm^{-3}}}}

71: \def\kms{\,{\rm km\,s^{-1}}}

72: \def\HO{{100h\,{\rm km\,sec^{-1}\,Mpc^{-1}}}}

73: \def\mpl{{m_{\rm Pl}}}

74: \def\mpp{{m_{\rm Pl,0}}}

75: \def\trh{T_{\rm RH}}

76: \def\g{\tilde g}

77: \def\R{{\cal R}}

78: \def\zl{z_{\rm LSS}}

79: \def\zeq{z_{\rm EQ}}

80: \def\he{$^4$He}

81: \def\VEV#1{\left\langle #1\right\rangle}

82: \def\fun#1#2{\lower3.6pt\vbox{\baselineskip0pt\lineskip.9pt

83: \ialign{$\mathsurround=0pt#1\hfil##\hfil$\crcr#2\crcr\sim\crcr}}}

84: \def\C{{\cal C}}

85: \def\TBD{{\bf TBD}}

86: \def\muK{\mu {\rm K}}

87: \def\hyi{H\thinspace{$\scriptstyle{\rm I}$}~}

88: \def\hii{H\thinspace{$\scriptstyle{\rm II}$}~}

89:

90: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

91: %%%%%%%%%%%%        MST local macros      %%%%%%%%%%%%%%%%%%%%%%%%%%%

92: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

93: \def\figcapsize{\footnotesize \baselineskip=0.1cm}

94: \def\vec{\bf}

95: \def\reff#1{$^{{#1})}$}

96: %\def\lq{\char 140}

97: \def\cc{{\rm~cm}^{-3}}

98: \def\hmpc{{\, {\rm h}^{-1}~\rm Mpc}}

99: \def\hkpc{{\, {\rm h}^{-1}~\rm kpc}}

100: \def\kms{{\rm~km~s^{-1}}}

101: \def\kev{{\rm~keV}}

102: \def\km{{\rm~km}}

103: \def\kpc{{\rm~kpc}}

104: \def\mpc{{\rm~Mpc}}

105: \def\msun{{\,M_\odot}}

106: \def\hmsun{{\, {\rm h}^{-1}\,M_\odot}}

107: \def\lsun{{\,L_\odot}}

108: \def\yr{{\rm~yr}}

109: \def\td{\Upsilon_d}

110: \def\Kel{{\rm~K}}

111: \def\deg{^\circ}

112: %\def\'{^{\prime}}

113: \def\avrg#1{{\langle #1 \rangle}}

114: \def\bra#1{{\langle #1 \vert}}

115: \def\ket#1{{\vert #1 \rangle}}

116: \def\abs#1{{\vert #1 \vert}}

117: \def\real{\Re}

118: \def\imag{\Im}

119: \def\vk{{\vec k}}

120: \def\vr{{\vec r}}

121: \def\vv{{\vec v}}

122: \def\vr{{\vec r}}

123: \def\vv{{\vec v}}

124: \def\vx{{\vec x}}

125: \def\vq{{\vec q}}

126: \def\bv{{\bf v}}

127: \def\bs{{\bf s}}

128: \def\bk{{\bf k}}

129: \def\br{{\bf r}}

130: \def\brpk{{\bf r}_{pk}}

131: \def\bx{{\bf x}}

132: \def\bxpk{{\bf x}_{pk}}

133: \def\bR{{\bf R}}

134: \def\bV{{\bf V}}

135: \def\bX{{\bf X}}

136: \def\bq{{\bf q}}

137: \def\bsb{{\bf s}_{\rm b}}

138: \def\bsf{{\bf s}_{\rm f}}

139: \def\hk{{\hat k}}

140: \def\hr{{\hat r}}

141: \def\hx{{\hat x}}

142: \def\hq{{\hat q}}

143: \def\c{{\cal C}}

144: \def\X{{\cal K}}

145: \def\eps{\varepsilon}

146: \def\pomega{\varpi}

147: \def\lbar{{\mathchar'26\mskip-9mu\lambda}}

148: \def\vs{\vskip 16pt}

149: \def\oneskip{\vskip 16pt}

150: \def\eg{{e.g., }}

151: \def\ie{{i.e., }}

152: \def\etal{{\it et al. }}

153: \def\et{{et al. }}

154: \def\etc{{etc. }}

155: \def\via{{via }}

156: \def\half{{\textstyle{1\over2}}}

157: \def\Rth{{R_{TH}}}

158: \def\Rf{{R_{ G}}}

159: \def\Rs{{R_{s}}}

160: \def\Rpk{{R_{pk}}}

161: \def\p3m{P$^3$M}

162: \def\zbox{z_{\rm box}}

163: \def\Tr{{\rm Tr}\,}

164: \def\rC{{\rm C}}

165: \def\mpl{m_{\cal P}}

166: \def\hz{{\hat z}}

167: \def\la{\mathrel{\mathpalette\fun <}}

168: \def\ga{\mathrel{\mathpalette\fun >}}

169: \def\fun#1#2{\lower3.6pt\vbox{\baselineskip0pt\lineskip.9pt

170:   \ialign{$\mathsurround=0pt#1\hfil##\hfil$\crcr#2\crcr\sim\crcr}}}

171: %%%%%%%%%

172: \def\boldsymbol{\bf}

173:

174: \def\boldcdot{\mathbin{{\boldsymbol\cdot}}}

175: \def\boldnabla{{\boldsymbol\nabla}}

176: \font\BF=cmmib10

177: \def\gam{\hat{\gamma}}

178: \def\k{{\hbox{\BF k}}}

179: \def\x{{\hbox{\BF x}}}

180: \def\r{{\hbox{\BF r}}}

181: \def\u{{\hbox{\BF u}}}

182: \def\v{{\hbox{\BF v}}}

183: \def\d{\delta}

184: \def\dD{\delta_{\rm D}}

185: \newcommand{\lexp}{\mathop{\langle}}

186: \newcommand{\rexp}{\mathop{\rangle}}

187: \newcommand{\order}[1]{{\cal O}(#1)}

188: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

189: %%%%%%%%%%%%        end local macros      %%%%%%%%%%%%%%%%%%%%%%%%%%%

190: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

191:

192: \begin{document}

193: \twocolumn[\hsize\textwidth\columnwidth\hsize\csname @twocolumnfalse\endcsname

194: \draft

195: \title{CMB Power Spectrum Estimation via Hierarchical Decomposition}

196: \author{Olivier\ Dor\'e$^{1}$, Lloyd\ Knox$^{2}$,

197: and Alan\ Peel$^{2}$}

198: \address{$^1$ Institut d'Astrophysique de Paris,

199: 98bis Boulevard Arago, F-75014 Paris, FRANCE; dore@iap.fr}

200: \address{$^2$ Department of Physics, One Shields Avenue,

201: University of California, Davis, California 95616, USA;

202: \\ lknox@ucdavis.edu, apeel@bubba.ucdavis.edu}

203: \date{\today}

204: \maketitle

205:

206: \begin{abstract}

207:

208: We have developed a fast, accurate and generally applicable method for

209: inferring the power spectrum and its uncertainties from maps of the cosmic

210: microwave background (CMB) in the presence of inhomogeneous and correlated

211: noise.  For maps with $10^4$ to $10^5$ pixels, we apply an exact power

212: spectrum estimation algorithm to submaps of the data at various

213: resolutions, and then combine the results in an optimal manner.  To

214: analyze larger maps efficiently one must resort to sub--optimal

215: combinations in which cross--map power spectrum error correlations are

216: only calculated approximately.  We expect such approximations to work well

217: in general, and in particular for the megapixel maps to come from the next

218: generation of satellite missions.

219:

220: \end{abstract}

221: \pacs{98.70.Vc}

222: %\narrowtext

223: ]

224: \section{Introduction}

225:

226: The anisotropy of the Cosmic Microwave Background (CMB) is proving to be a

227: powerful cosmological probe \cite{jaffe00}.  Many cosmological parameters,

228: and the primordial power spectra of density and gravity--wave

229: perturbations, can be inferred from the statistical properties of the

230: CMB---in particular from its angular power spectrum \cite{forecast}.

231: Unfortunately, exact methods for calculating the power spectrum and its

232: uncertainties from real observations are very expensive

233: computationally~\cite{bcjk}.  Supercomputers are required for analysis of

234: current datasets and even they will not be sufficient for the next

235: generation of experiments \cite{borrill99}.  Here we introduce an

236: approximate method for reducing a CMB map to a power spectrum and its

237: uncertainties.

238:

239: Generally applicable exact methods for finding the angular power spectrum,

240: $C_l$, that maximize the likelihood have operation counts proportional to

241: $N^3$ where $N$ is the number of pixels in the map.  Our approach to

242: overcoming this $N^3$ scaling involves a hierarchical decomposition of the

243: map into a set of submaps. That is, we subdivide the original

244: (``primary'') map into non--overlapping regions, each with a manageable

245: number of pixels, in order to estimate the power spectrum from each of

246: these submaps using an exact algorithm.  To study the larger angular scale

247: fluctuations we coarsen the primary map and if the number of these coarse

248: pixels is still too large, we again divide into submaps.  To go to yet

249: larger angular scales, we coarsen the map further, and so on.  Then we

250: calculate the expected correlations between the power spectrum estimates

251: from all different submaps at all different resolution levels in order to

252: optimally average them together.  A similar multi--grid technique was

253: recently developed for the reduction of time--ordered CMB data to

254: maps~\cite{dore01}.

255:

256: Several other approaches to overcoming the $N^3$ scaling have been tried.

257: These include the ``pseudo--$C_l$'' method of \cite{wandelt}, and the

258: ``correlation--function'' approach of \cite{szapudi00}.  We expect these

259: methods to work well in the case of homogeneous noise, but to be

260: significantly sub--optimal for the levels of inhomogeneity expected in

261: planned observational programs.  None of these methods has been shown to

262: deal properly with correlated noise.  Minor modifications of the

263: correlation--function approach may make this path very attractive, though

264: a remaining issue is the importance of noise correlations between pixels.

265:

266: The $N^3$ scaling has been overcome also by a special--purpose exact

267: method that is expected to be applicable to the maps generated by NASA's

268: Microwave Anisotropy Probe ({\it MAP\/})

269: satellite\footnote{\texttt{http://map.gsfc.nasa.gov/}}.  This method

270: \cite{oh99} assumes the noise is not correlated from one pixel to another

271: and that the noise level variations are roughly azimuthally symmetric.

272: Some of the techniques used in \cite{oh99} may eventually find their place

273: in more generally applicable (and yet still exact) power spectrum

274: estimation algorithms, though the feasibility is not yet clear.  Another

275: special--purpose exact algorithm is that of \cite{wandelt01}, which is

276: applicable to experiments that scan on rings.  The main idea is to analyze

277: ring sets instead of maps since both the noise and signal covariance

278: structures are simple on the rings, whereas the noise structure can be

279: complicated in the map space.  Although some of its critical hypotheses

280: have not been tested yet on realistic data, the ring--set approach might

281: still be of practical importance since it may provide a useful

282: zeroth--order solution for experiments that nearly scan on rings.

283:

284: In section II we describe our method in detail.  In section III we present

285: the results of an application to a map with ten thousand

286: pixels---comparable to the size of maps coming from long--duration balloon (LDB)

287: flights.  In section IV we show results from a map four times larger and

288: discuss prospects for application of our method to even larger maps such

289: as those expected from {\it MAP\/} and {\it

290: Planck\/}\footnote{\texttt{http://astro.estec.esa.nl/SA-general/Projects/Planck/}}.

291: In section V we compare with other methods. In section VI we conclude.

292:

293: \section{Method}

294:

295: Here we first describe our method in the simplest conceptual terms, and

296: then go on to discuss subtleties which complicate our implementation.

297:

298: \subsection{From the Likelihood Function to the Quadratic Estimator}

299:

300: We describe here the use of a quadratic estimator to find the maximum of

301: the likelihood function, and the shape of the likelihood function near

302: that maximum, as described in \cite{bjk98}.  Time--ordered data from

303: observation of the CMB are usually reduced to a set of pixelized maps

304: $\Delta_i$, $i=1,\ldots,N$, which can be decomposed into the sum of a

305: signal and a noise contribution, $\Delta = s + n$.  Assuming that both

306: the noise and the signal are normally--distributed, their statistical

307: properties are fully characterized by the covariance matrices $S = \langle

308: s s^T \rangle$ and $N = \langle n n^T\rangle$.  Assuming furthermore that

309: the noise and signal are not correlated with each other, we can define

310: \be

311: C \equiv \langle\Delta \Delta^T\rangle = S + N \; .

312: \ee

313:

314: The observed sky signal is assumed to be the realization of an isotropic

315: Gaussian random field whose power spectrum $C_l$ is the quantity we

316: want to measure.  Thus we are interested in the likelihood function

317: ${\mathcal L}(\Delta \mid C_l)$ which is given by

318: \be

319: -2 \ln {\mathcal L}(\Delta \mid C_l) = \ln\

320: \mathrm{det}\ C + \Delta^TC^{-1}\Delta \; .

321: \ee

322: In particular we are interested in the location of the maximum of this

323: function (which is the most likely $C_l$) and the curvature at the

324: maximum, $-\partial^2\ln{\mathcal L}/\partial C_l \partial C_{l'}$ (which

325: is approximately the inverse of the covariance matrix for $C_l$). Note

326: that $C$ depends on $C_l$ since

327: \be

328: S_{ii'} = \sum_{l} {2l +1 \over 4\pi} C_l {\mathcal W}_{ii'}(l)

329: \ee

330: where $\mathcal{W}$ is the covariance window function of the experiment.

331:

332: Given an initial estimate of $C_l$ (hereafter, the input $C_l$) one can

333: reach the likelihood maximum as follows.  By Taylor--expanding $\ln

334: {\mathcal L}$ to second order in $\delta C_l$ around $C_l$, and replacing

335: $-\partial^2\ln{\mathcal L}/\partial C_l \partial C_{l'}$ with its

336: expectation value one can find an expression for $\delta C_l$ such that

337: $C_l + \delta C_l$ maximizes the likelihood:

338: \be

339: \label{eqn:quadest}

340: \delta C_l = \sum_{l'} {1\over 2}F^{-1}_{ll'}{\rm Tr}\left[\left(\Delta

341: \Delta^{\rm T} - C\right)\left(C^{-1}{\partial C \over \partial C_{l'}}

342: C^{-1}\right)\right]

343: \ee

344: and

345: \be

346: \label{eqn:fish}

347: F_{ll'}

348: %\equiv \langle -{\partial^2 \ln{\mathcal L} \over \partial C_l

349: %\partial C_{l'}} \rangle

350: = {1\over 2}{\rm Tr}\left[C^{-1}

351: {\partial C \over \partial C_l}

352: C^{-1}{\partial C \over \partial C_{l'}}\right]

353: \ee

354: is the Fisher matrix \cite{tegmark97}.

355:

356: Equation~\ref{eqn:quadest} is a quadratic function of the data and hence

357: the expression ``quadratic estimator''. Note that we have suppressed the

358: pixel indices in the various vectors and matrices.  Since $\ln{\mathcal

359: L}$ is not equal to its second--order Taylor expansion (i.e., ${\cal L}$

360: is not a Gaussian in $C_l$), some iteration is generally required to reach

361: the likelihood maximum.

362:

363: \subsection{Hierarchical Decomposition and Recombination}

364:

365: Now let us consider multiple maps and use Greek indices to label them.

366: Estimates of $\delta C_l$ from map $\alpha$, $\delta C^\alpha_l$, are

367: correlated with those from map $\beta$ with correlation matrix:

368: \bea

369: \label{eqn:correl}

370: \langle \delta C^\alpha_l \delta C^\beta_{l'} \rangle & \equiv &

371: {\mathcal F}^{-1}_{\alpha l,\beta l'} \\

372: & = & \sum_{l'',l'''} (F_\alpha^{-1})_{ll''}(F_\beta^{-1})_{l'l'''}

373: \times \nonumber \\

374: & & \ \ \ {1\over2}{\rm Tr}

375: \left[A_{\alpha,l''}C_{\alpha\beta}A_{\beta,l'''}C_{\beta\alpha}\right]\nonumber

376: \eea

377: where

378: \be

379: \label{eqn:defA}

380: A_{\alpha,l} \equiv C_{\alpha\alpha}^{-1}{\partial C_{\alpha\alpha} \over

381: \partial C_l}C_{\alpha\alpha}^{-1}\; .

382: \ee

383: Note that if $\alpha =\beta$ then Eq.~\ref{eqn:correl} simplifies to the

384: usual result:

385: \be

386: \langle \delta C_l \delta C_{l'} \rangle = F_{ll'}^{-1}\; .

387: \ee

388:

389: Given this result, we know how to combine the various $\delta C_l$

390: estimates from each submap into a final $\delta C_l$ estimate from all the

391: submaps in a minimum--variance (optimal) manner.  The minimum--variance

392: $\delta C_{l'}$ satisfies

393: \be

394: \label{eqn:combine}

395: \sum_{l'}\left(\ \sum_{\alpha \beta}\ {\mathcal F}_{\alpha l,\beta l'}\

396: \right)\ \delta C_{l'}\ =\ \sum_{\alpha \beta l'}\ {\mathcal F}_{\alpha

397: l,\beta l'}\ \delta C^\beta_{l'}

398: \ee

399: and has a weight matrix (inverse of covariance matrix):

400: \be

401: F_{ll'} = \sum_{\alpha \beta} {\mathcal F}_{\alpha l,\beta l'}\; .

402: \ee

403:

404: Although for simplicity we have written these expressions for estimating

405: individual $C_l$'s, issues of signal--to--noise and spectral resolution

406: usually lead us to estimate the power spectrum in bands of $\ell$, where

407: the shape of $C_l$ inside the bands is assumed.  The usual assumption

408: (which we use in our applications) is that $l(l+1) C_l/(2\pi)$ is constant

409: inside the band.

410:

411: Our treatment of the correlations of the $\delta C_l$'s between pairs of

412: maps has been general.  The maps may be spatially separate or overlapping;

413: they may have equivalent or different pixel sizes.  Thus we have worked

414: out the most general solution to optimally combine the power spectra of

415: submaps which are the result of hierarchical decomposition (HD) of a

416: primary map.

417:

418: \subsection{Spectral Resolution}

419:

420: Even with optimal combining of the power spectrum estimates from the

421: various submaps, the HD procedure results in a sub--optimal estimation of

422: the power spectrum.  Fortunately, in the cases we study, the sub--optimal

423: results are quite close to the optimal results.  Departure from the

424: optimal results is almost entirely due to the degraded spectral resolution

425: of the high--resolution submaps. This loss of spectral resolution is the

426: primary drawback of the HD approach.

427:

428: The spectral resolution is most severely degraded at the highest

429: resolution levels where the submaps have the smallest spatial extents.

430: For any map of linear extent, $L$, it is difficult to distinguish the mode

431: $P_l(\cos\theta)$ from $P_{l+\delta l}(\cos\theta)$ where $\delta l \la

432: \pi/L$ \cite{endnote5}.  If one wishes to achieve a spectral resolution of

433: $\delta l$ for a square map with linear pixel size $r_p$ then this map

434: must have $n$ pixels where

435: \be

436: n \simeq 2.6 \times 10^3 \left( {30 \over \delta l} {7' \over r_p}

437: \right)^2\; .

438: \ee

439: Fortunately $\delta l = 30$ and $r_p = 7'$ are expected to be adequate for

440: LDB--type maps and $2.6 \times 10^3$ pixels is a small enough submap size

441: to allow for reasonable computation times (as shown below).

442:

443: \subsection{Scaling}

444:

445: We now calculate how computation time scales with total number of pixels

446: in the full--resolution primary map, $N$, and the number of

447: multipole--moment bands, $N_b$.  For simplicity we assume that all submaps

448: have the same number of pixels, $n$, and that we estimate the power

449: spectrum for each submap in the same number of bands. Estimating the power

450: spectrum and Fisher matrix for each submap takes on the order of

451: $N_b^2n^3$ operations so at the finest level we have on the order of

452: $N_b^2(N/n) n^3 = N_b^2N n^2$ operations.  In a systematic coarsening

453: (such as the one below defined by combining four pixels at one resolution

454: into one larger pixel for the next coarser level), most of the submaps are

455: at the finest resolution and therefore analysis and combining of these

456: finest submaps dominates the demands on memory and CPU time.

457:

458: For large enough $N$, the dominant computational step will be in

459: calculating the correlations between submaps.  The matrix multiplication

460: in Eq.~\ref{eqn:correl} takes on the order of $n^3$ operations, so

461: performing it for every pair of submaps and pair of bands takes on the

462: order of $N_b^2N^2 n$ operations.

463:

464: The procedure can in principle be parallelized for the efficient use of

465: $n_{\rm proc}$ processors, where $n_{\rm proc}$ ranges anywhere from $N_b$

466: to $\sim (N/n)^2 N_b^2$.  The crucial use of parallelization comes in the

467: dominant combining stage, which scales as $(N/n)^2 N_b^2$, and involves

468: the combination of $\sim {1\over 2}(N/n)^2$ pairs of submaps.  This type

469: of independent pair loading can be efficiently shared on any number of

470: processors lower than ${1\over 2}(N/n)^2$. For LDB--type missions one

471: might have $N_b \sim 10$ and $(N/n)^2 N_b^2 \sim 4\times 10^4$ and

472: approximately $200$ pairs of submaps to combine (if we indiscriminately

473: retain all submap-submap correlations (see Section IV)).  For

474: supercomputers with $n_{\rm proc} \la 10^2$, every processor can be

475: efficiently used.

476:

477: \subsection{The Noise Matrix}

478:

479: Our approach assumes that we begin with a pixelized map and its

480: corresponding noise covariance matrix.  Map--making procedures usually

481: produce a weight matrix, which is the inverse of the noise matrix.

482: Inverting an arbitrary weight matrix takes on the order of $N^3$

483: operations. Fortunately, this inversion only needs to be done once and is

484: feasible for LDB--size maps.

485:

486: For larger maps, treatment of the weight matrix by general matrix

487: inversion algorithms is impossible.  Fast methods are being developed

488: \cite{borrill01} which rely on the origin of the map weight matrix in the

489: weight matrix of the time--ordered data.  That is, the map weight matrix

490: is $A^TN^{-1}A$ where $N^{-1}_{tt'}$ is (here) the time--stream weight

491: matrix for a stationary noise process, and $A_{ti}$ is the pointing matrix

492: element that is one if at time--sample $t$ the telescope is sampling map

493: pixel $i$ and zero otherwise.  This special structure allows for each

494: iteration of a conjugate gradient solution to be performed much faster

495: than for an arbitrary matrix.

496:

497: Another possibility (suggested in \cite{szapudi00}) is to calculate the

498: noise covariance matrix by Monte Carlo methods.  In other words, one would

499: make repeated simulations of the map noise and average those together to

500: get any desired elements of the noise matrix.  In addition to possible

501: speed advantages, this approach also has storage advantages since one

502: probably needs fewer than $N/2$ realizations to have a sufficiently

503: accurate estimate of the noise.  One may still need thousands of

504: realizations of the noise---\eg 20,000 realizations are required for the

505: noise matrix elements to be accurate to within 1\% of the diagonal.

506:

507: \subsection{Coarsening}

508:

509: The amount of work to be done depends on the choice of number of

510: resolution levels, which is governed by how many pixels are combined to

511: form one pixel at the next--coarsest level.  Greater coarsening between

512: levels leads to fewer required operations, but at the expense of greater

513: loss of information.  Since the cost in computing time is slight for using

514: the most modest coarsening possible while maintaining (roughly) square

515: pixels, we always coarsen by averaging four pixels into one.  This

516: coarsening is also easily implemented in the HEALPix pixelization scheme,

517: which we use \cite{GoHi98}.

518:

519: In general, one can create a coarse submap $\Delta$ from a fine submap

520: $\delta$ as follows:

521: \be

522: \label{eqn:coarsen}

523: \Delta = W^{-1} \alpha w \delta

524: \ee

525: where $\alpha_{ci}$ is one for all fine pixels $i$ in coarse pixel $c$ and

526: zero otherwise, $w$ is some weighting of the fine pixels and $W = \alpha w

527: \alpha^T$.  The coarse--fine and coarse--coarse noise covariance matrices

528: are given by:

529: \bea

530: \label{eqn:finecoarsenoise}

531: \langle \Delta \delta^T \rangle_{\rm noise} &=& W^{-1}\alpha w N \\

532:  &=& W^{-1}\alpha \ \ ({\rm if \ }w=N^{-1}) \nonumber \\

533:  &=& {\alpha N\over 4} \ \ ({\rm if \ }w=I) \nonumber

534: \eea

535: and:

536: \bea

537: \label{eqn:coarsecoarsenoise}

538: \langle \Delta\Delta^T \rangle_{\rm noise} &=&

539: W^{-1}\alpha wNw^T\alpha^T W^{-1} \\

540:  &=& W^{-1} \ \ ({\rm if}\ w=N^{-1}) \nonumber \\

541:  &=& {\alpha N\alpha^T\over 16} \ \ ({\rm if}\ w=I)\; . \nonumber

542: \eea

543:

544: For optimal coarsening $w=N^{-1}$ and for uniform averaging, $w=I$.  We

545: assume that we are coarsening four pixels into one and therefore that

546: $\alpha \alpha^T = 4I$.  We see that uniform averaging leads to noise

547: covariance matrices that are easy to calculate.  For optimal averaging we

548: need to invert $W$ which is substantially less challenging than inverting

549: $N^{-1}$ to get $N$ since it has 1/16 the number of elements. The same

550: technique used for calculating $N$ by exploiting the origin of $N^{-1}$ in

551: time--ordered data (as explained in the previous subsection) can be used

552: to get $W^{-1}$ \cite{jaffe01}.

553:

554: Coarsening will usually result in pixel sizes that are large compared to

555: the angular resolution of the instrument and therefore pixelization

556: effects must be taken into account.  Our treatment of the effect of

557: pixelization on the signal correlation function is approximate, \ie we use

558: a pixel window which is the average of the evaluated power spectrum for

559: every individual pixel.  To prevent these approximations from creating

560: errors in the final power spectrum, we ignore information from multipole

561: moments greater than some critical value where the approximation

562: introduces significant error.  Pixelization effects are discussed in more

563: detail in the Application section.

564:

565: \subsection{Iteration}

566:

567: A single application of the quadratic estimator of Eq.~\ref{eqn:quadest}

568: might not result in a $C_l$ that is sufficiently close to the likelihood

569: maximum.  This will be the case if the input $C_l$ is too far from the

570: likelihood maximum.  Fortunately, iterative application of

571: Eq.~\ref{eqn:quadest} has been shown to converge quite rapidly

572: \cite{bjk98}.

573:

574: When using the hierarchical decomposition approach, it is important that

575: the iteration be done {\it globally}.  That is, within each iteration, the

576: power spectrum from each submap should be estimated using the same input

577: $C_l$.  If iteration is performed within the submaps, the combined result

578: will suffer from cosmic bias~\cite{bjk00}, which results from the fact that

579: uncertainties in $C_l$ are not normally--distributed.  For a

580: normally--distributed variable, the curvature of the log of the likelihood

581: function is independent of location in the parameter space (because the

582: likelihood is a Gaussian).  However, for $C_l$, this curvature does depend

583: on location.  For larger values of $C_l$ the curvature is smaller (\ie the

584: variance is larger).  Thus, upward fluctuations should result in larger

585: variances than downward fluctuations and so if one combines them together

586: assuming Gaussianity, the net result is a downward bias due to the

587: over--weighting of the downward fluctuations.

588:

589: The combination procedure of Eq.~\ref{eqn:combine} implicitly assumes the

590: estimates are normally distributed.  We avoid the cosmic bias that might

591: result from this assumption by weighting the downward and upward

592: fluctuations equally. That is, we make sure to calculate ${\mathcal

593: F}_{\alpha l,\beta l'}$ from the same $C_l$ for all submaps.  Thus any

594: desired iteration, \eg motivated by a large correction from the input

595: $C_l$, should be done globally.

596:

597: Since the uncertainty in $C_l$ is non--Gaussian, specifying the $C_l$ that

598: maximizes the likelihood function, and $\langle \delta C_l \delta C_{l'}

599: \rangle$, does not completely characterize the uncertainty.  The

600: uncertainty can be approximately characterized by use of the ``offset

601: log--normal form'' \cite{bjk00}.  That is, the error in the quantity $Z_l

602: \equiv \ln\left(C_l + x_l\right)$ {\it is} approximately

603: normally--distributed.  The offset, $x_l$, is a measure of the noise

604: contribution to the uncertainty, as opposed to the sample--variance

605: contribution to the uncertainty.  It can be calculated as outlined in

606: \cite{bjk00}.

607:

608: \section{Application}

609:

610: First we discuss the specifications for the simulated maps we used. Then

611: we compare the results of HD with those of the exact method.

612:

613: \subsection{Simulation Map Details}

614:

615: We have applied our method using a Fortran code, which we have named {\it

616: Madcumba}, to two different simulated maps, hereafter simulations $A$ and

617: $B$. In both cases, the angular--power spectrum used was that of a {\it

618: COBE}--normalized adiabatic, scale--invariant ``lambda'' cold dark matter

619: ($\Lambda$CDM) model with $\Omega_\Lambda = 0.6$, $\Omega_b = 0.05$,

620: $\Omega_{\rm cdm}=0.35$ and $H_0 = 75\ {\rm km\,sec^{-1}\,Mpc^{-1}}$ and

621: was generated by the publicly available code CMBfast \cite{seljak96}.

622: The simulated signal maps were generated using the \texttt{synfast}

623: routine in the publicly available HEALPix package \cite{GoHi98}, at

624: HEALPix $N_{\rm side} = 256$ (level 8, where $N_{\rm side} = 2^{\rm

625: level}$), in which the pixel solid angle is around $(13.7')^2$, assuming a

626: circular beam with full--width at half--maximum of $20'$.  Finally, pixel

627: noise taken from a Gaussian distribution with zero correlations between

628: pixels was added to the maps. The only significant differences between our

629: two simulations are size and noise characteristics.

630:

631: The simulation $A$ map has $10^4$ pixels, is square in shape, and has a

632: homogeneous noise variance of $(20\ \mu K)^2$ in each pixel.  Its

633: relatively small size allows for the power spectrum to be estimated by the

634: exact method (\ie without dividing into submaps) using the MADCAP

635: package~\cite{borrill99}.  This is compared to our calculation via HD into

636: four equal--area square 2500 pixel submaps at full resolution and one

637: coarse 2500 pixel submap at HEALPix $N_{\rm side} = 128$ (level 7) which

638: covers the same area as the primary map.

639:

640: The simulation $B$ map is also square in shape and has $4\times 10^4$

641: pixels with a noise variance that is cosine--modulated throughout the map,

642: varying from $(20\ \mu K)^2$ to $9 \times (20\ \mu K)^2$. Here, we

643: decompose the primary map into sixteen submaps at full resolution, four

644: submaps at the next coarser resolution and one coarsest resolution submap

645: which covers the same area as the primary map but, by being two levels

646: coarser, contains $1/16^{\rm th}$ as many pixels. Thus, as with simulation

647: $A$, we use $n=2500$ pixel submaps.

648:

649: \subsection{Comparison with Exact Method}

650:

651: The top panel of Fig.~\ref{fig:pow_smallmap} shows estimates of the powers

652: from the individual submaps in simulation $A$.  The bottom panel shows

653: both the result of optimally combining them and the exact results obtained

654: directly from the primary map.  The solid line in both panels is the

655: original power spectrum for the simulations.  The differences between the

656: power estimates are less than 20\% of the standard error from the exact

657: method.

658:

659: Not only do the power spectrum estimates agree quite well, but so do the

660: estimates of the uncertainties.  The error bars in

661: Fig.~\ref{fig:pow_smallmap} are the square roots of the diagonal elements

662: of the respective Fisher matrices.  In Fig.~\ref{fig:fish_smallmap} one

663: can see how well entire rows of the exact and HD Fisher matrices agree.

664:

665: Clearly, the bigger the submaps at the finest resolution, the better this

666: approach works.  For a fixed length scale of interest, larger submaps

667: contain a greater fraction of corresponding pixel pairs, and therefore

668: achieve better spectral resolution ($\delta \ell$).  Unfortunately, the

669: compute--time, when dominated by the combine procedure, scales as $n$ and

670: therefore as $1/\delta \ell^2$ (or possibly $n^2$ but with a much smaller

671: pre--factor (see section IV)). Thus, choice of $n$ can be critical.  We

672: studied how our information loss varies with $n$ by comparing the error

673: bars from the HD procedure to the full analysis for $n=2500$ (the case

674: above), $n=1600$, and $n=900$.  The results are shown in

675: Fig.~\ref{fig:sigmab}.  Note that for the $n=2500$ case all the error bars

676: are increased over the exact case by less than 10\%.  These larger error

677: bars are consistent with the less than 20\% differences (in units of

678: variance of exact results) between the power estimates.

679:

680: \begin{figure}

681: \plotone{fig1.eps} \caption{Simulation $A$ Results.  Top panel:  Power

682: spectrum estimates from four individual full--resolution 2500 pixel

683: submaps (triangles) and one coarse 2500 pixel submap.  Bottom panel:

684: Power spectrum estimates from optimally combining the top--panel results

685: (solid circles) and from the exact calculation (open circles).  Note that

686: in both panels, points are slightly shifted horizontally for

687: clarity.\label{fig:pow_smallmap}}

688: \end{figure}

689:

690: The upward trend in error ratio with increasing band number is an effect

691: of decreasing spectral resolution.  To understand this, we examine

692: Fig.~\ref{fig:weight} which shows the ratio of the HD over the exact

693: method of the band contributions to the total weight, $W_b$, where:

694: \be

695: W_b \equiv \sum_{b'} F_{bb'}

696: \ee

697: and the total weight of an experiment is $W \equiv \sum_{b} W_b$. For this

698: analysis we switch to a finer binning of 25 bands, each with width $\delta

699: l = 30$.

700:

701: Note first the short--dashed line which is four times the ratio of $W_b$

702: for one full resolution submap over the one for the primary map.  If the

703: four submaps were uncorrelated, we would expect this ratio to be $\sim 1$.

704: However, since the submaps are correlated, this ratio is greater than 1.

705: We see that submap--submap correlations are more important at lower $\ell$

706: than higher $\ell$ values.

707:

708: \begin{figure}

709: \plotone{fig2.eps}

710: \caption{\label{fig:fish_smallmap}Three rows of the Fisher matrix

711: calculated exactly (solid lines) and also via the combination (HD)

712: procedure (dashed lines) for simulation $A$.}

713: \end{figure}

714:

715: \begin{figure}

716: \plotone{fig3.eps}

717: \caption{Error bars from HD divided by error bars for the exact analysis.

718: Each case represents a primary map with 4$n$ pixels divided into four

719: $n$--pixel full resolution submaps and one coarsened $n$-pixel map where

720: $n = 30 \times 30$ (triangles), $40 \times 40$ (squares) or $50 \times 50$

721: (hexagons).

722: \label{fig:sigmab}}

723: \end{figure}

724:

725: Though individual elements of the Fisher matrix may be larger for a

726: sub--optimal method than an optimal one, we know that the contribution

727: from a given band to the total weight {\it can not} be larger.  Thus, the

728: best we could hope for is that the ratio of $W_b$ for the HD method over

729: the exact method is near unity.  We see from Fig.~\ref{fig:weight} that it

730: is everywhere greater than 0.97.  Thus the fact that the combine procedure

731: gives at most 10\% larger error bars (20\% larger variances) in

732: Fig.~\ref{fig:sigmab} can not be due to any reduction in the total weight

733: (which we see is negligible), but must be due to how each $W_b$ is

734: distributed among the $F_{bb'}$.  In particular, it is the lower spectral

735: resolution of the smaller submaps which results in the $W_b$ being more

736: spread out within a Fisher matrix row and less concentrated in the

737: diagonal element $F_{bb}$ as is clear from the 7th row plotted in

738: Fig.~\ref{fig:fish_smallmap}.

739:

740: \begin{figure}

741: \plotone{fig4.eps}

742: \caption{$W_b/W_b^{\rm exact}$ where $W_b = \sum_{b'}F_{bb'}$. The $W_b$'s

743: are from analysis of the simulation $A$ map, but into finer bins of width

744: $\delta l = 30$.  The short--dashed line is $4W_b/W_b^{\rm exact}$ where

745: $W_b$ is just from analysis of one of the four full--resolution submaps;

746: the long--dashed line is $W_b/W_b^{\rm exact}$ where $W_b$ is from

747: analysis of the coarse resolution submap; the solid line is $W_b/W_b^{\rm

748: exact}$ where $W_b$ is from combining information from all five submaps.

749: \label{fig:weight}}

750: \end{figure}

751:

752: A plot of $F_{bb}$ ratios (similar to the $W_b$ ratio plot of

753: Fig.~\ref{fig:weight}) shows that the cost of this weight redistribution

754: within a Fisher matrix row is a decrease in the diagonal Fisher elements

755: (in the $\ell=250$ to $\ell=600$ range) to 80--85\% of the exact ones.

756: Not only is $F_{bb}$ suppressed then, but the larger off--diagonal

757: elements also lead to larger diagonal elements of $F^{-1}$.  With broader

758: bands (such as those used for Fig.~\ref{fig:sigmab}), the error--bar

759: increase due to degraded spectral resolution is not as severe.  The effect

760: of the larger off--diagonal elements propagates from band--to--band and is

761: least significant at the lower bands which are benefiting from the full

762: spectral resolution of the coarse submap.

763:

764: Also in Fig.~\ref{fig:weight} one can see that the pixelization effects

765: can be fairly severe.  This is unfortunate since we only treat the

766: pixelization influence on the signal--correlation matrix, $S$,

767: approximately. Our treatment is that provided with the HEALPix package,

768: which assumes that the correlation between two pixels only depends on the

769: angular distance between them and not on their orientation.  This is an

770: approximation for two reasons:  the pixels are anisotropic, and their

771: shapes depend on their location.  The validity of the approximate

772: window--function can vary from submap to submap if the submaps are not

773: large enough to have a representative sampling of all pixel shapes. This

774: is another reason to use large submaps.  We take each cross--level pixel

775: window function to be the geometric mean of the two auto--level pixel

776: window functions.

777:

778: Because our treatment of pixelization effects is approximate, we throw out

779: information from coarse submaps at a conservatively low $\ell$ value.  In

780: simulation $A$, for example, powers from the coarse resolution submap were

781: only considered for $\ell < 225$.  In the final combined results, the

782: higher bands only use information from the four fine resolution submaps.

783: We eliminate the influence of the coarse submap on the higher bands by

784: inserting very large numbers into diagonal elements of the $({\cal

785: F}^{-1})_{\alpha l,\alpha' l'}$ matrix.  This marginalization technique is

786: described in Appendix A of \cite{bjk98} and can be understood as

787: artificially adding some noise to these particular bands so as to give

788: them very low weight.

789:

790: The upturn in Fig.~\ref{fig:sigmab} after $\ell = 225$ where the coarse

791: submap information is no longer used indicates that there may be an

792: advantage to keeping the coarse submap information to yet higher $\ell$.

793: This would require a more accurate treatment of the pixel effect on the

794: signal correlation function and its derivatives with respect to $C_l$.

795: One way to do this, which would be fairly easy to implement and not cause

796: significant speed reduction, would be to avoid using pixel window

797: functions by calculating coarsened signal matrices directly from finer

798: ones.  For example, if the fine signal matrix is $s$ then the

799: next--coarser signal matrix, $S$, must be \be S= { {\alpha s

800: \alpha^T}\over 16} \ee where $\alpha_{ci}$ is one for all fine pixels $i$

801: in coarse pixel $c$ and zero otherwise.  Once again, we are summing four

802: pixels into one. The only approximations here come from approximations

803: made in calculating $s$.  If these approximations were acceptable for the

804: finer level, they will certainly be adequate for the coarser level.

805: Keeping the coarse level information out to higher bands may be very

806: important for extension to megapixel maps because it is the only other way

807: to improve spectral resolution besides increasing $n$ for the

808: highest--resolution submaps.

809:

810: \section{Analysis of General Megapixel Maps}

811:

812: The map from simulation $A$ has homogeneous white noise.  Below we will

813: discuss results from HD analysis of the map from simulation B in which the

814: noise is inhomogeneous but still uncorrelated.  Yet we believe HD will

815: work well on realistic maps with correlated noise.  In this section we

816: briefly make the case for the success of HD in the presence of correlated

817: noise and then move on to discuss how HD can be made to work for primary

818: maps with 100 to 1000 times more pixels than the simulation $A$ map.  We

819: will see that further approximations are necessary, but that they are

820: likely to work well.

821:

822: %HD can handle correlated noise

823: Even though our applications of HD have only been on simulated maps with

824: uncorrelated noise, we believe that HD will work well on realistic maps

825: with correlated noise.  This is easiest to see for correlations on length

826: scales smaller than the size of the smallest submaps.  Longer--range noise

827: correlations will not be treated accurately in the analysis of the

828: smallest submaps.  But this does not matter because the effect will only

829: be on lower--$\ell$ bands where the smallest submaps do not have much

830: weight. The affected bands will be those determined by coarser and larger

831: submaps that will once again be large compared to the correlation length.

832: Thus the prospects for HD on maps with correlated noise are quite good.

833:

834: %HD needs further approximations to be practical for megapixel maps

835: Applying HD as we have described it to megapixel maps is prohibitively

836: expensive in terms of the demand on computing resources. A rough scaling

837: argument is sufficient to demonstrate this point. In the megapixel regime,

838: we are strongly dominated by the calculation of all the elements of ${\cal

839: F}^{-1}_{\alpha l,\alpha' l'}$. The number of elements in this matrix is

840: $\sim (N/n)^2 N_b^2$. On an SGI Origin 2000, the calculation of a single

841: element of ${\cal F}^{-1}_{\alpha l,\alpha' l'}$ takes 188 sec

842: $(n/2500)^3$ on a single MIPS R12000 300 MHz processor where $n$ is the

843: number of pixels in a submap.  Thus the wall--clock time is

844: \be

845: \label{eqn:time1}

846: t \sim 1\ {\rm year} \left({500 \over n_{\rm

847: proc}}\right) \left({N\over 3 \times 10^6}\right)^2\left({n\over

848: 2500}\right) \left({N_b^2\over 1000}\right)

849: \ee

850: where we have assumed the efficient use of $n_{\rm proc}$ processors

851: \cite{endnote4}.  Thus the need to avoid exact calculation of every

852: element of ${\cal F}^{-1}_{\alpha l,\alpha' l'}$ is apparent.

853:

854: To make the case for the likely success of fast approximations to ${\cal

855: F}^{-1}_{\alpha l,\alpha' l'}$ we turn to the results from simulation $B$.

856: In Fig.~\ref{fig:pow_bigmap}, we plot four power spectra: one is the

857: result of optimally combining the individual power spectra; one is the

858: power spectrum of the coarsest submap; the other two are the result of a

859: {\it simple} averaging of the power spectra for the submaps within a given

860: resolution level as if they were independent.  Again, the solid line

861: represents the original input power spectrum.

862:

863: We find the {\it very} good agreement between simple averaging and the

864: exact combination (for the highest bands) to be very encouraging because

865: it is strong evidence that signal correlations between non--overlapping

866: submaps are not very important. We certainly see they are not important in

867: the highest bands which are influenced only by submaps with no spatial

868: overlap (since the submaps are all at the same resolution level).  If any

869: given band is only influenced by at most two or three levels and we only

870: need to calculate correlations for non--zero submaps then the vast

871: majority of submap pairs can be ignored.  Even if some cannot be ignored,

872: their relative insignificance means that there are probably crude

873: approximations to them that will work well.

874:

875: \begin{figure}

876: \plotone{fig5.eps}

877: \caption{Results from HD of the 200 by 200 pixel simulation $B$ map. There

878: are sixteen submaps at the finest resolution level (level 8 ($13.7'$)

879: pixels), four at the medium level (level 7 ($27.5'$) pixels) and one at

880: the coarsest level (level 6 ($55'$) pixels). Triangles and squares

881: represent the result of doing a naively weighted average of the power

882: spectrum estimates, \ie neglecting correlations, from the sixteen fine

883: submaps and the four coarse submaps, respectively. Pentagons represent

884: results for the one coarsest submap.  Filled circles show the results of

885: the optimal combination of all submaps in which all power--spectrum

886: correlations are computed exactly. As in Fig.~\ref{fig:pow_smallmap},

887: points are shifted horizontally for clarity.\label{fig:pow_bigmap}}

888: \end{figure}

889: \noindent

890:

891: Calculating only the correlations between overlapping submaps at adjacent

892: resolution levels takes time

893: \be

894: \label{eqn:time2}

895: t = 78\ {\rm hours} \left({200 \over n_{\rm

896: proc}}\right) \left({N\over 10^7}\right)\left({n\over

897: 5000}\right)^2 \left({N_b\over 40}\right)\left({\Delta N_b \over

898: 3}\right)

899: \ee

900:

901: where for each of the $N_b$ bands only the nearest $\Delta N_b$ bands are

902: considered \cite{endnote3}.  Calculating correlations between overlapping

903: submap pairs whose resolution levels differ by 2 will, at most, double the

904: time.  Correlations between non--overlapping map pairs may be significant

905: but can probably be treated approximately in an insignificant amount of

906: time.  Development and study of these approximations is probably necessary

907: for practical application of HD to megapixel maps.

908:

909: We also see from Fig.~\ref{fig:pow_bigmap} that even when there is a mix

910: of resolution levels influencing a band, using just one of those levels

911: provides a rough approximation. A fairly good ``quick--and--dirty''

912: power--spectrum estimator is the coarsest submap's power spectrum for band

913: 1, the coarse submaps' power spectrum for bands 2 and 3, and the finest

914: submaps' power spectrum for bands 4 to 8.  Such an estimator has its

915: applications, for example, finding a $C_l$ that is close enough to optimal

916: that one only needs a single iteration of the exact HD procedure.

917:

918: The scaling of $t$ with $N$ in Eq.~\ref{eqn:time2} is linear if $n$ is

919: fixed.  But if we fix spectral resolution and the area of the primary map,

920: then $n \propto N$ and therefore $t \propto N^3$ once again!  Or, at fixed

921: $N$ and primary map area, $t \propto (1/\delta l)^4$.  Our fiducial choice

922: above of $n=5000$ corresponds for {\it Planck} with $N=10^7$ and $r_p =

923: 3.5'$ to $\delta l=45$.  This may be sufficient since physical models have

924: fairly smooth power--spectra.  We see that the degree to which degraded

925: spectral resolution affects our ability to discriminate between different

926: models is a crucial issue for the applicability of HD to {\it Planck}.

927: We remind the reader that spectral resolution is the only thing that is

928: significantly compromised with HD;  Fig.~\ref{fig:weight} shows the total

929: weight from each band is within a few percent of optimal.

930:

931: \section{Comparison with Other Methods}

932:

933: The HD method has many advantages over other fast, approximate methods.

934: Perhaps the {\it chief} advantage is its ability to handle maps with

935: correlated noise.  Its main disadvantage is spectral resolution.  To

936: understand better these competitive advantages/disadvantages it is worth

937: spending some time discussing these other methods---especially since we

938: will see they are somewhat complementary and hence a hybrid approach may

939: be useful.

940:

941: This discussion of other methods is facilitated by writing down the

942: following generalization of Eq.~\ref{eqn:quadest}:

943: \be

944: \label{eqn:quadestW}

945: C_l = \sum_{l'} {1\over 2}F^{-1}_{ll'}{\rm Tr}\left[W\left(\Delta

946: \Delta^{\rm T} - N\right)W{\partial C \over \partial C_{l'}}

947: \right]

948: \ee

949: and Eq.~\ref{eqn:fish}:

950: \be

951: \label{eqn:fishW}

952: F_{ll'} = {1\over 2}{\rm Tr}\left[W{\partial C \over \partial C_l}

953: W {\partial C \over \partial C_{l'}}\right].

954: \ee

955: These equations specify a general unbiased quadratic estimator, with pixel

956: pair--weighting determined by $W$.  The $F_{ll'}$ matrix is derived by

957: demanding that the estimator be unbiased ($\langle C_l^{\rm estimate}

958: \rangle = C_l$).  In general, its inverse is not equal to $\langle \delta

959: C_l \delta C_{l'} \rangle$ which is instead given by

960: \be

961: \label{eqn:realfishW}

962: {\cal F}^{-1}_{ll'} = {1\over 2} F^{-1}_{ll''}F^{-1}_{l'l'''}

963: {\rm Tr}\left[A_{l''}CA_{l'''}C\right]

964: \ee

965: where $A_l \equiv W{\partial C \over \partial C_l }W$, similar to

966: Eq.~\ref{eqn:defA}.

967:

968: For the minimum--variance estimator, $W = C^{-1}$.  The

969: ``correlation--function'' approach (CF) of \cite{szapudi00} uses the

970: simpler $W=I$ in pixel space \cite{endnote1}.  Spherical--harmonic

971: transforming the map and averaging $|a_{lm}|^2$'s over $m$ uses $W=I$ in

972: spherical--harmonic space.  The multi--scale method we have just described

973: above likewise corresponds to a choice of $W$, although this $W$ is not

974: easily written down.

975:

976: It is worth pointing out that the estimator for CF requires on the order

977: of $N_b^2N^2$ operations where $N_b$ is the number of $\ell$-bands.  One

978: can get rid of the $N_b^2$ factor by rewriting it as an estimator for

979: $C(\theta)$ in fine bins of $\theta$ and then Legendre--transforming the

980: result, as was done in \cite{szapudi00}.  Further computational

981: accelerations are possible by use of KD--tree search techniques which use

982: coarse--graining at large distances \cite{colombi01,moore01}.  In

983: addition, fast spherical harmonic transforms lead to great time--savings

984: in harmonic methods.

985:

986: However, the simplicity of these other choices for $W$ does have

987: drawbacks.  Specifically, high--noise areas and low--noise areas make

988: equal contribution to the estimator.  To date, the success of these

989: methods has only been demonstrated on simulations with homogeneous white

990: noise. The first obvious improvement to CF is to replace

991: $W_{ij}=\delta_{ij}$ with $W_{ij} = \delta_{ij}/\sigma_i^2$ (in pixel

992: space) as suggested in \cite{szapudi00}.

993:

994: What is less obvious is how to weight pixel pairs in the presence of

995: correlated noise.  This is where further development of the CF approach is

996: most needed. One possible route to pursue is band--diagonal choices of

997: $W_{ij}$ which capture the spatially--local noise correlations.

998: Computation with band--diagonal $W$'s can still be quite fast; they are

999: still order of $N^2$ as long as the bandwidth is less than $\sqrt{N}$.

1000: Perhaps longer--range correlations could be included in some hybrid scheme

1001: of HD and CF.  Here CF (with band--diagonal $W_{ij}$) would be used on the

1002: primary map and then HD would be used to calculate lower--$\ell$ values

1003: which may have been affected by long--range noise correlations.  This

1004: hybrid scheme also has the advantage of complementing HD where its

1005: spectral resolution is lowest \cite{endnote2}.

1006:

1007: Although the calculation of $C_l$ is fast with simple choices for $W$, the

1008: calculation of the error covariance matrix (Eq.~\ref{eqn:realfishW}) is

1009: slow; i.e. the number of operations scales with $N^3$ because of the

1010: matrix multiplications.  One option is to estimate the errors by

1011: Monte--Carlo methods \cite{szapudi00}.  Another is to combine the CF and

1012: HD approaches in yet another way:  use CF as a means to produce an input

1013: power spectrum sufficiently close to the optimal one that only a single

1014: iteration of HD is required.

1015:

1016: \section{Conclusions}

1017:

1018: We have concentrated on developing a fast and reliable method for

1019: calculating power spectra and their uncertainties from maps with $N =

1020: 10^4$ to $10^5$ pixels.  Methods that work in this regime are of immediate

1021: practical importance.  Our tests show very good agreement with exact

1022: methods at the lower end of our $N$ range where the exact analysis is

1023: feasible on a supercomputer.  The HD method is the only existing method

1024: for calculating a power spectrum and its uncertainties from general,

1025: inhomogeneous correlated noise patterns with maps of this size in

1026: reasonable amounts of time \cite{hivon01}.

1027:

1028: We have not tested our method on maps with correlated noise.  But since

1029: noise--correlations are taken into account exactly within each submap, we

1030: expect our method to handle correlated noise effectively, unlike the other

1031: fast methods mentioned above.  These expectations will be put to the test

1032: soon as HD is applied to existing datasets from LDB flights, such as {\it

1033: Archeops\/}\footnote{\texttt{http://www-crtbt.polycnrs-gre.fr/archeops/}}

1034: and {\it TopHat\/}\footnote{\texttt{http://topweb.gsfc.nasa.gov/}}.

1035:

1036: The local nature of the method has some advantages for controlling

1037: contamination of the final power spectrum result.  In the extreme, one can

1038: simply cull submaps with the largest foreground contamination.  Less

1039: drastically, one could down--weight the power spectrum determinations from

1040: submaps according to the suspected level of contamination.

1041:

1042: To summarize, we have developed and investigated an HD method of

1043: power--spectrum estimation.  We have demonstrated that for LDB--size maps

1044: HD is sufficiently fast and insignificantly sub--optimal.  Its main

1045: advantages over other fast methods are its generality (including its

1046: ability to handle correlated noise) and the fact that the power spectrum

1047: uncertainties are calculated directly.  Application to larger maps will

1048: rely on further approximations which we expect to work well but require

1049: further investigation.  The main disadvantage to HD is the degraded

1050: spectral resolution at the smallest angular scales.  The impact of this

1051: degradation on parameter--determination also warrants further

1052: investigation.  The combination of HD with other methods may be fruitful.

1053:

1054: {\it Madcumba}, a Fortran 90 implementation of the HD procedure, will be

1055: made available for public use. Comments and questions should be directed

1056: to O. Dor\'e at \texttt{dore@iap.fr}.

1057:

1058: \acknowledgements

1059: %\vskip 0.2in

1060: %\noindent

1061: O.D. is grateful to the UC Davis Cosmology group for its warm hospitality.

1062: L.K. is grateful to IAP for the same.  We benefited from conversations with

1063: J. R. Bond, J. Borrill, F. Bouchet, A. Jaffe, R. Stompor, P. Koev, D.

1064: Vibert and R. Teyssier and the computer resources of S. Colombi and NERSC.

1065:

1066: \begin{thebibliography}{ucsc}

1067:

1068: \bibitem{jaffe00} E.g., A. Jaffe et al., Phys. Rev. Lett. {\bf 86}, 3475-3479

1069: (2001).

1070:

1071: \bibitem{forecast} L. Knox,

1072: %``Determination of Inflationary Observables from CMB Anisotropy Experiments'',

1073: Phys. Rev. D48, 3502 (1995);

1074: G. Jungman, M. Kamionkowski, A. Kosowsky, and D. Spergel, Phys.

1075: Rev. D {\bf 54}, 1332 (1996); J. R. Bond, G. Efstathiou, and M. Tegmark,

1076: Mon. Not. Roy. Astron. Soc., {\bf 291}, L33 (1997);

1077: D. Eisenstein, W. Hu

1078: and M. Tegmark,

1079: %``Cosmic Complementarity: H 0 and Omega M from

1080: %Combining Cosmic Microwave Background Experiments and Redshift Surveys'',

1081: Astrophys. J. {\bf 504}, L57 (1998).

1082:

1083: \bibitem{bcjk} J.~R. Bond, R. Crittenden, A.~H. Jaffe and L. Knox,

1084: %``Computing Challenges of the Cosmic Microwave Background'',

1085: Computing in Science and Engineering, vol. 1, no. 2, 21 (1999).

1086:

1087: \bibitem{borrill99} J. Borrill, Phys. Rev. D {\bf 59}, 027302 (1999).

1088:

1089: \bibitem{dore01} O. Dor\'e, R. Teyssier, F.R. Bouchet, D. Vibert,

1090: astro-ph/0101112, see also http://ulysse.iap.fr/download/mapcumba

1091:

1092: \bibitem{wandelt} B.D. Wandelt, E. Hivon \& K. G{\'o}rski,

1093: astro-ph/0008111;

1094: %The Pseudo-$C_l$ method: Cosmic microwave background anisotropy

1095: %power spectrum statistics for high precision cosmology

1096: %Submitted to Physical Review D in January 2000

1097: astro-ph/9808292

1098:

1099: \bibitem{szapudi00} I. Szapudi, S. Prunet, D. Pogosyan, A. Szalay and J.R. Bond,

1100: astro-ph/0010256

1101: %       Title: Fast CMB Analyses via Correlation Functions

1102:

1103: \bibitem{oh99} S.P. Oh, D.N. Spergel and G. Hinshaw,

1104: Astrophys. J. {\bf 510}, 551 (1999).

1105:

1106: \bibitem{wandelt01} B. Wandelt 2001, Proceedings of

1107: MPA/MPE/ESO Conference ``Mining the Sky'', July 31--August 4, 2000,

1108: Garching, Germany, astro-ph/0012333, astro-ph/0012416

1109:

1110: \bibitem{bjk98} J.R. Bond, A.H. Jaffe \& L. Knox, Phys. Rev. D {\bf 57},

1111: 2117 (1998).

1112:

1113: \bibitem{tegmark97} These equations were independently derived as

1114: the optimal, unbiased quadratic estimator in

1115: M. Tegmark, Phys. Rev. D {\bf 55}, 5895 (1997).

1116: %``How to

1117: %measure CMB power spectra without losing information'', Phys. Rev. D

1118:

1119: \bibitem{endnote5} This can also be understood as the usual

1120: problem of localizing simultaneously in position and momentum:

1121: M. Tegmark, Mon. Not. Roy. Astron. Soc. {\bf 280}, 299--308 (1996).

1122:

1123: \bibitem{borrill01} J. Borrill and P. Koev 2001, in preparation.

1124:

1125: \bibitem{GoHi98} G{\'o}rski K.M., Hivon E., Wandelt B.D.,

1126: in proceedings of the MPA/ESO Garching Conference 1998, eds Banday

1127: A.J., Sheth K. and L. Da Costa and

1128: \texttt{http://www.eso.org/\~{}kgorski/healpix/}

1129:

1130: \bibitem{jaffe01} A. Jaffe, private communication.

1131:

1132: \bibitem{bjk00} J.R. Bond, A.H. Jaffe \& L. Knox, Astrophys. J.

1133: {\bf 533}, 19 (2000).

1134:

1135: \bibitem{seljak96} U. Seljak \& M. Zaldarriaga, Astrophys. J. {\bf 469}, 437 (1996).

1136:

1137: \bibitem{endnote4}We

1138: tested the scaling with $n_{\rm proc}$ by running {\it Madcumba} using up

1139: to

1140: $77$ processors.

1141:

1142: \bibitem{endnote3}We remind the reader that application of HD to

1143: megapixel and larger maps requires some way to calculate $N$ for the

1144: sub--maps from the time--ordered data.  This could be accomplished

1145: by the method we briefly described in subsection IID, which will

1146: be described in more detail in \cite{borrill01}.

1147:

1148: \bibitem{endnote1} That

1149: the correlation--function approach can be regarded as a quadratic

1150: estimator with sub--optimal weighting was emphasized in \cite{szapudi00}.

1151:

1152: \bibitem{colombi01} S. Colombi {\it et al.} 2001, in preparation.

1153:

1154: \bibitem{moore01} A. Moore {\it et al.} 2001, Fast Algorithms and

1155: Efficient Statistics: N-point Correlation Functions, Proceedings of

1156: MPA/MPE/ESO Conference ``Mining the Sky'', July 31--August 4, 2000,

1157: Garching, Germany, astro-ph/0012333

1158:

1159: \bibitem{endnote2} The speed--up with KD--tree search techniques

1160: will also lead to some spectral resolution degradation.

1161:

1162: \bibitem{hivon01} A possible exception is

1163: a Monte--Carlo pseudo-$C_l$ method to be described in

1164: E. Hivon {\it et al.} 2001, in preparation.

1165:

1166: \end{thebibliography}

1167: \end{document}

1168:

1169:

1170: