0809.2794/ms.tex
1: 
2: 
3: %\documentclass[aps,nofootinbib]{revtex4}
4: \documentclass[]{emulateapj}
5: \usepackage{epsfig, natbib, graphicx, color}
6: \usepackage{apjfonts}
7: 
8: \def\black{\color{black}}
9: \def\white{\color{white}}
10: 
11: \input epsf
12: \newcommand{\sfig}[2]{\centerline{ \epsfxsize = #2 \epsfbox{#1} }}
13: 
14: 
15: %--------------------------------------------------
16: %--------------------------------------------------
17: % Cosmology
18: 
19: \newcommand{\OL}{\Omega_\Lambda}
20: \newcommand{\Om}{\Omega_m}
21: 
22: 
23: %--------------------------------------------------
24: %--------------------------------------------------
25: 
26: \newcommand{\Mpc}{\mbox{Mpc}}
27: \newcommand{\hMpc}{h^{-1}\mbox{Mpc}}
28: \newcommand{\msun}{M_\odot}
29: \newcommand{\bm}[1]{\mathbf{#1}}
30: 
31: \newcommand{\ncl}[0]{\bar{n}_{cl}}
32: \newcommand{\nn}{\nonumber}
33: \newcommand{\avgN}{\langle N|m \rangle}
34: \newcommand{\NNm}{\langle N(N-1)|m \rangle}
35: \newcommand{\NNNm}{\langle N(N-1)(N-2)|m \rangle}
36: \newcommand{\avgn}{\langle \bar n \rangle}
37: 
38: \newcommand{\avgNa}{\langle \bar N_a \rangle}
39: 
40: \newcommand{\eval}[0]{\mbox{\Large $\vert$\normalsize}}
41: 
42: \newcommand{\na}{\bar n_a}
43: \newcommand{\nap}{\bar n_{a'}}
44: \newcommand{\ba}{b_a}
45: \newcommand{\bap}{b_{a'}}
46: \newcommand{\psia}{\psi_a}
47: \newcommand{\psiap}{\psi_{a'}}
48: \newcommand{\tpsia}{\tilde\psi_a}
49: \newcommand{\avgpsia}{\langle \psia|m \rangle}
50: \newcommand{\avgpsiap}{\langle \psiap|m' \rangle}
51: \newcommand{\avgsbcga}{\langle\sigma_{BCG}^2|a \rangle}
52: 
53: \newcommand{\Na}{N_a}
54: \newcommand{\Nap}{N_{a'}}
55: 
56: 
57: \newcommand{\sbcg}{\sigma_{BCG}^2}
58: \newcommand{\zetabcg}{\zeta_{BCG}}
59: \newcommand{\zetasat}{\zeta_{sat}}
60: 
61: \newcommand{\cs}{c_{s^2}}
62: 
63: \newcommand{\poa}{p^0_a}
64: \newcommand{\pa}{p_a}
65: \newcommand{\pap}{p_{a'}}
66: \newcommand{\pgala}{p^{gal}_a}
67: 
68: \newcommand{\fa}{f_{a,a'}}
69: 
70: \newcommand{\nm}{ \frac{d\avgn}{dm}}
71: \newcommand{\nmp}{\frac{d\avgn}{dm'}}
72: \newcommand{\avgna}{\langle \na \rangle}
73: \newcommand{\avgnap}{\langle \nap \rangle}
74: 
75: \newcommand{\sa}{s_a^2}
76: \newcommand{\sap}{s_{a'}^2}
77: \newcommand{\avgsa}{\langle s_a^2 \rangle}
78: \newcommand{\avgsigma}{\langle \sigma^2|m\rangle}
79: 
80: \newcommand{\ssat}{\sigma_{sat}^2}
81: 
82: \newcommand{\fsat}{f_{sat}}
83: 
84: \newcommand{\Nt}{R_{T}}
85: \newcommand{\Ng}{N_{\rm gals}}
86: \newcommand{\N}{R_{obs}}
87: \newcommand{\Nmin}{N_{min}}
88: \newcommand{\mmin}{m_{min}}
89: \newcommand{\Nsat}{R_{sat}}
90: 
91: \newcommand{\om}{\Omega_m}
92: \newcommand{\ode}{\Omega_{DE}}
93: \newcommand{\ob}{\Omega_b}
94: 
95: \newcommand{\sA}{\sigma_{15}^2}
96: \newcommand{\NA}{R_{15}}
97: \newcommand{\aN}{{\alpha_N}}
98: \newcommand{\as}{{\alpha_\sigma}}
99: 
100: \newcommand{\avg}[1]{\left\langle #1 \right\rangle}
101: 
102: \newcommand{\Pchi}{P_{\chi^2}}
103: 
104: \newcommand{\pnn}{P(\N|\Nt)}
105: \newcommand{\ps}{P_s(\N|\Nt)}
106: \newcommand{\pn}{P_n(\N|\Nt)}
107: 
108: \newcommand{\corr}{C_{a,a'}}
109: \newcommand{\lk}{{\cal{L}}}
110: 
111: \newcommand{\zh}{z_h}
112: \newcommand{\zhp}{{z_h'}}
113: \newcommand{\zc}{z_c}
114: \newcommand{\bz}{b_z}
115: \newcommand{\zmin}{z_{min}}
116: \newcommand{\zmax}{z_{max}}
117: \newcommand{\rhob}{\rho_b}
118: 
119: \newcommand{\chip}{{\chi'}}
120: 
121: \newcommand{\hatn}{\bm{\hat n}}
122: \newcommand{\hatnp}{\bm{\hat{n}'}}
123: 
124: \newcommand{\gaa}{g_a}
125: \newcommand{\gaap}{g_{a'}}
126: \newcommand{\lmin}{L_{min}}
127: 
128: \newcommand{\Ns}{N_s}
129: 
130: \newcommand{\bn}{\hat \bm{n}}
131: 
132: \newcommand{\bx}{\bm{x}}
133: \newcommand{\bp}{\bm{p}}
134: \newcommand{\bq}{\bm{q}}
135: \newcommand{\br}{\bm{r}}
136: 
137: \newcommand{\LCDM}{\Lambda\mbox{CDM}}
138: \newcommand{\Rmax}{R_{max}}
139: 
140: \newcommand{\xioh}{\xi^{1h}(r|\bx')}
141: \newcommand{\xith}{\xi^{2h}(r|\bx')}
142: 
143: \newcommand{\Rc}{R_c}
144: \newcommand{\LBCG}{L_{BCG}}
145: \newcommand{\Lsat}{L_{sat}}
146: 
147: \newcommand{\kpc}{\mbox{kpc}}
148: 
149: \newcommand{\Lx}{L_X}
150: \newcommand{\Lxa}{\tilde\Lx}
151: 
152: \newcommand{\Var}{\mbox{Var}}
153: 
154: \newcommand{\keV}{\mbox{keV}}
155: 
156: \newcommand{\ergs}{\mbox{ergs}}
157: \newcommand{\erf}{\mbox{erf}}
158: 
159: 
160: \citestyle{aa}
161: \shortauthors{Rozo et al.}
162: \shorttitle{Scatter in the Mass-Richness Relation}
163: 
164: \begin{document}
165: \title{Constraining the Scatter in the Mass-Richness Relation of maxBCG Clusters With Weak Lensing
166: and X-ray Data}
167: \author{Eduardo Rozo\altaffilmark{1}, Eli S. Rykoff\altaffilmark{2}, August Evrard\altaffilmark{3,4,5}, Matthew Becker\altaffilmark{6},
168: Timothy McKay\altaffilmark{3,4,5}, Risa H. Wechsler\altaffilmark{7}, Benjamin P. Koester\altaffilmark{8,9},
169: Jiangang Hao\altaffilmark{3}, Sarah Hansen\altaffilmark{8,9}, 
170: Erin Sheldon\altaffilmark{10}, David Johnston\altaffilmark{11}, James Annis\altaffilmark{12}, Joshua Frieman\altaffilmark{8,9,12}}
171: 
172: \altaffiltext{1}{Center for Cosmology and Astro-Particle Physics (CCAPP), The Ohio State University, Columbus, OH 43210}
173: \altaffiltext{2}{TABASGO Fellow, Physics Department, University of California at Santa Barbara, 2233B Broida Hall, Santa Barbara, CA 93106}
174: \altaffiltext{3}{Physics Department, University of Michigan, Ann Arbor, MI 48109}
175: \altaffiltext{4}{Astronomy Department, University of Michigan, Ann Arbor, MI 48109}
176: \altaffiltext{5}{Michigan Center for Theoretical Physics, Ann Arbor, MI 48109}
177: \altaffiltext{6}{Department of Physics, The University of Chicago, Chicago, IL 60637}
178: \altaffiltext{7}{Kavli Institute for Particle Astrophysics \& Cosmology,
179:   Physics Department, and Stanford Linear Accelerator Center,
180:   Stanford University, Stanford, CA 94305}
181: \altaffiltext{8}{Department of Astronomy and Astrophysics, The University of Chicago, Chicago, IL 60637}
182: \altaffiltext{9}{Kavli Institute for Cosmological Physics, The University of Chicago, Chicago, IL 60637} 
183: \altaffiltext{10}{Center for Cosmology and Particle Physics, Physics Department, New York University, New York, NY 10003}
184: \altaffiltext{11}{ Jet Propulsion Laboratory, 4800 Oak Grove Drive, Pasadena, CA 91109}
185: \altaffiltext{12}{Fermi National Accelerator Laboratory, P.O. Box 500, Batavia, IL 60510}
186: 
187: 
188: \begin{abstract}
189: We measure the logarithmic scatter in mass at fixed richness for clusters in the maxBCG cluster catalog, an
190: optically selected cluster sample drawn from SDSS imaging data.  Our measurement is achieved by demanding
191: consistency between available weak lensing
192: and X-ray measurements of the maxBCG clusters, and the X-ray luminosity--mass relation inferred from the
193: 400d X-ray cluster survey, a flux limited X-ray cluster survey.
194: We find $\sigma_{\ln M|N_{200}}=0.45^{+0.20}_{-0.18}$
195: ($95\%$ CL) at $N_{200}\approx 40$, where $N_{200}$ is the number of red sequence galaxies in
196: a cluster.  As a byproduct of our analysis, we also obtain a constraint
197: on the correlation coefficient between $\ln \Lx$ and $\ln M$ at fixed richness, which is best expressed
198: as a lower limit, $r_{L,M|N} \geq 0.85\ (95\%\ \mbox{CL})$.  This is the first observational constraint
199: placed on a correlation coefficient involving two different cluster mass tracers.
200: We use our results to produce a state of the art estimate of the halo mass function at $z=0.23$ --- the
201: median redshift of the maxBCG cluster sample --- 
202: and find that it is consistent with the WMAP5 cosmology.   Both the mass function data
203: and its covariance matrix are presented.
204: \end{abstract}
205: 
206:  \keywords{galaxies: clusters -- X-rays: galaxies: clusters -- cosmology: observation}
207: 
208: 
209: \section{Introduction}
210: 
211: The space density of galaxy clusters as a function of cluster mass is a well-known
212: cosmological probe \citep[see e.g.][]{holderetal01,haimanetal01,rozoetal04,limahu04}, 
213: and ranks among the best observational tools
214: for constraining $\sigma_8$, 
215: the normalization of the matter power spectrum in the low redshift
216: universe \citep[see e.g.][]{frenketal90,henry91,
217: schueckeretal03,gladdersetal07,rozoetal07a}.\footnote{$\sigma_8$ 
218: is formally defined
219: as the rms amplitude of linear matter density fluctuations averaged over spheres with radius
220: $R=8\ h^{-1}\ \Mpc$.}  The basic idea is this: in the high mass limit,
221: the cluster mass function falls off exponentially with mass, with the fall-off  
222: depending sensitively on the amplitude of the matter density fluctuations.  
223: Observing this exponential cutoff can thus place tight constraints 
224: on $\sigma_8$.
225: In practice, however, the same exponential dependence that makes cluster abundances a powerful
226: cosmological probe also renders it susceptible to an important systematic effect,
227: namely uncertainties in the estimated masses of clusters.  
228: 
229: Because mass is not
230: a direct observable, cluster masses must be determined using observable mass
231: tracers such as X-ray emission, SZ decrements, weak lensing shear, or cluster richness (a measure of the galaxy
232: content of the cluster).   Of course, such mass estimators are noisy,
233: meaning there can be significant scatter between the observable mass tracer
234: and cluster mass.  Since the mass function declines steeply with mass, up-scattering of low mass 
235: systems into high mass bins can result in a significant boost to the number of systems with apparently 
236: high mass \citep{limahu05}.  If this effect is not properly modeled,
237: the value of $\sigma_8$ derived from such a cluster sample will be overestimated.
238: 
239: One approach for dealing with this difficulty is to employ mass tracers
240: that have minimal scatter, thereby reducing the impact of
241: said scatter on the recovered halo mass function. 
242: For instance, \citet{kravtsovetal06} introduced a new X-ray mass estimator, $Y_X=M_{gas}T_X$,
243: which in their simulations exhibits an intrinsic scatter of only $\approx 8\%$, independent
244: of the dynamical state of the cluster. 
245: Use of a mass estimator with such low scatter should lead to 
246: improved estimates of $\sigma_8$ from X-ray cluster surveys  \citep{pierpaolietal01,rb02,schueckeretal03,
247: henry04,staneketal06}.
248: 
249: Such tightly-correlated mass tracers are not always available.
250: In such cases, determination of the scatter in the mass-observable relation 
251: is critical to accurately inferring the mass function and thereby determining cosmological parameters.
252: Of course, in practice, it is impossible to determine this scatter to arbitrary
253: accuracy, but since the systematic boost to the mass function is
254: proportional to the square of the scatter \citep{limahu05} (i.e. the variance), even moderate constraints
255: on the scatter can result in tight $\sigma_8$ constraints.
256: 
257: In this paper, we use optical and X-ray observations to constrain the scatter in the mass--richness 
258: relation for the
259: maxBCG cluster catalog presented in \citet{koesteretal07a}.
260: Specifically, we use observational constraints on the mean 
261: mass--richness relation, and on the mean and scatter of the $\Lx-$richness relation,
262: to convert independent estimates of the scatter in the $\Lx-M$ relation into estimates
263: of the scatter in the mass--richness relation.  An interesting byproduct
264: of our analysis is a constraint on the correlation coefficient between 
265: mass and X-ray luminosity at fixed richness.  To our knowledge, this is the first
266: time that a correlation coefficient involving multiple cluster mass tracers
267: has been empirically determined.
268: 
269: The layout of the paper is as follows. In section \ref{sec:notation} we lay out the notation and definitions 
270: used throughout the paper.  Section \ref{sec:data} presents the
271: data sets used in our analysis.  In section \ref{sec:rough} we present a pedagogical
272: description of our method for constraining the scatter in the richness-mass relation,
273: while section \ref{sec:formalism} formalizes the argument.  Our results are found
274: in section \ref{sec:results}, and we compare them to previous work in section \ref{sec:other_work}.
275: In section \ref{sec:mf}, we use our result to estimate the halo mass function in the local
276: universe at $z=0.23$, the median redshift of the maxBCG cluster sample, and we demonstrate
277: that our recovered mass function is consistent with the latest cosmological constraints
278: from WMAP \citep{wmap08}.  A detailed cosmological analysis of our results will be presented
279: in a forthcoming paper (Rozo et al., in preparation).  Our summary and conclusions are 
280: presented in section \ref{sec:conclusions}.
281: 
282: \subsection{Notation and Conventions}
283: \label{sec:notation}
284: 
285: We summarize here the notation and conventions
286: employed in this work.
287: Given any three cluster mass tracers (possibly including mass itself) $X,Y,$
288: and $Z$, we make the standard assumption that the probability distribution $P(X,Y|Z)$ is 
289: a bivariate lognormal.  The parameters $A_{X|Z}$, $B_{X|Z}$, and $\alpha_{X|Z}$ are defined such that
290: %
291: \begin{eqnarray}
292: \avg{\ln X|Z} & = & A_{X|Z}+\alpha_{X|Z}\ln Z \\
293: \ln \avg{X|Z} & = & B_{X|Z} + \alpha_{X|Z}\ln Z.
294: \end{eqnarray}
295: %
296: Note the slopes of the mean and logarithmic mean are the same, as appropriate
297: for a log-normal distribution.  The scatter in $\ln X$ at fixed $Z$ is denoted $\sigma_{X|Z}$,
298: and the correlation coefficient between $\ln X$ and $\ln Y$ at fixed $Z$ is denoted
299: $r_{X,Y|Z}$.  {\it We emphasize that all quoted scatters are the scatter in the natural logarithm,
300: not in dex.}
301: Note these parameters are simply the elements of the covariance matrix
302: specifying the Gaussian distribution $P(\ln X,\ln Y|\ln Z)$.
303: Under our lognormal assumption for $P(X,Y|Z)$, the parameters $A_{X|Z}$ and 
304: $B_{X|Z}$ are related via
305: %
306: \begin{equation}
307: B_{X|Z} = A_{X|Z}+\frac{1}{2}\sigma_{X|Z}^2.
308: \end{equation}
309: 
310: In this work, the quantities of interest are cluster mass $M$, X-ray luminosity $\Lx$, and 
311: cluster richness $N$.  Unless otherwise specified, cluster mass is defined as 
312: $M_{500c}$, the mass contained within an overdensity of 
313: 500 relative to critical.  $\Lx$ is the total luminosity in the rest-frame $0.5-2.0\ \keV$ band,
314: and $N$ is the maxBCG richness measure $N_{200}$, the number of red sequence galaxies
315: with luminosity above $0.4L_*$ within an aperture such that the mean density within said
316: radius is, on average, $200\Omega_m^{-1}$ times the mean galaxy density assuming $\Omega_m=0.3$.  
317: Likewise, unless otherwise
318: stated all parameters governing the relations between $M$, $\Lx$, and $N$ assume that
319: $M$ is measured in units of $10^{14}\ \msun$, $\Lx$ is measured in units of
320: $10^{43}\ \mathrm{ergs}/\mathrm{s}$, and $N$ is measured in ``units'' of $40$ galaxies.
321: For instance, including units explicitly, the mean relation between
322: cluster mass and richness reads
323: %
324: \begin{equation}
325: \frac{\avg{M|N}}{10^{14}\ \msun} = \exp(B_{M|N}) \left (\frac{N}{40}\right)^{\alpha_{M|N}}.
326: \end{equation}
327: %
328: A Hubble constant parameter $h=0.71$ is assumed throughout.\footnote{For
329: other values of $h$, our weak lensing masses scale as $M \propto h^{-1}$ and the X-ray
330: luminosities as $\Lx \propto h^{-2}$.}  In addition, the weak lensing data presented in this
331: analysis assumed a flat $\Lambda$CDM cosmology with $\Omega_m=0.27$.   
332: The recovered mass function has the standard Hubble parameter
333: degeneracy.
334: 
335: %-----------------------------------------------------
336: %-----------------------------------------------------
337: %-----------------------------------------------------
338: %-----------------------------------------------------
339: %-----------------------------------------------------
340: %-----------------------------------------------------
341: %-----------------------------------------------------
342: 
343: \section{Data Sets}
344: \label{sec:data}
345: 
346: In this work we use the public maxBCG cluster catalog presented in \citet{koesteretal07a},
347: which is an optically selected volume limited catalog of close to $14,000$ clusters 
348: over the redshift range $z\in[0.1,0.3]$.  These clusters were found
349: in $\sim 7500\ \deg^2$ of imaging data from the Sloan Digital Sky Survey \citep[SDSS,][]{yorketal00}
350: using the maxBCG cluster finding algorithm \citep{koesteretal07}.  This algorithm identifies
351: clusters as overdensities of red sequence galaxies. All clusters are assigned a redshift
352: based on the SDSS photometric data only, and these redshifts are known to be accurate 
353: to within a dispersion $\Delta z \approx 0.01$.  Every cluster is also assigned a richness
354: measure $N_{200}$, which is the number of red sequence galaxies above a luminosity cut
355: of $0.4L_*$ and within a
356: specified scaled aperture, centered on the Brightest Cluster Galaxy (BCG) of
357: each cluster.  Only clusters with $N_{200} \geq 10$ are included in the final catalog.
358: Interested readers are referred to \citet{koesteretal07a} and \citet{koesteretal07} for further 
359: details.  In the interest
360: of economy of notation, from now on we denote the maxBCG richness measure
361: simply as $N$.
362: 
363: The relationship between cluster richness and various well known mass tracers
364: has been studied in large, homogeneous samples, such as 2MASS \citep{daietal07}
365: and SDSS \citep{beckeretal07, 
366: johnstonetal07,rykoffetal08a,mandelbaumetal08}.  Of particular
367: interest to us are the weak lensing measurements of the mean mass as a function of richness, 
368: and the X-ray measurements of the mean and scatter of the
369: X-ray luminosity as a function of richness.   The former analysis has been carried out
370: by \citet{johnstonetal07} based on the weak lensing data presented in
371: \citet{sheldonetal07}, and independently by \citet{mandelbaumetal08b}.
372: In short, \citet{sheldonetal07} stacked maxBCG clusters 
373: within narrow richness bins, and measured the average weak lensing
374: shear profile of the clusters.
375: These shear profiles were turned into surface mass density contrast profiles
376: using the redshift distribution of background sources estimated with the methods
377: of \citet{limaetal08} and the neural net photometric redshift estimators described
378: in \citet{oyaizuetal08}. Then,
379: \citet{johnstonetal07} fit the resulting profiles using a halo model scheme 
380: to obtain tight constraints on the mean mass of maxBCG clusters for 
381: each of the richness bins under consideration.  
382: The \citet{mandelbaumetal08} analysis is very similar in spirit to the one described above.
383: The main differences are the way the source redshift distribution is estimated, and 
384: the details of the model fitting used to recover the masses.  The differences in
385: the results between these two analyses are discussed in appendix \ref{sec:m-n_priors},
386: where we use them to set priors on the mass--richness relation.
387: 
388: The measurement of the mean X-ray luminosity of maxBCG clusters has
389: been carried out by \cite{rykoffetal08a} following an approach similar to that pioneered
390: in \citet{daietal07}.  The necessary X-ray data is readily available from the ROSAT All-Sky
391: Survey \citep[RASS,][]{vogesetal99}.  In short, \cite{rykoffetal08a} stacked
392: the RASS photon maps \citep{vogesetal01} centered on
393: maxBCG clusters in narrow richness bins.  The background subtracted 
394: stacked photon counts within a $750\ h^{-1}\ \kpc$ aperture were used to 
395: estimate the mean X-ray luminosity $\Lx$ in the $0.1-2.4\ \keV$ rest frame
396: of the clusters.  In addition, \cite{rykoffetal08a} measured the scatter in
397: X-ray luminosity at fixed richness by individually measuring $\Lx$ for
398: all maxBCG clusters with $N\geq 30$.  It is worth noting that due to the 
399: shallowness of RASS,  many of the maxBCG clusters are not X-ray 
400: luminous enough to be detected individually.  However, non-detection
401: and upper limits for $\Lx$ for individual systems 
402: were properly taken into consideration using the
403: Bayesian approach detailed in \citet{kelly07}, and the recovered mean
404: X-ray luminosity from this Bayesian analysis was fully consistent
405: with the stacked means.
406: 
407: In addition to the data sets above, we use the constraints on the $\Lx-M$ relation from 
408: \citet{vikhlininetal08}.  These constraints are based on the 400d cluster X-ray survey, a flux
409: limited cluster survey based on ROSAT pointed observations with an effective
410: sky coverage of 397 $\deg^2$ \citep{bureninetal07}.  Briefly, \citet{vikhlininetal08} measured 
411: both the total soft band X-ray luminosity and the cluster
412: mass for each cluster in the sample.  X-ray luminosities are estimated from
413: ROSAT data, and measure the luminosity in the rest-frame $0.5-2.0\ \keV$ band,
414: extrapolated to infinity assuming standard $\beta$ profiles.
415: Cluster masses are estimated based
416: on the values of $Y_X$ derived from followup Chandra observations, though they note
417: that the results they obtain using different mass tracers such as X-ray temperature and
418: total gas mass are very similar.  The $M-Y_X$ relation is
419: itself calibrated based on hydrostatic mass estimates.   Importantly, \citet{vikhlininetal08}
420: explicitly correct for the Malmquist bias expected for a flux limited cluster sample, so the
421: $\Lx-M$ relation they derive can be interpreted as the relation one would obtain using a
422: mass limited cluster sample.
423: 
424: For this work, we have repeated the analysis in \citet{rykoffetal08a}
425: with a slightly different definition for $\Lx$.  In particular, we measure
426: the X-ray luminosity in the rest-frame $0.5-2.0\ \keV$ band within a $1\ h^{-1} \Mpc$
427: aperture.  The change in band is tailored to match the energy band
428: used by \citet{vikhlininetal08}, which we used to place priors on the 
429: $\Lx-M$ relation.  It is worth noting that \citet{vikhlininetal08} do not
430: use a $1\ h^{-1}\Mpc$ aperture, as we do.  We have, however, carefully
431: calibrated the scaling between our $\Lx$ definition and that of \citet{vikhlininetal08}
432: so as to be able to use their results in our analysis.
433: A detailed description of  our measurements can be found in appendix \ref{app:lx-n_priors}.
434: 
435: 
436: 
437: %-----------------------------------------------------
438: %-----------------------------------------------------
439: %-----------------------------------------------------
440: %-----------------------------------------------------
441: %-----------------------------------------------------
442: %-----------------------------------------------------
443: %-----------------------------------------------------
444: 
445: 
446: 
447: \section{Relating Cluster Mass, X-ray Luminosity, and Richness}
448: \label{sec:rough}
449: 
450: The problem we are confronted with is the following: we have four pieces of observational
451: data, namely
452: %
453: \begin{itemize}
454: \item The abundance of galaxy clusters as a function of richness.
455: \item The mean relation between cluster richness and mass.
456: \item The mean and variance of the relation between cluster richness and
457: X-ray luminosity.
458: \item The mean and variance of the relation between cluster X-ray luminosity and
459: mass.
460: \end{itemize}
461: %
462: From this data, we wish to determine the scatter in mass
463: at fixed richness for the cluster sample under consideration.
464: 
465: The basic idea behind our analysis is as follows. Consider the
466: probability $P(M,L_X|N)$, which we take to be Gaussian in $\ln M$ and $\ln \Lx$.  
467: This probability distribution is completely specified
468: by the mean and variance of both $M$ and $\Lx$ at fixed richness, and by the
469: correlation coefficient between $M$ and $\Lx$.   Of these, there are only two
470: quantities that are not already observationally constrained: $\sigma_{M|N}$, 
471: the scatter in mass
472: at fixed richness, and $r_{M,L|N}$, the correlation coefficient between mass and $\Lx$ at fixed richness.
473: 
474: Suppose now that we guessed values for these two quantities, so that
475: the probability distribution $P(M,\Lx|N)$ is fully specified.  Given the 
476: abundance function $n(N)$,
477: we can use $P(M,\Lx|N)$ to randomly assign a mass and an X-ray
478: luminosity to every cluster in the sample.  We can then select a mass
479: limited sub-sample, and measure the corresponding
480: $\Lx-M$ relation, comparing it to the $\Lx-M$ measurement from \citet[][]{vikhlininetal08}.
481: Since the $\Lx-M$ relation
482: we predict depends on our assumptions about $P(M,\Lx|N)$,
483: there should only be a small region in parameter space where
484: our predictions are consistent with independent observational constraints on the
485: $\Lx-M$ relation.
486: 
487: %----------------------------------------------------------------------- 
488: %----------------------------------------------------------------------- 
489: 
490: \begin{figure}[t]
491: \epsscale{1.2}
492: \plotone{f1.eps}
493: \caption{Contours of constant $\Lx-M$ parameters.  For each assumed
494: value of the scatter $\sigma_{M|N}$ and correlation coefficient
495: parameter $r_{M,L|N}$, we predict the amplitude, slope, and scatter
496: of the $\Lx-M$ relation of a mass selected sample of clusters with
497: $M\geq 3\times 10^{14}\ \msun$.
498: Contours of constant amplitude, slope, and scatter are shown with
499: the solid, dashed, and dotted lines respectively.  The thicker lines
500: correspond to the central values of the $\Lx-M$ priors discussed in appendix
501: \ref{app:lx-m_priors} and summarized in Table \ref{tab:priors}, 
502: while the other two contours enclose
503: the $95\%$ confidence region for each of the parameters.  The second slope contour
504: falls outside the region of parameter space shown in the figure.
505: The intersection of the three separate regions corresponds to acceptable 
506: values for the two unknown parameters $\sigma_{M|N}$ and $r_{M,L|N}$.
507: }
508: \label{fig:contours}
509: \end{figure} 
510: 
511: %----------------------------------------------------------------------- 
512: %----------------------------------------------------------------------- 
513: 
514: Figure \ref{fig:contours} illustrates this idea.  To create the figure, we have
515: set every observed parameter of the distribution $P(M,\Lx|N)$ to the
516: central value of the priors described in appendix \ref{app:priors} and summarized
517: in table \ref{tab:priors}.   We then
518: defined a grid in the two dimensional space spanned by $\sigma_{M|N}$
519: and $r_{M,L|N}$, and carried through the argument described above.
520: The resulting predictions for the amplitude, slope, and scatter of the
521: $\Lx-M$ relation as a function of $\sigma_{M|N}$ and $r_{M,L|N}$
522: are shown in the figure.
523: We plot contours of constant amplitude, slope, and scatter of the $\Lx-M$
524: relation as solid, dashed, and dotted lines respectively.  The thicker curves
525: correspond to the central values of the priors,
526: while thinner curves demark
527: the corresponding $95\%$ confidence limits. 
528: As we can see, all three contours intersect in a finite
529: region of parameter space, indicating good agreement between our weak lensing
530: and X-ray data, and the independent determination of the $\Lx-M$ relation.   
531: Based on Figure \ref{fig:contours}, we expect a detailed analysis should constrain
532: our parameters to
533: $\sigma_{M|N}\approx 0.40$, and $r_{M,L|N}\approx 0.9$.  
534: The rest of this paper is simply a way of formalizing the argument described 
535: above in order to place errors on both $\sigma_{M|N}$ and $r_{M,L|N}$.
536: 
537: 
538: %-----------------------------------------------------
539: %-----------------------------------------------------
540: %-----------------------------------------------------
541: %-----------------------------------------------------
542: %-----------------------------------------------------
543: %-----------------------------------------------------
544: %-----------------------------------------------------
545: 
546: 
547: \section{Formalism}
548: \label{sec:formalism}
549: 
550: We wish to formalize the above argument in order to place quantitative
551: constraints on the scatter in mass at fixed richness.  Details of how we go 
552: about doing so are presented below.  Readers interested only in our results
553: can move directly to section \ref{sec:results}.
554: 
555: \subsection{Likelihood Model}
556: 
557: As we mentioned above,
558: the key point in our analysis is our ability to compute 
559: the amplitude and slope of the mean relation $\avg{\ln \Lx|M}$, 
560: and the scatter about this mean, as a function of our two parameters
561: of interest: the scatter in mass at fixed richness and the correlation 
562: coefficient between $M$ and $\Lx$ at fixed $N$.  
563: Let us define $\bx=\{ A_{L|M},\alpha_{L|M}, \sigma_{L|M} \}$, and let
564: $\bp=\{ \sigma_{M|N}, r_{M,L|N} \}$ denote our parameters of interest.
565: Our predictions for the $\Lx-M$ relation as a function of our parameters
566: of interest can be summarized simply as $\bx(\bp)$.
567: Now, adopting a Bayesian framework, a set of priors on $\bx$ is simply a probability
568: distribution $P_\bx(\bx)$.   Since $\bx$ is a function of $\bp$, the priors immediately define
569: a probability distribution over $\bp$ given by
570: %
571: \begin{equation}
572: P(\bp) = P_\bx(\bx(\bp))\det (\partial \bx/\partial \bp).
573: \end{equation}
574: %
575: Since we know how to compute both $P_\bx(\bx)$ and $\bx(\bp)$,
576: we can find any confidence regions for our parameters of interest.
577: 
578: The problem we are confronted with, however, is slightly more complicated, in that
579: the functions $\bx$ depend not only on $\bp$, but also on additional nuisance parameters $\bq$.
580: Indeed, our predictions for
581: the observable parameters of the $\Lx-M$ relation depend on both the abundance
582: function of clusters and $P(M,\Lx|N)$.  The abundance function can
583: be accurately described by a Schechter function (we explicitly checked a Schechter function is
584: statistically acceptable),
585: %
586: \begin{equation}
587: n(N) \propto N^{-\tau}\exp( - N/N_* ).
588: \label{eq:abundance}
589: \end{equation}
590: %
591: Given a Schechter fit, 
592: our prediction for the $\Lx-M$ relation will also depend on the value
593: of the parameters $\tau$ and $N_*$.  Likewise, the distribution $P(M,\Lx|N)$
594: also depends on the
595: amplitude and slope of the means $\avg{M|N}$ and $\avg{\Lx|N}$, as well as
596: the scatter in $\Lx$ at fixed $N$.  All in all, we have seven additional
597: nuisance parameters
598: $\bq=\{ N_*, \tau, B_{M|N},\alpha_{M|N}, A_{L|N}, \alpha_{L|N}, \sigma_{L|N} \}$.
599: Let $\br=\{\bp,\bq\}$ denote the full set of parameters.  The priors
600: from the $\Lx-M$ relation define a probability distribution over $\br$ given by
601: %
602: \begin{equation}
603: P(\br) = P_\bx(\bx(\br))\det (\partial \bx/\partial \br).
604: \end{equation}
605: %
606: Since we have a total of 9 parameters, and only three observables from the $\Lx-M$ relation, 
607: it is obvious that the above likelihood function will result in large degeneracies because
608: the parameters are under-constrained.
609: If one has priors $P_0(\bq)$ in the nuisance parameters, however,
610: the probability distribution $P(\bp)$ in the parameters of interest is given by
611: %
612: \begin{equation}
613: P(\bp) = \int d\bq\ P_0(\bq) P_\bx(\bx(\bp,\bq))\det(\partial\bx/\partial \br).
614: \label{eq:prob}
615: \end{equation}
616: %
617: This equation allows us to compute $P(\bp)$, and therefore place constraints
618: on our parameters of interest.
619: In practice, we will ignore the determinant term in the probability distribution defined
620: in equation \ref{eq:prob}.  This is because the function $\bx(\br)$ is estimated using
621: a Monte Carlo approach, implying that accurate numerical estimates of the 
622: Jacobian $\partial \bx/\partial \br$ would be too computationally intensive to
623: be performed.  Fortunately, the determinant typically introduces only 
624: slight modulations of the likelihood, so we do not expect our results to
625: be adversely affected by this.  
626: 
627: %-----------------------------------------------------
628: %-----------------------------------------------------
629: %-----------------------------------------------------
630: %-----------------------------------------------------
631: %-----------------------------------------------------
632: %-----------------------------------------------------
633: %-----------------------------------------------------
634: 
635: \subsection{Implementation}
636: \label{sec:implementation}
637: 
638: We estimate the probability distribution $P(\bp)$ using a Monte Carlo approach.
639: Ignoring an overall normalization constant and setting 
640: $\det (\partial \bx/\partial \br)=\mbox{constant}$, we have
641: %
642: \begin{equation}
643: \hat P(\bp) = \frac{1}{N_{draws}} \sum_{i=1}^{N_{draws}} P_\bx(\bx(\bp,\bq_i))
644: \label{eq:probestimator}
645: \end{equation}
646: %
647: where $\bq_i$ for $i=1$ through $N_{draws}$ are random draws of the
648: nuisance parameters $\bq$, drawn from the prior distribution $P_0(\bq)$.
649: We set $N_{draws}=3000$ as our default value (see below for further discussion).
650: 
651: The prior distributions for our nuisance parameters are characterized
652: by a statistical and a systematic error.  The former is modeled as 
653: Gaussian and the latter using a top-hat distribution.  Thus, given a prior
654: of the form
655: %
656: \begin{equation}
657: q=\bar q \pm \sigma_q^{stat} \pm \sigma_q^{sys},
658: \end{equation}
659: %
660: a random draw is obtained by setting
661: %
662: \begin{equation}
663: \bq_i = \bar \bq + \Delta \bq_i^{stat} + \Delta \bq_i^{sys}
664: \end{equation}
665: %
666: where $\Delta \bq_i^{stat}$ is drawn from a Gaussian of zero mean with a covariance matrix
667: defined by the statistical errors, and $\Delta \bq_i^{sys}$ is drawn from a top hat distribution
668: that is non-zero only for $|\Delta q^{sys} | \leq \sigma_q^{sys}$.
669: 
670: The probability distribution $P_\bx(\bx(\bp,\bq))$ used in equation 
671: \ref{eq:probestimator} is the product of the likelihoods $P_{x}(x(\bp,\bq))$
672: for each of the $\Lx-M$ parameters $x \in \bx=\{ A_{L|M}, \alpha_{L|M},\sigma_{L|M} \}$.
673: The probability for each $\Lx-M$ parameter is given by 
674: the convolution of the Gaussian and top-hat distributions defined by the statistical and systematic
675: errors of $x$, respectively, so that
676: %
677: \begin{equation}
678: P_{x}(x(\bp,\bq)) = \frac{1}{4\sigma_x^{sys}} [ \erf(x_+) - \erf(x_-) ]
679: \end{equation}
680: %
681: where
682: %
683: \begin{equation}
684: x_\pm = \frac{ \pm \sigma_x^{sys} - ( x(\bp,\bq) - \bar x ) }{\sqrt{2}\sigma_x^{stat}}.
685: \end{equation}
686: %
687: Note that the above equations are appropriate only when the various $\Lx-M$
688: parameters are uncorrelated, so it is important to place the priors at the pivot
689: point of the $\Lx-M$ relation ($M_{pivot}=3.9\times10^{14}\ \msun$).  
690: This explains why Table \ref{tab:priors} quotes 
691: a prior on $A_{L|M}+1.361\alpha_{L|M}+1.5(\sigma_{L|M}^2-0.40^2)$ rather than
692: on $A_{L|M}$ alone.
693: 
694: We also need to specify how the function $\bx(\bp,\bq)$ is evaluated.  We do this
695: using a Monte Carlo approach.  Given $\bp$ and $\bq$, we generate $N_{cl}=10^5$ 
696: mock clusters in the richness range
697: $N\in[10,200]$.
698: We then randomly draw mass and X-ray luminosity values for each of these clusters based on 
699: the distribution $P(M,\Lx|N)$, and select a mass limited subsample of clusters using a mass
700: cut $M\geq M_{min}$ with
701: $M_{min}=3\times 10^{14} \msun$ (the reason for this particular value is explained below).
702: Using a least squares fitting routine, we find the best fit line between
703: $\ln \Lx$ and $\ln M$.  This defines both $A_{L|M}(\bp,\bq)$ and 
704: $\alpha_{L|M}(\bp,\bq)$.  The scatter $\sigma_{L|M}(\bp,\bq)$ is defined
705: as the root mean square fluctuation about the best fit line.  
706: 
707: Using equation \ref{eq:probestimator} and the function $\bx(\bp,\bq)$ defined
708: above, we evaluate the probability distribution $P(\bp)$ along a grid of points in 
709: $\sigma_{M|N}\in[0.2,0.85]$ and $r\in[0.75,1.0]$ with $25$ grid points per axis.
710: A full run of our code then requires we perform $25^2$ Monte Carlo integrals with
711: $N_{draws}=3000$ points in each integration.  Each draw also requires us to
712: evaluate the function $\bx(\bp,\bq)$, which in turn requires generating a 
713: mock catalog with $N_{cl}=10^5$ clusters, so the procedure as a whole
714: is computationally expensive.
715: To increase computational efficiency, for each Monte Carlo evaluation of $P(\bp)$
716: we generate a single cluster catalog that is used to estimate the likelihood 
717: at every grid point.  This correlates the values of $\hat P$ along our grid,
718: but does not otherwise adversely affect our results.
719: 
720: Our Monte Carlo approach requires that
721: both the number of clusters in the random catalogs $N_{cl}$ and the
722: number of times the likelihood function is evaluated $N_{draws}$ are 
723: sufficiently large to achieve convergence.   Our default values for $N_{cl}$ and $N_{draws}$
724: were selected to ensure the recovered likelihood is accurate to
725: within a dispersion of $\sim 1-2\%$ inside high likelihood regions.
726: The error in the recovered likelihood increases with decreasing likelihood,
727: but even in the tails of the distributions our estimates are accurate to about $10\%$.
728: This was explicitly tested by running a coarse grid with our default values for $N_{draws}$ 
729: and $N_{cl}$, and by repeating the analysis with both of these parameters increased by a factor 
730: of two.\footnote{It is worth noting that in order to create Figure \ref{fig:contours},
731: one needs to generate cluster catalogs with $N_{cl}\gtrsim 10^7$ clusters in order
732: for the contours to appear smooth by eye.  However, $N_{cl}=10^5$ is a sufficient number of 
733: clusters for our analysis,
734: since we only require that the noise in the likelihood be much smaller
735: than the width of the priors. Since the latter are quite wide, even relatively noisy estimates
736: of the $\Lx-M$ relation are sufficient for constraining the marginalized distribution.}
737: 
738: Finally, we emphasize that it is necessary to explicitly check whether our results are 
739: sensitive to the $N\geq 10$ cut applied to the maxBCG cluster sample.  In particular,
740: when selecting a mass limited subsample of clusters, we need to ensure that the mass
741: limit $M_{min}$ be sufficiently large that the number of clusters with $N< 10$ and
742: $M\geq M_{min}$ is insignificant.  We have explicitly checked that for our adopted
743: low mass cut $M_{min}= 3\times 10^{14}\ \msun$ our results are robust to the 
744: richness cut $N\geq 10$ by repeating the analysis in a coarse grid using
745: an $N\geq 8$ richness cut instead.  We find that the likelihood
746: estimates in both cases are in agreement to within the expected accuracy
747: of our Monte Carlo approach.
748: 
749: 
750: %-----------------------------------------------------
751: %-----------------------------------------------------
752: %-----------------------------------------------------
753: %-----------------------------------------------------
754: %-----------------------------------------------------
755: %-----------------------------------------------------
756: %-----------------------------------------------------
757: 
758: 
759: \subsection{Priors}
760: \label{sec:priors}
761: 
762: The priors used in our analysis 
763: are summarized in Table \ref{tab:priors}.  We follow the notation
764: %
765: \begin{equation}
766: q = \bar q \pm \sigma_q^{stat}\ (stat) \pm \sigma_q^{sys}\ (sys)
767: \end{equation}
768: %
769: where $\bar q$ is the central value, $\sigma_q^{stat}$ is the 1$\sigma$ statistical error
770: on the parameter $q$ marginalized over all other parameters, and $\sigma_q^{sys}$
771: is the systematic error.  In all cases,
772: we model statistical errors as Gaussian, and we include known covariances between
773: different parameters.  Systematic errors are assumed to follow top-hat distributions, and
774: the final prior distribution is given by the convolution of these two functions.
775: 
776: \begin{deluxetable}{|c|c|}
777: \tablecaption{Scaling Relation and Cluster Abundance Priors}
778: \startdata
779: \hline
780: \hline
781: Parameter & Prior \\
782: \hline
783: $\ln N_*$ & \hspace{0.0 in} $3.66 \pm 0.10\ (stat) \pm 0.01\ (sys)$\hspace{0.0 in} \\
784: \hline
785: $\tau$ & \hspace{0.0 in} $2.61 \pm 0.06\ (stat)\pm 0.05\ (sys) $ \hspace{0.0 in} \\
786: \hline
787: $B_{M|N}$ & \hspace{0.0 in} $0.95 \pm 0.07\ (stat) \pm 0.10\ (sys)$  \hspace{0.0 in} \\
788: \hline
789: $\alpha_{M|N}$ & \hspace{0.0 in} $1.06 \pm 0.08\ (stat) \pm 0.08\ (sys)$   \hspace{0.0 in} \\
790: \hline
791: $B_{L|N}$ & \hspace{0.0 in} $1.91\pm 0.04\ (stat) \pm 0.09\ (sys)$\hspace{0.0 in} \\
792: \hline
793: $\alpha_{L|N}$ & \hspace{0.0 in} $1.63\pm 0.06\ (stat) \pm 0.05\ (sys)$  \hspace{0.0 in} \\
794: \hline
795: $\sigma_{L|N}$ & \hspace{0.0 in} $0.83\pm 0.03\ (stat) \pm 0.10\ (sys)$ \hspace{0.0 in} \\
796: \hline
797: \hline
798: \hspace{0.05 in} $A_{L|M}+1.361\alpha_{L|M}  +1.5(\sigma_{L|M}^2-0.40^2)  $ \hspace{0.05 in}  
799: 	& \hspace{0.0 in} $2.45 \pm 0.08\ (stat) \pm 0.23\ (sys) $ \hspace{0.0 in} \\
800: \hline
801: $\alpha_{L|M}$ & \hspace{0.0 in} $1.61 \pm 0.14\ (stat)$   \hspace{0.0 in} \\
802: \hline
803: $\sigma_{L|M}$ & \hspace{0.0 in} $0.40 \pm 0.04\ (stat)$  \hspace{0.0 in} 
804: \enddata
805: \tablenotetext{}{Priors on the abundance function parameters ($N_*$ and $\tau$), as well as those
806: from the $M-N$ and $\Lx-N$ relations are not taken directly from any single work in the literature,
807: but are discussed in detail in Appendix \ref{app:priors}.  Priors on the $\Lx-M$ relation are taken
808: from \citet{vikhlininetal08}.  Overall, we believe these priors are fair, that is, they are neither
809: overly optimistic nor overly pessimistic.}
810: \label{tab:priors}
811: \end{deluxetable}
812: 
813: We believe that the priors contained in Table \ref{tab:priors} are fair, that is, they are neither
814: overly aggressive nor overly conservative.  A detailed discussion of our priors
815: can be found in Appendix \ref{app:priors}.
816: 
817: 
818: %-----------------------------------------------------
819: %-----------------------------------------------------
820: %-----------------------------------------------------
821: %-----------------------------------------------------
822: %-----------------------------------------------------
823: %-----------------------------------------------------
824: %-----------------------------------------------------
825: 
826: 
827: 
828: \section{Results}
829: \label{sec:results}
830: 
831: Figure \ref{fig:lkhd} shows the $68\%$ and $95\%$ probability contours for the parameters
832: $\sigma_{M|N}$ and $r_{M,L|N}$.   The likelihood peak occurs at
833: $\sigma_{M|N}=0.46$ and $r_{M,L|N}=0.90$.   The marginalized means
834: are $\avg{\sigma_{M|N}}=0.45$ and $\avg{r_{M,L|N}}=0.91$.
835: 
836: We wish to determine whether the breadth of the likelihood region in Figure \ref{fig:lkhd}
837: is limited by uncertainties in the scaling relations
838: of maxBCG clusters, or by uncertainties in the $\Lx-M$ relation.  To do so, we repeat
839: our analysis with two new sets of priors: for the first, we use a tight $0.05$ statistical
840: prior on all nuisance parameters, but let the $\Lx-M$ parameters float.  The second set of priors 
841: uses a tight $0.05$ prior on each of the  
842: $\Lx-M$ parameters, but floats all other nuisance parameters with the original priors.
843: We find that using tight priors on our nuisance parameters has negligible impact on 
844: the likelihood regions recovered from our analysis.  
845: On the other hand, the confidence regions obtained with the tight $\Lx-M$ priors, 
846: shown in Figure \ref{fig:lkhd} as dashed curves,
847: are tighter than those derived from our original analysis.
848: Thus, the dominant source of error in our analysis is the uncertainty in the values of the
849: $\Lx-M$ parameters.  This can be easily understood based on Figure \ref{fig:contours}.
850: We can see from the figure that the uncertainty in $r_{M,L|N}$ is largely due to
851: the prior on the scatter in $\Lx$ at fixed $M$, which is already tight and thus does not change
852: between our fiducial prior and our tight priors.   On the other hand, we can see that both
853: the amplitude and slope priors cut off regions with high scatter.  Tightening
854: these priors excludes a larger section of parameter space, and results in the tighter contours observed
855: in Figure \ref{fig:lkhd}.
856: 
857: 
858: %----------------------------------------------------------------------- 
859: %----------------------------------------------------------------------- 
860: 
861: \begin{figure}[t]
862: \epsscale{1.2}
863: \plotone{f2.eps}
864: \caption{$68\%$ and $95\%$ confidence contours for $\sigma_{M|N}$ and $r_{L,M|N}$. 
865: Solid lines show the results of our analysis.   We find that
866: X-ray luminosity and mass are correlated at fixed richness.  The breadth of
867: the degeneracy region shown above is almost exclusively due to uncertainties
868: in the $\Lx-M$ relation parameters.  Dashed contours demonstrate how our
869: results would improve if the $\Lx-M$ amplitude and slope were known to within
870: an accuracy of $\Delta A_{L|M} = \Delta \alpha_{L|M}=0.05$.}
871: \label{fig:lkhd}
872: \end{figure} 
873: 
874: %----------------------------------------------------------------------- 
875: %----------------------------------------------------------------------- 
876: 
877: Figure \ref{fig:marg} shows the marginalized probability distributions for 
878: $\sigma_{M|N}$ and $r_{M,L|N}$.
879: The solid curves correspond to our original analysis, while the dashed curves illustrate
880: the results one expects assuming our hypothetical tight priors for the $\Lx-M$ relation parameters.
881: We find that the logarithmic scatter in mass at fixed richness
882: and the correlation coefficient between $\ln M$ and $\ln \Lx$ are
883: %
884: \begin{eqnarray}
885: \sigma_{M|N} & = & 0.45^{+0.20}_{-0.18}\ (95\%\ \mbox{CL}) \\
886: r_{L,M|N} & \geq & 0.85\ (95\%\ \mbox{CL}).
887: \end{eqnarray}
888: %
889: Assuming our hypothetical tight $\Lx-M$ priors, the constraints
890: become $\sigma_{M|N} = 0.42^{+0.07}_{-0.09}$ and 
891: $r_{L,M|N} \geq 0.85\ (95\%\ \mbox{CL})$.
892: We emphasize that these latter constraints are only meant as a guide to 
893: the accuracy one could achieve with this method if the $\Lx-M$
894: relation were known to about $5\%$ accuracy.
895: 
896: It is evident from our results that cluster richness is not as effective a
897: mass tracer as X-ray derived masses. Indeed, even total (i.e. not core-excluded) 
898: X-ray luminosity is a more faithful mass tracer than
899: the adopted richness measure of the maxBCG catalog, as demonstrated both 
900: by the smaller scatter and the very large correlation
901: coefficient. Note that the latter indicates that, at fixed richness, over-luminous
902: clusters are almost guaranteed to also be more massive than average.  This
903: is an important result which forms the basis for a concurrent paper in which we improve our richness
904: estimates by demanding tighter correlations in the $\Lx-$richness relation~\citep{rozoetal08b}.
905: 
906: %----------------------------------------------------------------------- 
907: %----------------------------------------------------------------------- 
908: 
909: \begin{figure}[t]
910: \epsscale{1.2}
911: \plotone{f3a.eps}
912: \plotone{f3b.eps}
913: \caption{Likelihood distributions for $\sigma_{M|N}$ and $r_{M,L|N}$.  The distributions
914: are marginalized over all other parameters.  Solid lines are the results of our analysis,
915: while dashed lines are the results obtained assuming tight priors on the $\Lx-M$ parameters.
916: Note the latter set of curves are presented 
917: only to give a sense of how our result would improve with
918: better understanding of the $\Lx-M$ relation.}
919: \label{fig:marg}
920: \end{figure} 
921: 
922: %----------------------------------------------------------------------- 
923: %----------------------------------------------------------------------- 
924: 
925: 
926: %-----------------------------------------------------
927: %-----------------------------------------------------
928: %-----------------------------------------------------
929: %-----------------------------------------------------
930: %-----------------------------------------------------
931: %-----------------------------------------------------
932: %-----------------------------------------------------
933: 
934: 
935: \section{Comparison to Other Work}
936: \label{sec:other_work}
937: 
938: There are not many previous results against which our measurements of scatter in mass 
939: at fixed richness may be compared.
940: One possible reference point is the upper limit based on
941: the error bar in the weak lensing mass estimates of \citet{johnstonetal07}.  More specifically,
942: assuming that the error in $\avg{M|N}$ is entirely due to the intrinsic scatter
943: in $M$ at fixed $N$, it follows that the error in the mass is simply 
944: $\Delta M/\avg{M|N} \approx \Delta \ln M = \sigma_{M|N}/\sqrt{n(N)}$ where $\Delta M$ is the observed error
945: and $n(N)$ is the number of clusters with richness $N$.  
946: For the richest bin, which provides the tightest constraint, 
947: \citet{johnstonetal07} find 
948: $\avg{M}=(8.1\pm 1.3)\times 10^{14}\ \msun$.  The bin contains $n=47$
949: clusters, so an upper limit to the scatter in mass at fixed richness is
950: $\sigma_{M|N} \leq \sqrt{n}(\Delta M/\avg{M})=1.10$.  Figure \ref{fig:marg}
951: shows that our results easily satisfy this upper limit on the scatter.
952: 
953: The only other measurement of the scatter in mass at fixed richness for maxBCG clusters
954: is that found in \citet{beckeretal07}.  These scatter estimates
955: are obtained as follows: first, \citet{beckeretal07} select all maxBCG clusters whose
956: central galaxy has a spectroscopic redshift.  They then bin the clusters in richness,
957: and compute the velocity relative
958: to the BCG of every galaxy member with spectroscopic data.  The recovered velocity
959: distribution of galaxies is found to be non-Gaussian.  Assuming that the velocity distribution
960: of galaxies of halos of fixed mass is exactly Gaussian, and that the observed non-Gaussianity
961: is entirely due to mass-mixing within a richness bin, \citet{beckeretal07} estimate the scatter in mass 
962: at fixed richness based on the observed non-Gaussianity of the velocity distribution.
963: 
964: An updated version of the results from \citet{beckeretal07} can be seen in 
965: Figure \ref{fig:compare}.  The only difference between this plot and the corresponding
966: figure in \citet{beckeretal07} is that here we have made use of the additional spectroscopic
967: data from the SDSS Data Release 6 \citep{dr6}, which results in tighter error bars.  
968: Also shown in the figure as a horizontal
969: band is the $95\%$ confidence region from our analysis.  As we can see, our scatter 
970: estimate appears to be systematically lower than that of \citet{beckeretal07}, a discrepancy
971: first noted in \citet[][more on the relation between our work and theirs below]{rykoffetal08b}.
972: 
973: %----------------------------------------------------------------------- 
974: %----------------------------------------------------------------------- 
975: 
976: \begin{figure}[t]
977: \epsscale{1.2}
978: \plotone{f4.eps}
979: \caption{Comparison of the scatter in mass at fixed richness estimated
980: in this work (solid band) and that of \citet{beckeretal07} (diamonds with
981: error bars).  The dashed band
982: shows how the scatter we measured is expected to be affected by miscentering,
983: which allows us to better compare our results to those of \citet{beckeretal07}.
984: We find that, once 
985: miscentering is properly taken into account, the two results appear to be
986: in reasonable agreement.}
987: \label{fig:compare}
988: \end{figure} 
989: 
990: %----------------------------------------------------------------------- 
991: %----------------------------------------------------------------------- 
992: 
993: Such a bias is not entirely unexpected, as we now know that a significant fraction
994: of clusters have their BCGs misidentified, a problem that was not
995: yet known -- and was therefore unaccounted for -- at the time the 
996: \citet{beckeretal07} results came out.
997: To get a better understanding of how our results and those of \citet{beckeretal07}
998: compare, we can use our results along with the miscentering probability model 
999: from \citet{johnstonetal07} to predict the scatter that \citet{beckeretal07} observed
1000: given this miscentering systematic.  We proceed as follows.  First, we
1001: use our best fit model for the abundance distribution 
1002: to generate a mock catalog with $2\times 10^5$ clusters with $N\geq 10$.
1003: Each of these clusters is assigned a mass by drawing from the $P(M|N)$
1004: distribution defined by the values of $\sigma_{M|N}$ corresponding
1005: to the two $95\%$ confidence limits on $\sigma_{M|N}$.
1006: These assigned masses
1007: are then turned into velocity dispersions using the scaling relation from \citet{evrardetal08}.
1008: 
1009: At this point, we have a cluster catalog where each cluster has a richness and a velocity
1010: dispersion.  If a cluster is miscentered, we expect that in most cases the new center
1011: will be a cluster galaxy.  Assuming this is the case, and that 
1012: BCGs are at rest at the center of a cluster, 
1013: the velocity dispersion of cluster galaxies relative to random satellites will be a
1014: factor of $\sqrt{2}$ higher than relative to the BCG.
1015: Using the miscentering model described in \citet{johnstonetal07} for $p(N)$, the probability 
1016: that a cluster of richness $N$ be correctly centered, 
1017: we randomly label clusters as properly centered or miscentered, and boost the 
1018: ``observed'' velocity dispersion of those clusters labeled as miscentered by the expected amount.
1019: The clusters are assigned a new mass based on their
1020: ``observed'' velocity dispersions, and the corresponding scatter in the 
1021: $M-N$ relation is estimated.  We repeat this procedure $10^3$ times
1022: in order to compute the mean systematic correction due to miscentering.
1023: 
1024: Our predictions for the scatter values observed by \citet{beckeretal07} are
1025: shown in Figure \ref{fig:compare} with dashed lines, and correspond to the $95\%$
1026: confidence interval from our analysis.  
1027: We see that miscentering introduces a richness dependent correction that boosts 
1028: the scatter in the recovered velocity dispersion  and places it in significantly better
1029: agreement with the data from \citet{beckeretal07}.  
1030: 
1031: The agreement with the \citet{beckeretal07} data is an interesting
1032: result.  Perhaps the single most difficult systematic effect that had to be addressed in the
1033: \citet{beckeretal07} analysis is the validity of the assumption that non-Gaussianities in the
1034: velocity distribution of stacked clusters are entirely due to mass-mixing.  
1035: The reasonable agreement between our results and those of \citet{beckeretal07}
1036: suggests that their assumption is indeed justified, though a robust conclusion will
1037: have to wait until a more detailed analysis is performed, especially given the possibility
1038: of velocity bias of the galaxy population (i.e. if satellite galaxies have a velocity dispersion different
1039: from that of the dark matter).
1040: 
1041: The analysis in this work is also very closely related to that of \citet{rykoffetal08b}.  \citet{rykoffetal08b}
1042: sought to constrain the $\Lx-M$ relation of clusters by fitting the scaling of $\avg{\Lx|N}$ with
1043: $\avg{M|N}$.  However, as recognized in \citet{rykoffetal08b},
1044: in order to fully interpret their result in terms of the traditional definition of the $\Lx-M$ relation, i.e. the mean
1045: X-ray luminosity at fixed mass, one needs to know both the scatter in mass at fixed richness, and the corresponding
1046: correlation coefficient with $\Lx$.  
1047: Given that these two quantities are unknown, but that the $\Lx-M$ relation is already constrained from X-ray surveys, 
1048: it seems reasonable to suggest that a better use of the lensing and X-ray data of maxBCG clusters is to 
1049: use our knowledge of the $\Lx-M$ relation to constrain the scatter in mass at fixed richness and the corresponding
1050: correlation coefficient, as was done in this work.
1051: 
1052: Our work differs from the ideas presented in \citet{rykoffetal08b} in another significant way.   While our analysis
1053: employs only $P(\Lx,M|N)$ and $n(N)$, \citet{rykoffetal08b} used the halo mass function $dn/dM$ and the
1054: probability distribution $P(\Lx,N|M)$ to interpret their measurements.  This has the important drawback that
1055: in doing so, one needs to assume a cosmological model in order to compute the halo mass function, rendering
1056: their interpretation cosmology dependent.  By focusing on the quantities that are directly observable, i.e.
1057: $n(N)$ and $P(\Lx,M|N)$, we are able to avoid this difficulty.   The price we pay for this is that rather than constraining
1058: the scatter in richness at fixed mass, which is the more directly relevant quantity from a cosmological perspective,
1059: we constrain instead the scatter in mass at fixed richness.  While this makes implementing
1060: such a constraint a little more cumbersome in a cosmological analysis, the fact that the constraint itself is cosmology
1061: independent is obviously of paramount importance.  
1062: 
1063: %-----------------------------------------------------
1064: %-----------------------------------------------------
1065: %-----------------------------------------------------
1066: %-----------------------------------------------------
1067: %-----------------------------------------------------
1068: %-----------------------------------------------------
1069: %-----------------------------------------------------
1070: 
1071: \section{Cosmological Consequences}
1072: \label{sec:mf}
1073: 
1074: As mentioned in the introduction, to obtain an unbiased estimate of the halo mass function
1075: based on the observed cluster richness function requires that we understand the scatter between
1076: cluster richness and halo mass.
1077: Given our lognormal assumption, and the fact that the mean
1078: mass--richness relation is already known from weak lensing, our measurement of the scatter in this
1079: scaling relation fully determines the probability distribution $P(M|N)$.  Thus, 
1080: we are now in a position to determine the halo mass function of the local universe 
1081: with the maxBCG cluster catalog.
1082: 
1083: Let us define then
1084: $n_i=n(M_i)$ as the number of halos within a logarithmic mass bin of width
1085: $\Delta \ln M$ centered about $M_i$,
1086: %
1087: \begin{equation}
1088: n_i = \left . \frac{dn}{d\ln M} \right \vert_{M_i} \Delta \ln M.
1089: \end{equation}
1090: %
1091: Given our cluster catalog and $P(M|N)$, we construct an estimator
1092: $\hat n_i$ for $n_i$ by randomly drawing a mass from $P(M|N)$ 
1093: for each halo in the cluster catalog, and then counting the number of halos
1094: within the logarithmic mass bin centered about $M_i$.  
1095: Note that since the mass of each cluster is a random variable, our
1096: mass function estimator $\hat n_i$ is itself a random
1097: variable.  The mean and covariance matrix of $\hat n_i$ can easily be obtained by making multiple
1098: realizations of $\hat n_i$, and averaging the resulting mass functions.
1099: 
1100: In practice, we also need to marginalize our results over uncertainties
1101: in $P(M|N)$ and over uncertainties in the richness function $n(N)$.
1102: To do so, we randomly draw the parameters
1103: $\bx=\{B_{M|N},\alpha_{M|N},\sigma_{M|N}\}$, and then resample the
1104: cluster richness function to obtain
1105: a new estimate of $n_i$.  The whole procedure is iterated $10^5$ times,
1106: and the mean and covariance matrix of the number counts
1107: in each of our logarithmic mass bins is computed.\footnote{We again checked
1108: explicitly that the mass cut $M_{min}=3\times 10^{14}\ \msun$ is large enough
1109: for our results to be insensitive to the maxBCG richness cut $N\geq 10$.}  
1110: 
1111: %----------------------------------------------------------------------- 
1112: %----------------------------------------------------------------------- 
1113: 
1114: \begin{figure}[t]
1115: \epsscale{1.2}
1116: \plotone{f5.eps}
1117: \caption{The maxBCG mass function.  Cluster counts were converted
1118: to densities assuming $\Omega_m=0.27$ and $h=0.71$, the same cosmology
1119: assumed in the lensing measurements \citep{johnstonetal07}.
1120: The error bars shown are due to
1121: the scatter in the mass--richness relation, and are strongly correlated.
1122: For comparison, we have
1123: also plotted the \citet{tinkeretal08} mass function corresponding to the
1124: WMAP5 $95\%$ confidence region for $\sigma_8$,
1125: $0.724 \leq \sigma_8 \leq 0.868$.  All other parameters are held fixed to 
1126: the central values reported in \citet{wmap08}.  Our data are consistent with
1127: the WMAP5 results, though they might suggest a slightly higher power spectrum
1128: normalization.}
1129: \label{fig:mf}
1130: \end{figure} 
1131: 
1132: %----------------------------------------------------------------------- 
1133: %----------------------------------------------------------------------- 
1134: 
1135: Figure \ref{fig:mf} shows the mass function recovered through our
1136: analysis.  To turn our number counts into a density, we assumed
1137: a WMAP5 cosmology \citep{wmap08}, with $\Omega_m=0.27$
1138: and $h=0.72$, and a photometric redshift error $\Delta z=0.01$ (used for
1139: computing the effective volume of the sample).
1140: The diamonds correspond to our estimated means, and
1141: the error bars are the square root of the diagonal elements of
1142: the covariance matrix.  We emphasize that the error bars are heavily
1143: correlated.  The mean and covariance matrix of the recovered halo mass function
1144: can be found in Appendix \ref{app:mfdata}.
1145: 
1146: Also shown in Figure \ref{fig:mf} with dotted lines are the halo mass functions at
1147: $z=0.23$ predicted by WMAP5 assuming the \citet{tinkeretal08} mass function.
1148: For both curves, we set all cosmological parameters to the central values reported
1149: in \citet{wmap08}, except for $\sigma_8$, which is set to $\sigma_8=0.868$
1150: for the upper curve and $\sigma_8=0.724$ for the lower curve.  These two
1151: values define the $95\%$ confidence interval for $\sigma_8$ in \citet{wmap08}.
1152: As we can see, the mass function recovered from our analysis is fully consistent
1153: with the WMAP5 cosmology, though it seems to push for values of $\sigma_8$ on the
1154: high end of their allowed region.  A detailed cosmological analysis
1155: of our data will be presented in a subsequent paper (Rozo et al., in preparation).
1156: 
1157: 
1158: %-----------------------------------------------------
1159: %-----------------------------------------------------
1160: %-----------------------------------------------------
1161: %-----------------------------------------------------
1162: %-----------------------------------------------------
1163: %-----------------------------------------------------
1164: %-----------------------------------------------------
1165: 
1166: \section{Summary and Conclusions}
1167: \label{sec:conclusions}
1168: 
1169: We have shown that by combining the information in the maxBCG
1170: richness function, the mean richness-mass relation, the
1171: mean and scatter of the $\Lx-$richness relation, and the mean and scatter
1172: of the $\Lx-M$ relation, we can constrain both the scatter in mass
1173: at fixed richness for maxBCG clusters and the correlation coefficient
1174: between mass and $\Lx$ at fixed richness.  We find
1175: %
1176: %
1177: \begin{eqnarray}
1178: \sigma_{M|N} & = & 0.45^{+0.20}_{-0.18}\ (95\%\ \mbox{CL}) \\
1179: r_{L,M|N} & \geq & 0.85\ (95\%\ \mbox{CL}).
1180: \end{eqnarray}
1181: %
1182: These constraints are dominated by uncertainties in the $\Lx-M$ relation, and can be significantly
1183: tightened if our understanding of the $\Lx-M$ relation improves.  We also found our
1184: results are consistent with those presented in \citet{beckeretal07} once miscentering
1185: of maxBCG clusters is taken into account.
1186: 
1187: Our lower limit on the correlation between $M$ and $\Lx$ at fixed richness constitutes the first observational
1188: constraint on a correlation coefficient involving two different halo mass tracers.  Note that the large
1189: correlation between $\Lx$ and $M$ implies that $\Lx$---even without core exclusion---is a significantly
1190: better mass tracer than the maxBCG richness estimator (i.e., at fixed richness, over-luminous clusters
1191: are nearly always more massive).  This is an important result, which we use in
1192: a concurrent paper to help us define new richness estimators that are better correlated with
1193: cluster mass~\citep{rozoetal08b}.
1194: 
1195: Using our results, and assuming $\Omega_m=0.27$ and $h=0.71$,
1196: we have estimated the halo mass function at $z=0.23$, corresponding to the median redshift of the
1197: cluster sample.  We find that our recovered mass function is in good agreement with 
1198: the mass function predicted by \citet{tinkeretal08} for the WMAP5 cosmology \citep{wmap08}. A
1199: detailed cosmological analysis will be presented in a forthcoming
1200: paper (Rozo et al., in preparation).
1201: 
1202: Our work sheds new light on the interrelationship of bulk properties of massive halos.
1203: We have used weak lensing, X-ray luminosities,
1204: and optical richness estimates to constrain the scatter in the richness-mass relation, which can lead
1205: to improved cosmological constraints.  In principle, one could also turn this question around and, assuming
1206: a cosmology, constrain the scatter in the richness-mass relation, which would then allow one
1207: to place constraints on the amplitude, slope, and scatter of the $\Lx-M$ relation.   Such an analysis would
1208: be interesting in that, by doing so, one could compare the predicted amplitude of the $\Lx-M$ relation to
1209: that derived from hydrostatic mass estimates, thereby directly probing the amount of non-thermal pressure
1210: support in galaxy clusters. Note that even though this question can also be directly addressed by comparing
1211: weak lensing and X-ray mass estimates of individual clusters, the analysis suggested here would benefit
1212: from having small uncertainties, whereas projection effects result in rather noisy weak lensing
1213: mass estimates for individual systems.  
1214: 
1215: \acknowledgements The authors would like to thank Alexey Vikhlinin for providing them with 
1216: the full covariance of the $\Lx-M$ parameters from their analysis of the 400d cluster sample.  
1217: ER thanks David Weinberg and Chris Kochanek for useful discussions and their careful reading
1218: of the manuscript.  ESR would like to thank the TABASGO foundation.
1219: TM and JH gratefully acknowledge support from NSF grant AST 0807304 and DoE 
1220: Grant DE-FG02-95ER40899.  AE gratefully acknowledges support from NSF grant 
1221: AST-0708150.   RHW was supported in part by the U.S. Department of Energy under contract
1222: number DE-AC02-76SF00515 and by a Terman Fellowship at Stanford
1223: University.
1224: This project was made possible by workshop support from the Michigan Center for Theoretical Physics.
1225: 
1226: %\bibliographystyle{apj}
1227: %\bibliography{mybib}
1228: \begin{thebibliography}{53}
1229: \expandafter\ifx\csname natexlab\endcsname\relax\def\natexlab#1{#1}\fi
1230: 
1231: \bibitem[{{Adelman-McCarthy} {et~al.}(2008){Adelman-McCarthy}, {Ag{\"u}eros},
1232:   {Allam}, {Allende Prieto}, {Anderson}, {Anderson}, {Annis}, {Bahcall},
1233:   {Bailer-Jones}, {Baldry}, {Barentine}, {Bassett}, {Becker}, {Beers}, {Bell},
1234:   {Berlind}, {Bernardi}, {Blanton}, {Bochanski}, {Boroski}, {Brinchmann},
1235:   {Brinkmann}, {Brunner}, {Budav{\'a}ri}, {Carliles}, {Carr}, {Castander},
1236:   {Cinabro}, {Cool}, {Covey}, {Csabai}, {Cunha}, {Davenport}, {Dilday}, {Doi},
1237:   {Eisenstein}, {Evans}, {Fan}, {Finkbeiner}, {Friedman}, {Frieman},
1238:   {Fukugita}, {G{\"a}nsicke}, {Gates}, {Gillespie}, {Glazebrook}, {Gray},
1239:   {Grebel}, {Gunn}, {Gurbani}, {Hall}, {Harding}, {Harvanek}, {Hawley},
1240:   {Hayes}, {Heckman}, {Hendry}, {Hindsley}, {Hirata}, {Hogan}, {Hogg}, {Hyde},
1241:   {Ichikawa}, {Ivezi{\'c}}, {Jester}, {Johnson}, {Jorgensen}, {Juri{\'c}},
1242:   {Kent}, {Kessler}, {Kleinman}, {Knapp}, {Kron}, {Krzesinski}, {Kuropatkin},
1243:   {Lamb}, {Lampeitl}, {Lebedeva}, {Lee}, {Leger}, {L{\'e}pine}, {Lima}, {Lin},
1244:   {Long}, {Loomis}, {Loveday}, {Lupton}, {Malanushenko}, {Malanushenko},
1245:   {Mandelbaum}, {Margon}, {Marriner}, {Mart{\'{\i}}nez-Delgado}, {Matsubara},
1246:   {McGehee}, {McKay}, {Meiksin}, {Morrison}, {Munn}, {Nakajima}, {Neilsen},
1247:   {Newberg}, {Nichol}, {Nicinski}, {Nieto-Santisteban}, {Nitta}, {Okamura},
1248:   {Owen}, {Oyaizu}, {Padmanabhan}, {Pan}, {Park}, {Peoples}, {Pier}, {Pope},
1249:   {Purger}, {Raddick}, {Re Fiorentin}, {Richards}, {Richmond}, {Riess}, {Rix},
1250:   {Rockosi}, {Sako}, {Schlegel}, {Schneider}, {Schreiber}, {Schwope}, {Seljak},
1251:   {Sesar}, {Sheldon}, {Shimasaku}, {Sivarani}, {Smith}, {Snedden}, {Steinmetz},
1252:   {Strauss}, {SubbaRao}, {Suto}, {Szalay}, {Szapudi}, {Szkody}, {Tegmark},
1253:   {Thakar}, {Tremonti}, {Tucker}, {Uomoto}, {Vanden Berk}, {Vandenberg},
1254:   {Vidrih}, {Vogeley}, {Voges}, {Vogt}, {Wadadekar}, {Weinberg}, {West},
1255:   {White}, {Wilhite}, {Yanny}, {Yocum}, {York}, {Zehavi}, \& {Zucker}}]{dr6}
1256: {Adelman-McCarthy}, J.~K. {et~al.} 2008, \apjs, 175, 297
1257: 
1258: \bibitem[{{Bardeau} {et~al.}(2005){Bardeau}, {Kneib}, {Czoske}, {Soucail},
1259:   {Smail}, {Ebeling}, \& {Smith}}]{bardeauetal05}
1260: {Bardeau}, S., {Kneib}, J.-P., {Czoske}, O., {Soucail}, G., {Smail}, I.,
1261:   {Ebeling}, H., \& {Smith}, G.~P. 2005, \aap, 434, 433
1262: 
1263: \bibitem[{{Bardeau} {et~al.}(2007{\natexlab{a}}){Bardeau}, {Soucail}, {Kneib},
1264:   {Czoske}, {Ebeling}, {Hudelot}, {Smail}, \& {Smith}}]{bardeauetal07}
1265: {Bardeau}, S., {Soucail}, G., {Kneib}, J.-P., {Czoske}, O., {Ebeling}, H.,
1266:   {Hudelot}, P., {Smail}, I., \& {Smith}, G.~P. 2007{\natexlab{a}}, \aap, 470,
1267:   449
1268: 
1269: \bibitem[{{Bardeau} {et~al.}(2007{\natexlab{b}}){Bardeau}, {Soucail}, {Kneib},
1270:   {Czoske}, {Ebeling}, {Hudelot}, {Smail}, \& {Smith}}]{bardeauetal07b}
1271: ---. 2007{\natexlab{b}}, \aap, 470, 449
1272: 
1273: \bibitem[{{Becker} {et~al.}(2007){Becker}, {McKay}, {Koester}, {Wechsler},
1274:   {Rozo}, {Evrard}, {Johnston}, {Sheldon}, {Annis}, {Lau}, {Nichol}, \&
1275:   {Miller}}]{beckeretal07}
1276: {Becker}, M.~R. {et~al.} 2007, \apj, 669, 905
1277: 
1278: \bibitem[{{Burenin} {et~al.}(2007){Burenin}, {Vikhlinin}, {Hornstrup},
1279:   {Ebeling}, {Quintana}, \& {Mescheryakov}}]{bureninetal07}
1280: {Burenin}, R.~A., {Vikhlinin}, A., {Hornstrup}, A., {Ebeling}, H., {Quintana},
1281:   H., \& {Mescheryakov}, A. 2007, \apjs, 172, 561
1282: 
1283: \bibitem[{{Dahle}(2006)}]{dahle07}
1284: {Dahle}, H. 2006, \apj, 653, 954
1285: 
1286: \bibitem[{{Dai} {et~al.}(2007){Dai}, {Kochanek}, \& {Morgan}}]{daietal07}
1287: {Dai}, X., {Kochanek}, C.~S., \& {Morgan}, N.~D. 2007, \apj, 658, 917
1288: 
1289: \bibitem[{{Dunkley} {et~al.}(2008){Dunkley}, {Komatsu}, {Nolta}, {Spergel},
1290:   {Larson}, {Hinshaw}, {Page}, {Bennett}, {Gold}, {Jarosik}, {Weiland},
1291:   {Halpern}, {Hill}, {Kogut}, {Limon}, {Meyer}, {Tucker}, {Wollack}, \&
1292:   {Wright}}]{wmap08}
1293: {Dunkley}, J. {et~al.} 2008, ArXiv e-prints, 803, astro-ph/0803.0586
1294: 
1295: \bibitem[{{Evrard}(1990)}]{evrard90}
1296: {Evrard}, A.~E. 1990, \apj, 363, 349
1297: 
1298: \bibitem[{{Evrard} {et~al.}(2008){Evrard}, {Bialek}, {Busha}, {White}, {Habib},
1299:   {Heitmann}, {Warren}, {Rasia}, {Tormen}, {Moscardini}, {Power}, {Jenkins},
1300:   {Gao}, {Frenk}, {Springel}, {White}, \& {Diemand}}]{evrardetal08}
1301: {Evrard}, A.~E. {et~al.} 2008, \apj, 672, 122
1302: 
1303: \bibitem[{{Frenk} {et~al.}(1990){Frenk}, {White}, {Efstathiou}, \&
1304:   {Davis}}]{frenketal90}
1305: {Frenk}, C.~S., {White}, S.~D.~M., {Efstathiou}, G., \& {Davis}, M. 1990, \apj,
1306:   351, 10
1307: 
1308: \bibitem[{{Gladders} {et~al.}(2007){Gladders}, {Yee}, {Majumdar}, {Barrientos},
1309:   {Hoekstra}, {Hall}, \& {Infante}}]{gladdersetal07}
1310: {Gladders}, M.~D., {Yee}, H.~K.~C., {Majumdar}, S., {Barrientos}, L.~F.,
1311:   {Hoekstra}, H., {Hall}, P.~B., \& {Infante}, L. 2007, \apj, 655, 128
1312: 
1313: \bibitem[{{Haiman} {et~al.}(2001){Haiman}, {Mohr}, \& {Holder}}]{haimanetal01}
1314: {Haiman}, Z., {Mohr}, J.~J., \& {Holder}, G.~P. 2001, \apj, 553, 545
1315: 
1316: \bibitem[{{Henry}(2004)}]{henry04}
1317: {Henry}, J.~P. 2004, \apj, 609, 603
1318: 
1319: \bibitem[{{Henry} \& {Arnaud}(1991)}]{henry91}
1320: {Henry}, J.~P., \& {Arnaud}, K.~A. 1991, \apj, 372, 410
1321: 
1322: \bibitem[{{Hoekstra}(2007)}]{hoekstra07}
1323: {Hoekstra}, H. 2007, \mnras, 379, 317
1324: 
1325: \bibitem[{{Holder} {et~al.}(2001){Holder}, {Haiman}, \& {Mohr}}]{holderetal01}
1326: {Holder}, G., {Haiman}, Z., \& {Mohr}, J.~J. 2001, \apjl, 560, L111
1327: 
1328: \bibitem[{{Hu} \& {Kravtsov}(2003)}]{hukravtsov03}
1329: {Hu}, W., \& {Kravtsov}, A.~V. 2003, \apj, 584, 702
1330: 
1331: \bibitem[{{Johnston} {et~al.}(2007){Johnston}, {Sheldon}, {Wechsler}, {Rozo},
1332:   {Koester}, {Frieman}, {McKay}, {Evrard}, {Becker}, \&
1333:   {Annis}}]{johnstonetal07}
1334: {Johnston}, D.~E. {et~al.} 2007, ArXiv e-prints, 709, astro-ph/0709.1159
1335: 
1336: \bibitem[{{Kelly}(2007)}]{kelly07}
1337: {Kelly}, B.~C. 2007, \apj, 665, 1489
1338: 
1339: \bibitem[{{Koester} {et~al.}(2007{\natexlab{a}}){Koester}, {McKay}, {Annis},
1340:   {Wechsler}, {Evrard}, {Bleem}, {Becker}, {Johnston}, {Sheldon}, {Nichol},
1341:   {Miller}, {Scranton}, {Bahcall}, {Barentine}, {Brewington}, {Brinkmann},
1342:   {Harvanek}, {Kleinman}, {Krzesinski}, {Long}, {Nitta}, {Schneider},
1343:   {Sneddin}, {Voges}, \& {York}}]{koesteretal07a}
1344: {Koester}, B.~P. {et~al.} 2007{\natexlab{a}}, \apj, 660, 239
1345: 
1346: \bibitem[{{Koester} {et~al.}(2007{\natexlab{b}}){Koester}, {McKay}, {Annis},
1347:   {Wechsler}, {Evrard}, {Rozo}, {Bleem}, {Sheldon}, \&
1348:   {Johnston}}]{koesteretal07}
1349: ---. 2007{\natexlab{b}}, \apj, 660, 221
1350: 
1351: \bibitem[{{Kravtsov} {et~al.}(2006){Kravtsov}, {Vikhlinin}, \&
1352:   {Nagai}}]{kravtsovetal06}
1353: {Kravtsov}, A.~V., {Vikhlinin}, A., \& {Nagai}, D. 2006, \apj, 650, 128
1354: 
1355: \bibitem[{{Lima} {et~al.}(2008){Lima}, {Cunha}, {Oyaizu}, {Frieman}, {Lin}, \&
1356:   {Sheldon}}]{limaetal08}
1357: {Lima}, M., {Cunha}, C.~E., {Oyaizu}, H., {Frieman}, J., {Lin}, H., \&
1358:   {Sheldon}, E.~S. 2008, ArXiv e-prints, 801, astro-ph/0801.3822
1359: 
1360: \bibitem[{{Lima} \& {Hu}(2004)}]{limahu04}
1361: {Lima}, M., \& {Hu}, W. 2004, \prd, 70, 043504
1362: 
1363: \bibitem[{{Lima} \& {Hu}(2005)}]{limahu05}
1364: ---. 2005, \prd, 72, 043006
1365: 
1366: \bibitem[{{Mahdavi} {et~al.}(2008){Mahdavi}, {Hoekstra}, {Babul}, \&
1367:   {Henry}}]{mahdavietal08}
1368: {Mahdavi}, A., {Hoekstra}, H., {Babul}, A., \& {Henry}, J.~P. 2008, \mnras,
1369:   384, 1567
1370: 
1371: \bibitem[{{Mandelbaum} {et~al.}(2008{\natexlab{a}}){Mandelbaum}, {Seljak}, \&
1372:   {Hirata}}]{mandelbaumetal08b}
1373: {Mandelbaum}, R., {Seljak}, U., \& {Hirata}, C.~M. 2008{\natexlab{a}}, ArXiv
1374:   e-prints, 805, astro-ph/0805.2552
1375: 
1376: \bibitem[{{Mandelbaum} {et~al.}(2008{\natexlab{b}}){Mandelbaum}, {Seljak},
1377:   {Hirata}, {Bardelli}, {Bolzonella}, {Bongiorno}, {Carollo}, {Contini},
1378:   {Cunha}, {Garilli}, {Iovino}, {Kampczyk}, {Kneib}, {Knobel}, {Koo},
1379:   {Lamareille}, {Le F{\`e}vre}, {Leborgne}, {Lilly}, {Maier}, {Mainieri},
1380:   {Mignoli}, {Newman}, {Oesch}, {Perez-Montero}, {Ricciardelli}, {Scodeggio},
1381:   {Silverman}, \& {Tasca}}]{mandelbaumetal08}
1382: {Mandelbaum}, R. {et~al.} 2008{\natexlab{b}}, \mnras, 386, 781
1383: 
1384: \bibitem[{{Maughan}(2007)}]{maughan07}
1385: {Maughan}, B.~J. 2007, \apj, 668, 772
1386: 
1387: \bibitem[{{Nagai} {et~al.}(2007){Nagai}, {Vikhlinin}, \&
1388:   {Kravtsov}}]{nagaietal07a}
1389: {Nagai}, D., {Vikhlinin}, A., \& {Kravtsov}, A.~V. 2007, \apj, 655, 98
1390: 
1391: \bibitem[{{Oyaizu} {et~al.}(2008){Oyaizu}, {Lima}, {Cunha}, {Lin}, {Frieman},
1392:   \& {Sheldon}}]{oyaizuetal08}
1393: {Oyaizu}, H., {Lima}, M., {Cunha}, C.~E., {Lin}, H., {Frieman}, J., \&
1394:   {Sheldon}, E.~S. 2008, \apj, 674, 768
1395: 
1396: \bibitem[{{Pierpaoli} {et~al.}(2001){Pierpaoli}, {Scott}, \&
1397:   {White}}]{pierpaolietal01}
1398: {Pierpaoli}, E., {Scott}, D., \& {White}, M. 2001, \mnras, 325, 77
1399: 
1400: \bibitem[{{Rasia} {et~al.}(2006){Rasia}, {Ettori}, {Moscardini}, {Mazzotta},
1401:   {Borgani}, {Dolag}, {Tormen}, {Cheng}, \& {Diaferio}}]{rasiaetal06}
1402: {Rasia}, E. {et~al.} 2006, \mnras, 369, 2013
1403: 
1404: \bibitem[{{Reiprich} \& {B{\"o}hringer}(2002)}]{rb02}
1405: {Reiprich}, T.~H., \& {B{\"o}hringer}, H. 2002, \apj, 567, 716
1406: 
1407: \bibitem[{{Rozo} {et~al.}(2004){Rozo}, {Dodelson}, \& {Frieman}}]{rozoetal04}
1408: {Rozo}, E., {Dodelson}, S., \& {Frieman}, J.~A. 2004, \prd, 70, 083008
1409: 
1410: \bibitem[{{Rozo} {et~al.}(2007{\natexlab{a}}){Rozo}, {Wechsler}, {Koester},
1411:   {Evrard}, \& {McKay}}]{rozoetal07b}
1412: {Rozo}, E., {Wechsler}, R.~H., {Koester}, B.~P., {Evrard}, A.~E., \& {McKay},
1413:   T.~A. 2007{\natexlab{a}}, ArXiv Astrophysics e-prints, astro-ph/0703574
1414: 
1415: \bibitem[{{Rozo} {et~al.}(2007{\natexlab{b}}){Rozo}, {Wechsler}, {Koester},
1416:   {McKay}, {Evrard}, {Johnston}, {Sheldon}, {Annis}, \&
1417:   {Frieman}}]{rozoetal07a}
1418: {Rozo}, E. {et~al.} 2007{\natexlab{b}}, ArXiv Astrophysics e-prints,
1419:   astro-ph/0703571
1420: 
1421: \bibitem[{{Rozo} {et~al.}(2006){Rozo}, {Zentner}, {Bertone}, \&
1422:   {Chen}}]{rozoetal06}
1423: {Rozo}, E., {Zentner}, A.~R., {Bertone}, G., \& {Chen}, J. 2006, \apj, 639, 573
1424: 
1425: \bibitem[{{Rozo} {et~al.}(2008)}]{rozoetal08b}
1426: {Rozo}, E., {et~al.} 2008, ArXiv e-prints, 809, astro-ph/0809.2797
1427: 
1428: \bibitem[{{Rykoff} {et~al.}(2008{\natexlab{a}}){Rykoff}, {Evrard}, {McKay},
1429:   {Becker}, {Johnston}, {Koester}, {Nord}, {Rozo}, {Sheldon}, {Stanek}, \&
1430:   {Wechsler}}]{rykoffetal08b}
1431: {Rykoff}, E.~S. {et~al.} 2008{\natexlab{a}}, \mnras, 387, L28
1432: 
1433: \bibitem[{{Rykoff} {et~al.}(2008{\natexlab{b}}){Rykoff}, {McKay}, {Becker},
1434:   {Evrard}, {Johnston}, {Koester}, {Rozo}, {Sheldon}, \&
1435:   {Wechsler}}]{rykoffetal08a}
1436: ---. 2008{\natexlab{b}}, \apj, 675, 1106
1437: 
1438: \bibitem[{{Schuecker} {et~al.}(2003){Schuecker}, {B{\"o}hringer}, {Collins}, \&
1439:   {Guzzo}}]{schueckeretal03}
1440: {Schuecker}, P., {B{\"o}hringer}, H., {Collins}, C.~A., \& {Guzzo}, L. 2003,
1441:   \aap, 398, 867
1442: 
1443: \bibitem[{{Sheldon} {et~al.}(2007){Sheldon}, {Johnston}, {Scranton}, {Koester},
1444:   {McKay}, {Oyaizu}, {Cunha}, {Lima}, {Lin}, {Frieman}, {Wechsler}, {Annis},
1445:   {Mandelbaum}, {Bahcall}, \& {Fukugita}}]{sheldonetal07}
1446: {Sheldon}, E.~S. {et~al.} 2007, ArXiv e-prints, 709, astro-ph/0709.1153
1447: 
1448: \bibitem[{{Stanek} {et~al.}(2006){Stanek}, {Evrard}, {B{\"o}hringer},
1449:   {Schuecker}, \& {Nord}}]{staneketal06}
1450: {Stanek}, R., {Evrard}, A.~E., {B{\"o}hringer}, H., {Schuecker}, P., \& {Nord},
1451:   B. 2006, \apj, 648, 956
1452: 
1453: \bibitem[{{Tinker} {et~al.}(2008){Tinker}, {Kravtsov}, {Klypin}, {Abazajian},
1454:   {Warren}, {Yepes}, {Gottl{\"o}ber}, \& {Holz}}]{tinkeretal08}
1455: {Tinker}, J.~L., {Kravtsov}, A.~V., {Klypin}, A., {Abazajian}, K., {Warren},
1456:   M.~S., {Yepes}, G., {Gottl{\"o}ber}, S., \& {Holz}, D.~E. 2008, ArXiv e-prints,
1457:   803, astro-ph/0803.2706
1458: 
1459: \bibitem[{{Vikhlinin} {et~al.}(2008){Vikhlinin}, {Burenin}, {Ebeling},
1460:   {Forman}, {Hornstrup}, {Jones}, {Kravtsov}, {Murray}, {Nagai}, {Quintana}, \&
1461:   {Voevodkin}}]{vikhlininetal08}
1462: {Vikhlinin}, A. {et~al.} 2008, ArXiv e-prints, 805, astro-ph/0805.2207
1463: 
1464: \bibitem[{{Voges} {et~al.}(1999){Voges}, {Aschenbach}, {Boller},
1465:   {Br{\"a}uninger}, {Briel}, {Burkert}, {Dennerl}, {Englhauser}, {Gruber},
1466:   {Haberl}, {Hartner}, {Hasinger}, {K{\"u}rster}, {Pfeffermann}, {Pietsch},
1467:   {Predehl}, {Rosso}, {Schmitt}, {Tr{\"u}mper}, \& {Zimmermann}}]{vogesetal99}
1468: {Voges}, W. {et~al.} 1999, \aap, 349, 389
1469: 
1470: \bibitem[{{Voges} {et~al.}(2001){Voges}, {Boller}, {Englhauser}, {Freyberg}, \&
1471:   {Supper}}]{vogesetal01}
1472: {Voges}, W., {Boller}, T., {Englhauser}, J., {Freyberg}, M., \& {Supper}, R.
1473:   2001, in Astronomical Society of the Pacific Conference Series, Vol. 225,
1474:   Virtual Observatories of the Future, ed. R.~J. {Brunner}, S.~G. {Djorgovski},
1475:   \& A.~S. {Szalay}, 234--+
1476: 
1477: \bibitem[{{York} {et~al.}(2000){York}, {Adelman}, {Anderson}, {Anderson},
1478:   {Annis}, \& { the SDSS collaboration}}]{yorketal00}
1479: {York}, D.~G., {Adelman}, J., {Anderson}, J.~E., {Anderson}, S.~F., {Annis},
1480:   J., \& { the SDSS collaboration}. 2000, \aj, 120, 1579
1481: 
1482: \bibitem[{{Zhang} {et~al.}(2007){Zhang}, {Finoguenov}, {B{\"o}hringer},
1483:   {Kneib}, {Smith}, {Czoske}, \& {Soucail}}]{zhangetal07}
1484: {Zhang}, Y.-Y., {Finoguenov}, A., {B{\"o}hringer}, H., {Kneib}, J.-P., {Smith},
1485:   G.~P., {Czoske}, O., \& {Soucail}, G. 2007, \aap, 467, 437
1486: 
1487: \bibitem[{{Zhang} {et~al.}(2008){Zhang}, {Finoguenov}, {B{\"o}hringer},
1488:   {Kneib}, {Smith}, {Kneissl}, {Okabe}, \& {Dahle}}]{zhangetal08}
1489: {Zhang}, Y.-Y., {Finoguenov}, A., {B{\"o}hringer}, H., {Kneib}, J.-P., {Smith},
1490:   G.~P., {Kneissl}, R., {Okabe}, N., \& {Dahle}, H. 2008, \aap, 482, 451
1491: 
1492: \end{thebibliography}
1493: 
1494: 
1495: \newcommand\AAA[3]{{A\& A} {\bf #1}, #2 (#3)}
1496: \newcommand\PhysRep[3]{{Physics Reports} {\bf #1}, #2 (#3)}
1497: \newcommand\ApJ[3]{ {ApJ} {\bf #1}, #2 (#3) }
1498: \newcommand\PhysRevD[3]{ {Phys. Rev. D} {\bf #1}, #2 (#3) }
1499: \newcommand\PhysRevLet[3]{ {Physics Review Letters} {\bf #1}, #2 (#3) }
1500: \newcommand\MNRAS[3]{{MNRAS} {\bf #1}, #2 (#3)}
1501: \newcommand\PhysLet[3]{{Physics Letters} {\bf B#1}, #2 (#3)}
1502: \newcommand\AJ[3]{ {AJ} {\bf #1}, #2 (#3) }
1503: \newcommand\aph{astro-ph/}
1504: \newcommand\AREVAA[3]{{Ann. Rev. A.\& A.} {\bf #1}, #2 (#3)}
1505: 
1506: \appendix
1507: 
1508: 
1509: %-----------------------------------------------------
1510: %-----------------------------------------------------
1511: %-----------------------------------------------------
1512: %-----------------------------------------------------
1513: %-----------------------------------------------------
1514: %-----------------------------------------------------
1515: %-----------------------------------------------------
1516: 
1517: \section{Priors}
1518: \label{app:priors}
1519: 
1520: \subsection{Abundance Priors}
1521: 
1522: Our estimates of the $\Lx-M$ parameters depend on the abundance function
1523: of maxBCG clusters, which is observationally determined, but not known to 
1524: infinite precision.  Here, we fit the observed abundance function using a Schechter
1525: function, such that the mean number of clusters $\mu$ of richness $N$ is 
1526: %
1527: \begin{equation}
1528: \mu(N) = n_0 (N/40)^{-\tau} \exp( - N/N_* ).
1529: \end{equation}
1530: %
1531: The amplitude $n_0$ is chosen such that the total number of clusters exactly
1532: equals the observed number of clusters.  We set this normalization condition
1533: because we are interested only in the shape of the richness function, and not 
1534: in its amplitude.  
1535: 
1536: The fits are done by maximizing the likelihood of the observed distribution,
1537: binned in bins of width $\Delta N=1$.  We assume that the probability
1538: of observing $n$ clusters in a bin of richness $N$ is Poisson,
1539: with
1540: %
1541: \begin{equation}
1542: P(n) = \frac{ \mu(N)^n\exp(-\mu(N)) }{n!}.
1543: \end{equation}
1544: %
1545: For numerical purposes, we cut the distribution at $N_{max}=300$,
1546: which is sufficiently large to not affect our fits.  We emphasize that we use the
1547: above likelihood only to define estimators for $N_*$ and $\tau$, since, as discussed
1548: below, both goodness of fit and errors in the parameter estimation are obtained through
1549: Monte Carlo simulation.
1550: 
1551: The richness distribution is fit over the range $N\geq 10$ by maximizing the
1552: log-likelihood function using an amoeba routine.  To estimate our
1553: errors, we follow a Monte Carlo approach and resample the observed richness 
1554: function $10^4$ times.  We find that the parameters $N_*$ and $\tau$ are significantly
1555: correlated, with the probability distribution being Gaussian in 
1556: $\tau$ and $\ln N_*$.   The best fit parameters are
1557: %
1558: \begin{eqnarray}
1559: \avg{\ln N_*} & = & 3.66\pm 0.10 \\
1560: \avg{\tau} & = & 2.61 \pm 0.06
1561: \end{eqnarray}
1562: %
1563: with a correlation coefficient 
1564: %
1565: \begin{equation}
1566: r_{N_*,\tau} = 0.94.
1567: \end{equation}
1568: 
1569: To assess goodness of fit, we generate $10^4$ mock catalogs
1570: with as many clusters as the real data from the probability distribution specified by
1571: $\avg{\ln N_*}$ and $\avg{\tau}$.  We compute the likelihood for each of these mock catalogs,
1572: and compare the corresponding likelihood distribution to that observed in the real data.
1573: We find that our fit is statistically acceptable.
1574: 
1575: The most significant systematic errors affecting our measurements of the shape
1576: of the richness function are completeness and purity variations in the cluster catalog.  
1577: \citet{rozoetal07b} have shown that the maxBCG catalog
1578: is over $90\%$ pure and complete for $N\geq 10$.  Here, we take a conservative approach,
1579: and consider the change in
1580: the best fit parameters assuming the observed counts are rescaled by a completeness/purity
1581: correction factor $\lambda$ given by
1582: %
1583: \begin{equation}
1584: \lambda=\min \{ 1,\ 0.9+0.1\ln(N/10.0)/\ln(10.0)\}.
1585: \end{equation}
1586: %
1587: This corresponds to a $10\%$ decrease in the observed counts at $N=10$ while
1588: holding the counts at $N=100$ constant.  
1589: Upon refitting the data after this correction we find systematic offsets
1590: %
1591: \begin{eqnarray}
1592: (\Delta \ln N_*)_{sys} & = & 0.01 \\
1593: (\Delta \tau)_{sys} & = & 0.05
1594: \end{eqnarray}
1595: %
1596: which we adopt as our systematic error.  Note the systematic offsets are allowed
1597: to be both positive and negative, since the correction multiplier $\lambda$ above
1598: could easily be larger than unity rather than smaller than unity.
1599: 
1600: 
1601: %-----------------------------------------------------
1602: %-----------------------------------------------------
1603: %-----------------------------------------------------
1604: %-----------------------------------------------------
1605: %-----------------------------------------------------
1606: %-----------------------------------------------------
1607: %-----------------------------------------------------
1608: 
1609: \subsection{$M-N$ Priors}
1610: \label{sec:m-n_priors}
1611: 
1612: Our priors on the $M-N$ relation are based on the results presented in \citet{johnstonetal07},
1613: \citet{mandelbaumetal08}, and \citet{mandelbaumetal08b}.  To assign our priors, we first 
1614: compare the results of these two works as a means of assessing systematic uncertainties
1615: in the mass parameters.  We then focus exclusively on the \citet{johnstonetal07} results
1616: to place our final priors on the $M-N$ relation.  The latter choice reflects the fact that
1617: \citet{johnstonetal07} report weak lensing mass estimates for several mass definitions, among
1618: them $M_{500c}$, the relevant quantity in the $\Lx-M$ relation of \citet{vikhlininetal08}.
1619: 
1620: Let us then begin by discussing the \citet{johnstonetal07} results first.  
1621: While \citet{johnstonetal07} quote a power-law
1622: fit for the mean mass at fixed richness, $\avg{M|N}$, this fit is based on a non-public version of the
1623: maxBCG catalog that extends 
1624: to a richness of $N=3$ (the catalog for clusters with $N<10$ is not public).   Since the maxBCG
1625: catalog is only known to be highly complete and pure in the range $N\geq 10$, we have refit
1626: the \citet{johnstonetal07} masses restricting ourselves to the range $N\geq 9$.  This slightly
1627: lower cut is necessary due to the richness binning in \citet{johnstonetal07}.    We find that the
1628: mass $M_{180b}$ within a 180 overdensity threshold relative to mean matter density is 
1629: %
1630: \begin{equation}
1631: \frac{\avg{M_{180b}|N}}{10^{14}\ h^{-1}\msun} = \exp(0.25\pm0.07)(N/20)^{1.18\pm0.09}
1632: \end{equation}
1633: %
1634: with a correlation coefficient $r=-0.43$ between the amplitude and slope parameters.  
1635: 
1636: \citet{mandelbaumetal08b} performed a similar but independent weak lensing analysis of the
1637: maxBCG clusters, though using $M_{200b}$ as their mass variable.  They find
1638: %
1639: \begin{equation}
1640: \frac{\avg{M_{200b}|N}}{10^{14}\ h^{-1} \msun} = \exp(0.45\pm 0.08)(N/20)^{1.15\pm 0.14}
1641: \end{equation}
1642: %
1643: To compare against the \citet{johnstonetal07} values, we use the \citet{hukravtsov03} mass 
1644: conversion formulae to find an approximate power law relation between $M_{200b}$ and $M_{180b}$
1645: over the range $5\times 10^{14}\ \msun \leq M_{200b} \leq 10^{15}\ \msun$.  We find
1646: $M_{180b} = 1.022 M_{200b}$, which is only a $2\%$ correction.  Applying this correction,
1647: we find that the corresponding $M-N$ parameters from \citet{mandelbaumetal08b} are
1648: %
1649: \begin{equation}
1650: \frac{\avg{M_{180b}|N}}{10^{14}\ h^{-1}\msun} = \exp(0.47\pm0.07)(N/20)^{1.15\pm0.14}
1651: \end{equation}
1652: 
1653: We find that the slopes of the \citet{johnstonetal07} and \citet{mandelbaumetal08b} results are 
1654: nearly identical, but that the masses of \citet{mandelbaumetal08b} are
1655: systematically higher by $\approx 25\%$.  This difference can be traced back to how
1656: the lensing critical surface density for each of the two works is estimated. 
1657: 
1658: In general, lensing masses are proportional to the quantity
1659: $1/\avg{ \Sigma_{crit}^{-1} }$, where $\Sigma_{crit}$ is the lensing critical
1660: surface density, and the average is to be computed over the source redshift
1661: distribution. Given multi-band photometric data $\bm{m}$ for each
1662: galaxy, one way to compute $\avg{\Sigma_{crit}^{-1}}$ is to use a photometric
1663: redshift estimator $z_{photo}(\bm{m})$, and then assume that the true
1664: source redshift distribution is identical to the photometric redshift distribution.
1665: \citet{mandelbaumetal08} have shown that such a simple
1666: approach typically results in biased lensing mass estimates, but they also
1667: demonstrate that it is possible to achieve unbiased results using the probability
1668: distribution $P(z|\bm{m})$.
1669: 
1670: The weak lensing analysis in \citet{sheldonetal07}, on which the results from \citet{johnstonetal07}
1671: are based, falls somewhere in between these two approaches.  While \citet{sheldonetal07} 
1672: does in fact make use of photometric redshifts, they do not simply assume that the source
1673: redshift distribution is identical to the photometric redshift distribution.  Rather, they construct
1674: a probability distribution $P(z|z_{photo})$, and use this probability to 
1675: estimate $\avg{\Sigma_{crit}^{-1}}$.
1676: As it turns out, evaluating $\avg{\Sigma_{crit}^{-1}}$ in this way
1677: leads to results that are nearly identical to those obtained by simply setting $z=z_{photo}$.
1678: Thus, even though the approach used in
1679: \citet{sheldonetal07} is more sophisticated than the simple case considered
1680: in \citet{mandelbaumetal08}, we expect the \citet{sheldonetal07} results to be biased but correctable
1681: as prescribed in \citet{mandelbaumetal08}.
1682: This correction amounts to a boost of the lensing masses by a factor of $1.18\pm 0.02\ (stat) \pm 0.02\ (sys)$.
1683: The statistical error bar in the correction is added in quadrature to the statistical error
1684: bar from our fit, which results in
1685: %
1686: \begin{equation}
1687: \frac{\avg{M_{180b}|N}}{10^{14}\ h^{-1}\msun} = \exp[0.42 \pm 0.07\ (stat) \pm 0.02\ (sys)]\times (N/20)^{1.18\pm 0.09}
1688: \label{eq:johnston_mass}
1689: \end{equation}
1690: %
1691: These new values for the \citet{johnstonetal07} data are in considerably better agreement with those
1692: of \citet{mandelbaumetal08b}.  There remains, however, a systematic $5\%$ difference between the
1693: two amplitudes, as well as a small difference $\Delta \alpha_{M|N}=0.028$ between the two slopes.
1694: 
1695: A possible culprit for this systematic $5\%$ offset is the difference in how miscentering is accounted for
1696: in the data models.
1697: The word miscentering refers to the fact that when finding clusters, one will inevitably find clusters that are
1698: improperly centered, either due to failures of the cluster finding algorithm, or simply because there is
1699: no obvious center of the cluster based on its optical image.  Such offsets between the true and assigned
1700: centers are problematic because if a cluster is miscentered, the corresponding lensing
1701: signal is weakened, resulting in systematically low mass estimates.  
1702: 
1703: To determine whether the remaining offset between \citet{mandelbaumetal08b} and \citet{johnstonetal07}
1704: is consistent with differences in the miscentering model, we refit our data assuming no errors
1705: on the miscentering corrections.  We find
1706: %
1707: \begin{equation}
1708: \avg{M_{180b}|N} = \exp(0.42\pm 0.04)(N/20)^{1.17\pm 0.07},
1709: \end{equation}
1710: %
1711: with a correlation coefficient $r=-0.15$. Note that these errors are smaller than the errors
1712: quoted before, as they should be, given that this new fit does not marginalize over a wide range
1713: of miscentering models.  By subtracting the two sets of errors in quadrature, we find
1714: that the miscentering priors adopted in \citet{johnstonetal07} correspond to an error
1715: $0.043$ in the amplitude and $0.05$ in the slope.  Thus, the \citet{mandelbaumetal08b}
1716: mass measurements are well within the centering error included in the analysis 
1717: of \citet{johnstonetal07}.
1718: 
1719: Nevertheless, it is unclear whether miscentering can in fact account for the difference between
1720: the \citet{johnstonetal07} and \citet{mandelbaumetal08b} results.  More specifically, 
1721: \citet{mandelbaumetal08b} also performed their analysis including the \citet{johnstonetal07}
1722: model for miscentering, and find after applying the centering correction their best fit
1723: $M_{180b}-N$ relation becomes
1724: %
1725: \begin{equation}
1726: \avg{M_{180b}|N} = \exp(0.53\pm 0.07)(N/20)^{1.08\pm 0.14}.
1727: \end{equation}
1728: %
1729: Comparing this to equation \ref{eq:johnston_mass}, we find including a miscentering correction in
1730: the \citet{mandelbaumetal08b} analysis increases the tension between the two results.  Moreover,
1731: it suggests that the difference between the two results is due to some other form of systematic
1732: difference between the two analysis pipelines.  
1733: In light of this, we opt for introducing a systematic correction to the \citet{johnstonetal07}
1734: results of  $+0.06$ and $-0.05$ for the amplitude and slope respectively.  We also introduce 
1735: systematic errors of the same magnitude as this systematic correction, so that our final result is
1736: %
1737: \begin{equation}
1738: \avg{M_{180b}|N} = [\exp(0.48\pm 0.07\ (stat) \pm 0.06\ (sys))](N/20)^{1.13\pm 0.09\ (stat) \pm 0.05\ (sys)}.
1739: \end{equation}
1740: %
1741: Note the central values of the original \citet{johnstonetal07} analysis (corrected for photometric redshift bias)
1742: as well as the \citet{mandelbaumetal08b} results both with and without miscentering corrections are all encompassed
1743: by our systematic error.  
1744: 
1745: Now, in this work we are interested more in the $M_{500c}-N$ (henceforth simply $M-N$) relation than in the 
1746: $M_{200c}-N$ relation, since it is the former mass which is accessible to X-ray studies.  To constrain 
1747: the $M-N$ relation we use the quoted $M_{500c}$ mass measurements from \citet{johnstonetal07}, re-scaling their
1748: $M_{200c}$ errors to $M_{500c}$ by assuming the relative errors are constant.  A fit to the data results in
1749: %
1750: \begin{equation}
1751: \frac{\avg{M|N}}{10^{14} \msun} = \exp[ 0.68 \pm 0.07 ] (N/40)^{1.11 \pm 0.08}
1752: \end{equation}
1753: %
1754: with a correlation coefficient $r=0.45$.  
1755: 
1756: We now boost this expression by a factor of $1.18$ due to the photometric redshift bias correction, and add the systematic
1757: corrections $+0.06$ and $-0.05$ to the amplitude and slope respectively as per our discussion of the $M_{180b}-N$ relation.
1758: We also include a systematic error on the amplitudes and slopes of this same magnitude.  We obtain
1759: %
1760: \begin{eqnarray}
1761: B_{M|N} & = & 0.91 \pm 0.07\ (stat)\ \pm 0.06\ (sys) \\
1762: \alpha_{M|N} & = & 1.06 \pm 0.08\ (stat) \pm 0.05\ (sys).
1763: \end{eqnarray}
1764: 
1765: The final systematics we consider here are the purity and completeness of the sample.   Now,
1766: as long as the completeness is not correlated with mass, completeness should not in any
1767: way bias the recovered parameters of the $M-N$ relation, though it obviously affects the
1768: error bars due to lower statistics.  
1769: 
1770: The same cannot be said of purity.   If only a fraction $p$ of the clusters are actually
1771: good matches to real halos in the universe, then a fraction $1-p$ of the clusters will have a lensing
1772: signal that is significantly different from the mean signal.  As an extreme case, we can consider
1773: what happens if a fraction $1-p$ of the clusters had no mass associated with them.  In that case,
1774: the observed mean mass is simply $M_{obs}=pM_{true}$, where $M_{true}$ is the true mean, 
1775: so one should boost the observed masses
1776: by a factor of $1/p$ to obtain an unbiased estimate. For $p=0.9$, this amounts to an 
1777: increase in $B_{M|N}$ of magnitude  $\Delta B_{M|N}= 0.1$. 
1778: Now, \citet{rozoetal06} showed that the purity of the maxBCG cluster sample
1779: is expected to be above $90\%$ over the range of richnesses considered here,
1780: and the increase in $B_{M|N}$ quoted above is undoubtedly an overestimate of the
1781: necessary correction since even false cluster detections will have excess mass
1782: associated with them.  In light of this, we have adopted a one-sided systematic error bar 
1783: $\Delta B_{M|N}=0.08$ to take into account the impact of purity in the recovered $M-N$
1784: relation.   The error bar is one sided since we expect impurities will tend to decrease
1785: the observed mean mass.  We can, however, turn this prior into a normal double-sided
1786: prior by including a systematic correction $\Delta B_{M|N}=0.04$ to the central
1787: value, and setting the systematic error bar to the same magnitude as the
1788: central value shift.
1789: We can also get a rough estimate for the purity-induced systematic error on the slope 
1790: by assuming that the quoted systematic error in the amplitude should be made only
1791: in the limit of high or low richness.  If that were the case, using the fact the slope is measured
1792: over a decade of richness values, the corresponding slope
1793: would be
1794: %
1795: \begin{equation}
1796: \alpha = \frac{ 1.06\ln(10)+0.08}{\ln(10)} = 1.09
1797: \end{equation}
1798: %
1799: which amounts to a systematic offset $\Delta\alpha=0.03$.  
1800: These systematic error bars are added linearly to our previous systematic error.
1801: Our final set of priors for the $M-N$ relation is
1802: %
1803: \begin{eqnarray}
1804: B_{M|N} & = & 0.95 \pm 0.07\ (stat) \pm 0.10\ (sys) \\
1805: \alpha_{M|N} & = & 1.06 \pm 0.08\ (stat) \pm 0.08\ (sys)
1806: \end{eqnarray}
1807: %
1808: with a correlation coefficient $r=0.45$ between the two statistical errors.
1809: 
1810: 
1811: %-----------------------------------------------------
1812: %-----------------------------------------------------
1813: %-----------------------------------------------------
1814: %-----------------------------------------------------
1815: %-----------------------------------------------------
1816: %-----------------------------------------------------
1817: %-----------------------------------------------------
1818: 
1819: \subsection{$\Lx-N$ Priors}
1820: \label{app:lx-n_priors}
1821: 
1822: The priors in the $\Lx-N$ relation come from repeating the analysis described
1823: in \citet{rykoffetal08a}, but with $\Lx$ defined as the X-ray luminosity in the
1824: 0.5-2.0 keV band, and corrected for aperture effects.  As in
1825: \citet{rykoffetal08a}, we restrict this analysis to clusters with $N \geq
1826: 30$.  We begin by measuring the stacked mean $\Lx-N$ relation and scatter on a
1827: fixed $1\,\hMpc$ scale
1828: %
1829: \begin{eqnarray}
1830: \label{eqn:lxnmean}
1831: B_{L|N} & = & 1.69\pm0.04\,(stat)\\
1832: \alpha_{L|N} & = & 1.63 \pm 0.06\,(stat)\\
1833: \sigma_{L|N} & = & 0.84 \pm 0.03\,(stat)
1834: \end{eqnarray}
1835: %
1836: where we have measured $\Lx$ in units of
1837: $10^{43}\ \mathrm{ergs}\ \mathrm{s}^{-1}$, with a pivot point of
1838: $N = 40$.  We emphasize that the scatter determined above is the total
1839: scatter in the observed $\Lx-N$ relation that cannot be attributed to Poisson
1840: uncertainties in the ROSAT photon counts.  In particular, the quoted scatter is
1841: affected by possible point source contamination, AGN activity, cool cores,
1842: cluster mergers, etc.
1843: 
1844: There are multiple systematic errors that can affect the derived parameters for
1845: the $\Lx-N$ relation.  These include photometric redshift errors,
1846: evolution of the richness parameter $N$, uncorrelated point sources,
1847: cluster mis-centering, and cluster AGN and cool cores.  In addition, we need to
1848: account for the fraction of cluster flux lost due to our finite aperture and
1849: the RASS PSF, in order to compare our results with the luminosity measurements
1850: of \citet{vikhlininetal08}.  We shall now discuss each of these possible
1851: systematic effects.
1852: 
1853: \citet{rykoffetal08a} find that the accuracy of the maxBCG photo-z estimates is
1854: high enough such that any biases are insignificant relative to the statistical
1855: uncertainty of the parameter determinations, and can thus be safely ignored.
1856: However, \citet{rykoffetal08a} did find significant redshift evolution in the
1857: $\Lx-N$ relation, well above the expected self-similar evolution.
1858: Similar redshift evolution is found in \citet{beckeretal07}; the reason for the
1859: systematic undercounting of cluster members at high redshift is explained in \citet{rozoetal08b}.  We have estimated the effect of this redshift evolution on
1860: our derived scatter parameter via a simple Monte Carlo, and confirm that
1861: although the apparent evolution is strong, it is insignificant relative to the
1862: intrinsic scatter.  Therefore, we may also safely ignore this possible
1863: systematic effect.
1864: 
1865: We now take a combined approach to the systematic effects due to cluster
1866: mis-centering, a finite aperture, the RASS PSF and uncorrelated point sources.
1867: The first three effects are strongly related, in that they all tend to scatter
1868: cluster photons out of our initial fixed $1\,\hMpc$ aperture, and these may
1869: affect the normalization, slope, and scatter in the $\Lx-N$ relation.
1870: Uncorrelated point sources should not affect the mean relation because the
1871: large number of stacked sources smooths out the foreground and background.
1872: However, when uncorrelated point sources are aligned with individual clusters
1873: they may increase the measured scatter by boosting the apparent $\Lx$.
1874: 
1875: We have estimated the effects of these systematics by running a Monte Carlo
1876: with simulated RASS data on top of random backgrounds selected from the area of
1877: the RASS photon map that overlaps with the maxBCG mask. 
1878: We first resample the maxBCG richness function 100 times.  Each 
1879: cluster is given a redshift drawn from the maxBCG redshift distribution, as well as a random 
1880: position on the sky selected from the area of the RASS survey that overlaps with the maxBCG mask.  
1881: After we select the richest 1000 clusters in each realization, each cluster is given a luminosity based 
1882: on the mean relation from Eqn.~\ref{eqn:lxnmean} and an input intrinsic scatter,  
1883: $\sigma_{in} =\{0.0, 0.2, 0.4, 0.6, 0.8, 1.0\}$.  Each cluster luminosity is then converted
1884: to a number of photon counts according to the RASS exposure at the given
1885: point, and scattered by Poisson uncertainties.  Then, each cluster is given a
1886: position offset according to the maxBCG miscentering distribution described in
1887: \citet[][see \S~4.3]{johnstonetal07}.  The cluster profiles are assumed to
1888: follow a $\beta$ model, $S(R) = S_0(1+R^2/R_C^2)^{-3\beta+1/2}$.  To ensure we
1889: are on similar footing as \citet{vikhlininetal08}, we randomly assign each
1890: cluster $\beta$ model parameters uniformly in the range $0.6<\beta<0.7$ and
1891: $0.05<R_C<0.15\,\hMpc$.  Finally, the photons are scattered according to the
1892: RASS PSF, following the method of \citet[][see \S~3.3.1]{rykoffetal08a}.  We
1893: then calculate the stacked mean relation and scatter as described in
1894: \citet{rykoffetal08a}.
1895: 
1896: Figure~\ref{fig:lnsystematics} summarizes the results from our systematic
1897: tests.  The x-axis shows the input intrinsic scatter, $\sigma_{in}$.  The
1898: y-axis shows the ratio of the input parameter to output parameter for the
1899: normalization $B_{L|N}$ (circles), slope $\alpha_{L|N}$ (diamonds), and scatter
1900: $\sigma_{L|N}$ (squares).  We note that when $\sigma_{in}=0.0$ then
1901: $\sigma_{out}=0.31\pm 0.04$, which cannot be displayed on the plot.  This is
1902: consistent with our expectation that uncorrelated sources may boost the observed
1903: scatter, especially with low intrinsic scatter.  Overall, we find that (a) the
1904: slope $\alpha_{L|N}$ is not significantly biased; (b) at moderate to large
1905: scatter ($\sigma_{in} \gtrsim 0.5$) the intrinsic scatter $\sigma_{L|N}$ is not
1906: significantly biased; and (c) the output normalization $B_{L|N}$ must be
1907: boosted by a factor of $1.20\pm0.05$ to account for the flux lost to
1908: miscentering, the finite aperture, and RASS PSF effects.  Our priors then
1909: become
1910: %
1911: \begin{eqnarray}
1912: B_{L|N} & = & 1.87 \pm 0.04\,(stat) \pm 0.05 \,(sys)\\
1913: \alpha_{L|N} & = & 1.63 \pm 0.06\,(stat) \\
1914: \sigma_{L|N} & = & 0.84 \pm 0.03\,(stat).
1915: \end{eqnarray}
1916: %
1917: 
1918: In addition to these corrections, we also need to take into account systematic uncertainties
1919: due to purity and completeness in the sample.  Just as with the weak lensing mass estimates,
1920: completeness should not affect the measured $\Lx-N$ relation, whereas purity will tend to
1921: suppress the X-ray luminosity at fixed richness.  Following the same procedure as in 
1922: appendix \ref{sec:m-n_priors}, we derive systematic errors $\Delta B_{L|N}=0.04$ and
1923: $\Delta \alpha_{L|N}=0.05$, which we add linearly to our previous systematic error estimates.
1924: Finally, we have repeated our scatter analysis using not just the 1000 richest clusters,
1925: but also the 2000 richest clusters, in which case we find $\sigma_{L|N}=0.95$.  To 
1926: take into account this variation in our analysis, we also introduce a systematic error 
1927: $\Delta \sigma_{L|N}=0.10$.
1928: Our final set of priors is
1929: %
1930: \begin{eqnarray}
1931: B_{L|N} & = & 1.91 \pm 0.04\,(stat) \pm 0.09 \,(sys)\\
1932: \alpha_{L|N} & = & 1.63 \pm 0.06\,(stat)\pm 0.05\,(sys)\\
1933: \sigma_{L|N} & = & 0.84 \pm 0.03\,(stat)\pm 0.10\,(sys).
1934: \end{eqnarray}
1935: %
1936: 
1937: \begin{figure}
1938: \begin{center}
1939: \rotatebox{270}{\scalebox{0.5}{\plotone{f6.eps}}}
1940: \caption{\label{fig:lnsystematics}Results from systematic error Monte Carlo
1941:   tests.  The x-axis shows the input intrinsic scatter, $\sigma_{in}$.  The
1942:   y-axis shows the ratio of the given input parameter to output parameter for
1943:   the normalization $B_{L|N}$ (circles), slope $\alpha_{L|N}$ (diamonds), and
1944:   scatter $\sigma_{L|N}$ (squares).  We note that when $\sigma_{in}=0.0$ then
1945:   $\sigma_{out}=0.31\pm 0.04$, which cannot be displayed on the plot.
1946: }
1947: \end{center}
1948: \end{figure}
1949: 
1950: %-----------------------------------------------------
1951: %-----------------------------------------------------
1952: %-----------------------------------------------------
1953: %-----------------------------------------------------
1954: %-----------------------------------------------------
1955: %-----------------------------------------------------
1956: %-----------------------------------------------------
1957: 
1958: 
1959: \subsection{$\Lx-M$ Priors}
1960: \label{app:lx-m_priors}
1961: 
1962: As discussed in section \ref{sec:rough}, our analysis hinges on the fact
1963: that we can use prior knowledge about the $\Lx-M$ relation 
1964: to constrain the $M-N$ relation.   Here, we use the results of \citet{vikhlininetal08}
1965: to put priors on the $\Lx-M$ relation, which may be summarized as\footnote{We have
1966: included the evolution correction for a median redshift $z=0.23$, as
1967: appropriate for the maxBCG sample.}
1968: %
1969: \begin{eqnarray}
1970: A_{L|M} + 1.361\alpha_{L|M} +1.5(\sigma_{L|M}^2-0.40^2) & = & 2.59\pm 0.08 \\
1971: \alpha_{L|M} & = & 1.61\pm 0.14 \\
1972: \sigma_{L|M} & = & 0.40 \pm 0.04.
1973: \end{eqnarray}
1974: %
1975: We report a prior on $A_{L|M}+1.361\alpha_{L|M}+1.5(\sigma_{L|M}^2-0.40^2)$ 
1976: because at $M=10^{14}\ \msun$ the $\Lx-M$ parameters derived from the
1977: \citet{bureninetal07} sample are correlated.  To decouple them, one
1978: needs to shift to the statistical pivot point $M=3.9\times 10^{14}\ \msun$ and
1979: introduce the scatter dependence quoted above (Vikhlinin, private communication).
1980: These constraints are derived from Chandra observations of clusters in the
1981: 400d cluster catalog \citep{bureninetal07}, which allowed \citet{vikhlininetal08}
1982: to measure $Y_X$ and thereby infer cluster mass using the $M-Y_X$ relation. 
1983: This relation was itself calibrated on a cluster subsample for which masses were
1984: derived using the standard hydrostatic equilibrium argument.
1985: This last point is very important, since
1986: simulations suggest that hydrostatic mass estimates of clusters may be biased low
1987: by $\approx 10\%-30\%$ \citep[see e.g.][]{evrard90,rasiaetal06,nagaietal07a}.  
1988: One way to calibrate such uncertainties is to
1989: compare weak lensing mass estimates to hydrostatic mass estimates.   There are several
1990: examples of this type of approach.  For instance, 
1991: \citet{vikhlininetal08} have
1992: performed such an analysis using the weak lensing mass estimates of \citet{hoekstra07}, and
1993: find $M_{wl}= ( 1.09 \pm 0.11) M_X.$
1994: A similar analysis has been carried out by \citet{mahdavietal08}, who used the weak lensing
1995: mass estimates of \citet{hoekstra07} and their own analysis of Chandra public data to obtain
1996: $M_{wl}=( 1.28 \pm 0.15) M_X$.  Finally, using XMM X-ray observations
1997: and the weak lensing data of \citet{bardeauetal05}, \citet{bardeauetal07}, and \citet{dahle07},
1998: \citet{zhangetal08} find $M_{wl} = ( 1.21 \pm 0.13 )M_X$.
1999: \citet{zhangetal08} also note, however, that a histogram of $M_{wl}/M_{X}$ peaks
2000: at a ratio of $1.00\pm 0.05$, and that clusters in the tails of the distribution tend to have tight
2001: error bars, possibly biasing the error weighted ratio.
2002: In light of this, we have opted for a ``middle of the road'' approach, and introduce a correction 
2003: factor $1.15\pm 0.15$.  Our corresponding prior is
2004: %
2005: \begin{eqnarray}
2006: A_{L|M} + 1.361\alpha_{L|M}  +1.5(\sigma_{L|M}^2-0.40^2)  & = & 2.45 \pm 0.08\ (stat) \pm 0.23\ (sys) \\
2007: \alpha_{L|M} & = & 1.61\pm 0.14\ (stat) \\
2008: \sigma_{L|M} & = & 0.40 \pm 0.04\ (stat).
2009: \end{eqnarray}
2010: %
2011: 
2012: Estimating systematic errors in $\alpha_{L|M}$ and $\sigma_{L|M}$ is difficult.  For instance,
2013: comparisons with weak lensing masses are not an effective way of assessing systematics 
2014: because weak lensing mass estimates are so noisy: trying to fit a power law relation between
2015: $M_{wl}$ and $M_X$ results in very large errors for the slope of the relation.
2016: 
2017: One alternative is to consider multiple studies of the $\Lx-M$ relation in order to assess how
2018: sensitive the recovered parameters are to the analysis pipeline.  Unfortunately, such an exercise
2019: is far from trivial.  One difficulty is the fact that there is very little agreement on the meaning of
2020: $\Lx$, with many works focusing on core-excised and/or core-corrected bolometric X-ray
2021: luminosities \citep[e.g.][]{bardeauetal07b,zhangetal07,zhangetal08}.  Even among those works
2022: that also explore the $\Lx-M$ relation when $\Lx$ is a soft X-ray band luminosity
2023: \citep[e.g.][]{rb02,maughan07}, there are still important differences in the aperture used
2024: to estimate $\Lx$.  In principle, we could attempt to convert between the various definitions
2025: of $\Lx$ to try to compare the works against each other, but many of these $\Lx-M$ measurements
2026: are affected by Malmquist bias, making comparisons to the \citet[][]{vikhlininetal08} results
2027: difficult.
2028: 
2029: One work that does constrain the soft X-ray band,
2030: non-core excised, Malmquist bias corrected $\Lx-M$ relation is \citet{staneketal06}.
2031: Unfortunately, the energy band they use is slightly different from that of \citet{vikhlininetal08}, so
2032: even here comparison is not trivial.  We expect, however, that at least the scatter and slopes 
2033: of the $\Lx-M$ relation will not be strongly affected by the minor differences between the two 
2034: $\Lx$ definitions.  Given our purposes, the interesting thing about the \citet{staneketal06}
2035: results is that they use a very different methodology for constraining the $\Lx-M$ relation.
2036: In particular, they assume knowledge of cosmological parameters, and then use the observed
2037: cluster X-ray luminosity function to constrain $P(\Lx|M)$.  Assuming their ``compromise cosmology'',
2038: which they argue gives the best results, they find
2039: $\alpha_{L|M}=1.60\pm0.05$ and $\sigma_{L|M}=0.34\pm0.10$.  These values are in
2040: excellent agreement with those of \citet{vikhlininetal08}, and suggest that placing additional
2041: systematic errors in the $\Lx-M$ parameters is not really necessary at this point.
2042: 
2043: 
2044: 
2045: 
2046: %-----------------------------------------------------
2047: %-----------------------------------------------------
2048: %-----------------------------------------------------
2049: %-----------------------------------------------------
2050: %-----------------------------------------------------
2051: %-----------------------------------------------------
2052: %-----------------------------------------------------
2053: 
2054: \section{Mass Function Data}
2055: \label{app:mfdata}
2056: 
2057: 
2058: Table \ref{tab:mfdata} presents the mean and covariance matrix of the mass function data derived from our
2059: analysis.  These results represent the state of the art mass function measurements at low redshift
2060: from optically derived cluster catalogs.  We emphasize we assumed
2061: $\Omega_m=0.27$ and $h=0.71$, so appropriate rescaling must be applied if the results
2062: are to be compared against significantly different cosmologies.  
2063: Note that the covariance matrix data in table \ref{tab:mfdata} is 
2064: normalized such that the diagonal entries are the fractional error $\sqrt{C_{i,i}}/\avg{n_i}$, while
2065: the off diagonal entries are the correlation coefficients $r_{i,j}=C_{i,j}/\sqrt{C_{i,i}C_{j,j}}$.  We present
2066: the data in this way since it is easier to interpret.   The 
2067: actual values for the covariance matrix are easily reconstructed from the data in the table.
2068: 
2069: \begin{deluxetable}{|c|c|lllllllllllllllll|}
2070: \tablewidth{0pt}
2071: \tablecaption{\label{tab:mfdata}maxBCG Mass Function Data}
2072: \startdata
2073: \hline
2074: $M_{500c}$ & $\avg{dn/d\ln M}$ & 3.22 & 3.70 & 4.26 & 4.91 & 5.65 & 6.50 & 7.49 & 8.62 & 9.92 & 11.42 & 13.14 & 15.12 & 17.41 & 20.04 & 23.07 & 26.55 & 30.56 \\
2075: \hline
2076: \hline
2077: 3.22 & 7.90e-7 & 0.22 & 0.82 & 0.77 & 0.72 & 0.66 & 0.61 & 0.55 & 0.50 & 0.44 & 0.39 & 0.35 & 0.30 & 0.25 & 0.21 & 0.18 & 0.15 & 0.12 \\ 
2078: 3.70 & 5.61e-7 & 0.82 & 0.24 & 0.79 & 0.76 & 0.71 & 0.67 & 0.62 & 0.57 & 0.52 & 0.46 & 0.41 & 0.36 & 0.31 & 0.27 & 0.23 & 0.19 & 0.16 \\ 
2079: 4.26 & 3.92e-7 & 0.77 & 0.79 & 0.27 & 0.77 & 0.74 & 0.70 & 0.66 & 0.61 & 0.57 & 0.52 & 0.46 & 0.41 & 0.36 & 0.31 & 0.26 & 0.22 & 0.18 \\ 
2080: 4.91 & 2.70e-7 & 0.72 & 0.76 & 0.77 & 0.30 & 0.75 & 0.72 & 0.68 & 0.64 & 0.59 & 0.55 & 0.50 & 0.44 & 0.38 & 0.34 & 0.29 & 0.24 & 0.20 \\ 
2081: 5.65 & 1.82e-7 & 0.66 & 0.71 & 0.74 & 0.75 & 0.35 & 0.72 & 0.69 & 0.65 & 0.61 & 0.57 & 0.52 & 0.46 & 0.41 & 0.36 & 0.30 & 0.26 & 0.22 \\ 
2082: 6.50 & 1.21e-7 & 0.61 & 0.67 & 0.70 & 0.72 & 0.72 & 0.41 & 0.68 & 0.65 & 0.62 & 0.57 & 0.52 & 0.47 & 0.42 & 0.37 & 0.32 & 0.27 & 0.22 \\ 
2083: 7.49 & 7.93e-8 & 0.55 & 0.62 & 0.66 & 0.68 & 0.69 & 0.68 & 0.47 & 0.64 & 0.60 & 0.57 & 0.52 & 0.47 & 0.42 & 0.37 & 0.32 & 0.27 & 0.23 \\ 
2084: 8.62 & 5.11e-8 & 0.50 & 0.57 & 0.61 & 0.64 & 0.65 & 0.65 & 0.64 & 0.55 & 0.59 & 0.55 & 0.51 & 0.47 & 0.41 & 0.37 & 0.32 & 0.27 & 0.23 \\ 
2085: 9.92 & 3.24e-8 & 0.44 & 0.52 & 0.57 & 0.59 & 0.61 & 0.62 & 0.60 & 0.59 & 0.65 & 0.53 & 0.49 & 0.45 & 0.40 & 0.36 & 0.31 & 0.26 & 0.22 \\ 
2086: 11.42 & 2.03e-8 & 0.39 & 0.46 & 0.52 & 0.55 & 0.57 & 0.57 & 0.57 & 0.55 & 0.53 & 0.76 & 0.47 & 0.43 & 0.38 & 0.34 & 0.30 & 0.25 & 0.21 \\ 
2087: 13.14 & 1.25e-8 & 0.35 & 0.41 & 0.46 & 0.50 & 0.52 & 0.52 & 0.52 & 0.51 & 0.49 & 0.47 & 0.92 & 0.40 & 0.36 & 0.32 & 0.28 & 0.24 & 0.20 \\ 
2088: 15.12 & 7.61e-9 & 0.30 & 0.36 & 0.41 & 0.44 & 0.46 & 0.47 & 0.47 & 0.47 & 0.45 & 0.43 & 0.40 & 1.11 & 0.33 & 0.30 & 0.26 & 0.22 & 0.19 \\ 
2089: 17.41 & 4.53e-9 & 0.25 & 0.31 & 0.36 & 0.38 & 0.41 & 0.42 & 0.42 & 0.41 & 0.40 & 0.38 & 0.36 & 0.33 & 1.36 & 0.26 & 0.24 & 0.20 & 0.17 \\ 
2090: 20.04 & 2.63e-9 & 0.21 & 0.27 & 0.31 & 0.34 & 0.36 & 0.37 & 0.37 & 0.37 & 0.36 & 0.34 & 0.32 & 0.30 & 0.26 & 1.74 & 0.21 & 0.19 & 0.16 \\ 
2091: 23.07 & 1.48e-9 & 0.18 & 0.23 & 0.26 & 0.29 & 0.30 & 0.32 & 0.32 & 0.32 & 0.31 & 0.30 & 0.28 & 0.26 & 0.24 & 0.21 & 2.22 & 0.17 & 0.14 \\ 
2092: 26.55 & 8.29e-10 & 0.15 & 0.19 & 0.22 & 0.24 & 0.26 & 0.27 & 0.27 & 0.27 & 0.26 & 0.25 & 0.24 & 0.22 & 0.20 & 0.19 & 0.17 & 2.88 & 0.12 \\ 
2093: 30.56 & 4.41e-10 & 0.12 & 0.16 & 0.18 & 0.20 & 0.22 & 0.22 & 0.23 & 0.23 & 0.22 & 0.21 & 0.20 & 0.19 & 0.17 & 0.16 & 0.14 & 0.12 & 3.88 
2094: \enddata
2095: \tablenotetext{}{Mean and covariance matrix of the maxBCG mass function.   Masses are defined using an overdensity
2096: of 500 relative to critical, and are measured in units of $10^{14}\ \msun$.  Space densities are measured in
2097: units of $\Mpc^{-3}$.  Diagonal terms in the covariance matrix above are set to $\sqrt{C_{i,i}}/\avg{n_i}$, and thus represent
2098: the fractional uncertainty in the halo space density.   Off diagonal terms contain the correlation coefficient $r_{i,j}=C_{i,j}/\sqrt{C_{i,i}C_{j,j}}$
2099: between the various bins.  The median redshift of the sample is $z=0.23$.}
2100: \end{deluxetable}
2101: 
2102: 
2103: \end{document}
2104: 
2105: