0709.0759/ms.tex
1: \documentclass[12pt,preprint]{aastex}
2: %\documentclass[]{emulateapj}
3: \input epsf
4: \usepackage{graphicx,natbib}
5: \citestyle{aa}
6: %\date{VERSION: 07/05/25}        % Activate to display a given date or no date
7: 
8: \def \half {{\case{1}{2}}}
9: %07/02/10 Elena's corrections to Feng's original version, expanded biblio; to Feng
10: %07/02/16 Feng's corrections viz. Elena's comments, biblio corrections, and some TeX and figure corrections; to Jim
11: %07/02/19 Jim's edits and additiions. Questions for Feng.
12: %07/03/02 Feng's corrections to Jim's revised version. 
13: %07/03/16 Some more Jim corrections--off to Feng to add 3 new figures
14: %07/03/26 Feng's corrections to Risa's comments. 
15: %07/04/04 Jim's small corrections. Still waiting on Risa's simulation paragraph.
16: %07/04/06 Feng's small corrections for several typos and references. 
17: %07/04/15 Jim's small additions, apologies for being so slow; final (?)
18: %07/04/15 Risa's corrections
19: %07/04/17 Feng's corrections
20: %07/04/24 Elena's corrections
21: %07/05/03 Feng's corrections
22: %07/05/25 Jim's reading and corrections, Feng's corrections to figures. Final (?) version
23: 
24: \begin{document}
25: 
26: \title{Optical Cluster-Finding with An Adaptive Matched-Filter Technique: Algorithm 
27: and Comparison with Simulations }
28: \author{Feng Dong \altaffilmark{1}, Elena Pierpaoli \altaffilmark{2}, 
29: James E. Gunn \altaffilmark{3}, Risa H. Wechsler \altaffilmark{4}}
30: \altaffiltext{1}{Department of Astrophysical Sciences, Princeton University, Princeton, 
31: NJ 08544, feng@astro.princeton.edu}
32: %\altaffiltext{2}{Theoretical Astrophysics Section, Caltech, Pasadena, CA 91125, 
33: %pierpa@caltech.edu}
34: \altaffiltext{2}{University of Southern California, Los Angeles, CA, 90089-0484, pierpaol@usc.edu}
35: \altaffiltext{3}{Department of Astrophysical Sciences, Princeton University, Princeton, 
36: NJ 08544, jeg@astro.princeton.edu}
37: \altaffiltext{4}{Kavli Institute for Particle Astrophysics and Cosmology, 
38: Physics Department, and
39: Stanford Linear Accelerator Center, 
40: Stanford University,
41: Stanford, CA 94305}
42: 
43: 
44: \begin{abstract}
45: 
46: We present a modified adaptive matched filter algorithm designed to identify clusters of galaxies
47: in wide-field imaging surveys such as the Sloan Digital Sky Survey. The cluster-finding technique 
48: is fully adaptive to imaging surveys with spectroscopic coverage, multicolor photometric redshifts, 
49: no redshift information at all, and any combination of these within one survey. It works with high 
50: efficiency in multi-band imaging surveys where photometric redshifts can be estimated with well-understood 
51: error distributions. Tests of the algorithm on realistic mock SDSS catalogs suggest that the detected 
52: sample is $\sim 85\%$ complete and over $90\%$ pure for clusters with masses above $1.0\times10^{14} h^{-1}$ 
53: M$_\odot$ and redshifts up to $z=0.45$. The errors of estimated cluster redshifts from the maximum likelihood 
54: method are shown to be small (typically less than 0.01) over the whole redshift range with photometric 
55: redshift errors typical of those found in the Sloan survey. Inside the spherical radius corresponding 
56: to a galaxy overdensity of $\Delta=200$, we find the derived cluster richness $\Lambda_{200}$ a roughly 
57: linear indicator of its virial mass $M_{200}$, which well recovers the relation between total 
58: luminosity and cluster mass of the input simulation.
59: \end{abstract}
60: 
61: \keywords{cosmology:theory -- galaxies:clusters:general -- large-scale structure of universe}
62: 
63: 
64: 
65: 
66: \section{Introduction} \label{sec:Introduction}
67: 
68: 
69: Clusters of galaxies are the most massive virialized systems in the Universe and have been 
70: extensively used to study galaxy population and evolution \citep{Dre84, Dre92}, to trace 
71: the large-scale structure of the universe \citep{Bah88, Pos92}, and to constrain cosmology 
72: \citep{Evr89, Bah99, Hen00,Pierpa01,Pierpa03}. Given the important roles clusters of galaxies 
73: play in the studies of both astrophysics and cosmology, tremendous efforts have been made 
74: during the past several decades to search for these systems. The first large samples of 
75: clusters were identified by looking for projected galaxy overdensities through visual 
76: inspection of photographic plates \citep{Abe58, Abe89, Zwi68}. These catalogs made pioneering 
77: contributions to our understanding of the extragalactic universe and since their generation 
78: have opened many new frontiers in the studies of galaxy clusters. However, the compilation 
79: of a relatively complete and pure sample of galaxy clusters has remained far from trivial. 
80: To date the Abell catalog, which contains about 4000 rich clusters to a redshift of $z\sim0.2$, 
81: is still the most widely used cluster catalog in the field, though it was realized early that 
82: visually-constructed catalogs suffer from projection effects, subjectivity, and large 
83: uncertainties in estimated properties \citep{Sut88}. It is difficult to apply these catalogs 
84: for statistical studies in cosmology because of these uncertainties, in addition to the fact 
85: that the selection function and false positive rates of such cluster samples are hard to quantify. 
86: 
87: 
88: To relieve some of these concerns, other approaches for identifying clusters have also been 
89: designed and implemented, such as reconstructing the full 3-D structures in complete redshift 
90: surveys \citep{Huc82, Gel83, Ram97}, detecting clusters in X-ray surveys \citep{Gio90, Edg90, 
91: Ebe98, Ros98, Rom00, Sch00, Boh01, Mul03, Boh04}, and utilizing the Sunyaev-Zeldovich effect 
92: \citep{Car00, Moh02, Pierpa05} and weak gravitational lensing \citep{Sch96, Wit01} in search for 
93: clusters.  Moreover, the realization of large and deep galaxy surveys in recent years has 
94: revived optical cluster-finding endeavors and prompted the development of more automated 
95: and rigorous algorithms to select clusters from imaging surveys. Using multi-color photometric 
96: data from which photometric redshifts can be estimated, it is now possible to mitigate the 
97: problems of projection effects, and quantitative analysis of the selection bias is also now 
98: possible. Automated peak-finding techniques in optical cluster searches were attempted by 
99: \citet{She85} and later used in the Edinburgh/Durham survey \citep[ED;][]{Lum92} as well as the 
100: Automatic Plate Measurement Facility survey \citep[APM;][]{Dal94, Dal97}. In the construction 
101: of the cluster catalog from the Palomar Distant Cluster Survey \citep{Pos96}, a matched filter 
102: algorithm was developed to select clusters from a photometric galaxy sample. It was widely used 
103: in subsequent surveys and several variants have been put forward \citep{Kaw98, Sch98, Kep99, Kim02, Whi02}. 
104: Meanwhile with the knowledge of the existence of the ``E/S0 ridgeline'' of cluster galaxies in 
105: color-magnitude space and the aid of multi-color CCD photometry, several color-based cluster-finding 
106: techniques were also investigated \citep{Gla00, Got02, Gla05, Mil05}. Some of these have already 
107: been successfully applied to select clusters from the Sloan Digital Sky Survey (SDSS) data 
108: \citep{Got02, Ann02, Bah03, Mil05, Koe07}.
109: 
110: 
111: The Sloan Digital Sky Survey \citep{Yor00} is a five-band CCD imaging survey of about 10$^4$ deg$^{2}$ 
112: in the high latitude North Galactic Cap and a smaller deeper region in the South, followed by 
113: an extensive multi-fiber spectroscopic survey. The imaging survey is carried out in drift-scan 
114: mode in five SDSS filters ($u$, $g$, $r$, $i$, $z$) to a limiting magnitude of $r\sim22.5$ 
115: \citep{Fuk96, Gun98, Lup01, Smi02}. The spectroscopic survey targets $\sim$10$^{6}$ galaxies 
116: to  $r\sim17.7$, with a median redshift of $z\sim0.1$ \citep{Str02}, and a smaller deeper sample 
117: of $\sim$10$^{5}$ Luminous Red Galaxies out to $z\sim0.5$ \citep{Eis01}. In this paper we discuss 
118: a modified adaptive matched filter technique incorporating several new features over previous 
119: algorithms and designed to detect clusters using both the SDSS imaging and spectroscopic data; 
120: it could readily be adapted to other similar multi-band, large-area galaxy surveys for construction 
121: of optically-selected cluster samples. It is the first of a series of papers that will explore 
122: the application of the technique to select clusters from the Sloan Digital Sky Survey.
123: 
124: 
125: The general idea of the matched filter method relies on the fact that clusters show on average 
126: a typical density profile, now widely assumed to be the ``NFW'' form suggested first by Navarro, 
127: Frenk and White \citep{Nav96}. Assuming that galaxies trace the dark matter, we expect galaxies 
128: within clusters to be distributed according to such a profile. The algorithm selects regions in 
129: the sky where the distribution of galaxies corresponds to the projection of the average cluster 
130: density profile. In addition, it is possible to specify the galaxy redshift information 
131: inside clusters, and to use prior knowledge on the galaxy luminosity function. The combination 
132: of these matched subfilters thus enables us to extract a quantitative signal corresponding to 
133: the existence of a cluster at a given location in the surveyed sky area. 
134: 
135: 
136: The modified matched filter technique presented in this paper can fully adapt to imaging surveys 
137: with spectroscopic measurements, multicolor photometric redshifts, no redshift information at all, 
138: and any combination of these within one survey. In the Sloan Digital Sky Survey where photometric 
139: redshifts can be estimated with well-understood error distributions from the five-band 
140: ($u$,$g$,$r$,$i$,$z$) multi-color photometry, the matched filter technique described here utilizes 
141: not only the spectroscopic coverage for the bright main sample galaxies and Luminous Red Galaxies (LRGs) 
142: but also the photometric redshift information for most of the galaxies detected in the imaging 
143: survey. This greatly expands the input galaxy sample to feed into the cluster-finding algorithm 
144: compared to pure spectroscopic methods \citep[e.g.][]{Mil05}. The obtained composite cluster catalog 
145: can also go much deeper in redshift ($z\sim0.4$--$0.5$ in this case) than the typical $z\sim0.2$ limit 
146: for spectroscopic samples due to the lack of availability of spectroscopic measurements for faint, 
147: deep galaxies.  
148: 
149: 
150: Since the matched filter technique does not explicitly use the information about the red sequence to 
151: select clusters as is done in some color-based cluster-finding methods \citep{Ann02, Mil05, Koe07}, 
152: it can theoretically detect clusters of any type in color, and is not restricted only to old, red 
153: E/S0 galaxies. Such clusters likely dominate the cluster population, but may not constitute all 
154: of it especially as one probes systems of lower richness and at higher redshifts. The use of both 
155: spectroscopic and photometric redshift information largely eliminates the projection effects and 
156: removes most of the phantom clusters. The matched filter also generates accurate quantitative estimates 
157: of derived cluster properties, such as redshift, scale, richness, and concentration, and produces 
158: quantitative detection likelihoods, indicative of the combined information for both red and blue 
159: galaxies identified as cluster members. These facilitate further studies of detected systems and 
160: make it easier to compare them with clusters selected by other methods. One major concern for the matched 
161: filter technique is the fact that determination of these parameters depends on the specific cluster 
162: model we put in to build the relevant filters. However, these effects can be minimized by careful 
163: assumptions about the shape and evolution of luminosity function, and by the fact that our density 
164: filter is self-adaptive to different cluster scales and concentration. The clusters selected by 
165: the algorithm will provide us the necessary sample on which we then apply an iterative procedure 
166: aimed at refining the constraints on clusters' properties. More details will be discussed in 
167: \S\ref{sec:Algorithm} and in subsequent work following this paper. 
168: 
169: 
170: The new algorithm presented here differs from previous matched filter implementations 
171: \citep{Kep99, Kim02} in several ways. We use a uniform Poisson likelihood analysis, which is only 
172: the second step in the approach by \citet{Kep99} following a first pass using Gaussian statistics for 
173: pre-selection of clusters. This avoids the common problem for high-redshift clusters of having 
174: too few galaxies in any cell of interest for Gaussian statistics to apply, and the adopted approach 
175: yields correct likelihoods even at the detection stage. In addition, both the core radius and 
176: virial radius of the matched filter are adaptive over the typical observed dynamical range for clusters,
177: in contrast to most previous cluster-finding techniques that set the cluster core radius or search 
178: radius to be fixed. For each individual cluster, a best-fit core radius is found to maximize the 
179: likelihood match, as well as an outer radius inside which the galaxy overdensity reaches $\Delta$=200. 
180: The cluster richness is then normalized to be the light contained within this virial radius, which we 
181: find correlates better with the mass of gravitational systems whose extent is defined by density 
182: contrast as is widely adopted in theoretical studies. The new features of our modified algorithm 
183: will be further discussed in \S\ref{sec:Algorithm}.
184: 
185: 
186: In order to understand the biases and the selection functions of our algorithm, we test it on 
187: a mock SDSS catalog which has been constructed from the Hubble Volume Simulation \citep{Evr02} 
188: by assigning luminosities and colors to the dark matter particles in a manner which reproduces 
189: many characteristics of the galaxy population from SDSS observations. The ``observations'' of 
190: the simulations have then been further modified so that the redshift scatter of those galaxies 
191: which have photometric but no spectroscopic redshifts corresponds to that of the photometric redshift 
192: errors in actual SDSS data. The comparison of the detected cluster sample with halos in 
193: the simulation provides the only rigorous way to assess how the observed cluster properties 
194: relate to the real masses, and how the cluster sample can be used to derive cosmological 
195: constraints.
196: 
197: 
198: In \S\ref{sec:Algorithm} we describe the modified adaptive matched filter technique 
199: and how it is used to extract the cluster sample. Section~\ref{sec:Simulation} presents 
200: the basic features of the simulated catalog we adopted for testing purposes. In 
201: \S\ref{sec:Results} we show results on the completeness and purity of our cluster sample, and 
202: the expected scaling relations inferred from runs on the simulations. We conclude in 
203: \S\ref{sec:Conclusion}. 
204: 
205: 
206: A flat $\Lambda$CDM model with $\Omega_m=0.3$ and $\Omega_{\Lambda}=0.7$ is used throughout 
207: this work, and we assume a Hubble constant of $H_0=100 h$ km s$^{-1}$ Mpc$^{-1}$ if not 
208: specified otherwise. 
209: 
210: 
211: 
212: 
213: \section{The Cluster-Finding Algorithm} \label{sec:Algorithm}
214: 
215: 
216: The matched filter technique introduced here is a likelihood method which identifies clusters 
217: by convolving the optical galaxy survey with a set of filters based on a modeling of the cluster 
218: and field galaxy distributions. A cluster radial surface density profile, a galaxy luminosity 
219: function, and redshift information (when available) are used to construct filters in position, 
220: magnitude, and redshift space, from which a cluster likelihood map is generated. The peaks in 
221: the map thus correspond to candidate cluster centers where the matches between the survey data 
222: and the cluster filters are optimized. The algorithm automatically provides the probability for 
223: the detection, best-fit estimates of cluster properties including redshift, radius and richness, 
224: as well as membership assessment for each galaxy. The modified algorithm can be fully adaptive 
225: to current and future galaxy surveys in 2-D (imaging), 2$\half$-D (where multi-color photometric 
226: redshifts and their errors can be estimated), and 3-D (with full spectroscopic redshift measurements). 
227: Usage of the apparent magnitudes and, where applicable, the redshift estimates instead of simply 
228: searching for projected galaxy overdensities effectively suppresses the foreground-background 
229: contamination, and the technique has proven to be an efficient way of selecting clusters of 
230: galaxies from large multi-band optical surveys. 
231:  
232: 
233: In what follows, we first provide a general introduction on how the likelihood function is 
234: constructed and how we detect clusters with the matched filter method. This gives us an overview 
235: about how the cluster catalog is derived. Then we discuss in more detail the density models and 
236: subfilters used to construct the likelihood. More specifically, we assume an NFW density profile, 
237: a general Schechter luminosity function and a Gaussian model for BCGs to model clusters, and use 
238: the spectroscopic measurements and obtained error distributions of galaxy photometric redshifts 
239: from the Sloan Digital Sky Survey to incorporate redshift uncertainties. In the end we describe how 
240: to determine the set of best-fit parameters on cluster properties that maximize the likelihood at 
241: a given position over a range of redshift, scale, concentration, and richness.
242: 
243: 
244: 
245: \subsection{Likelihood Function} \label{sec:Likelihood}
246: 
247: 
248: The likelihood function used here is based on the assumption that the probability of finding galaxies 
249: in an infinitesimal bin in angular position, apparent magnitude and redshift space is given by a Poisson 
250: distribution. Under this assumption, the total likelihood of many such bins, which we take to be 
251: centered on the locations of the galaxies in the survey, is (see Appendix C2 of \citet{Kep99} for a full 
252: derivation):
253: 
254: \begin{equation}
255: \ln {\cal L} = -N_f -\sum_{k=1}^{N_c}N_k +\sum_{i=1}^{N_g}\ln[P(i)], 
256: \end{equation}
257: 
258: where $N_f$ is the total number of field galaxies expected within the searching area, $N_g$ is the total 
259: number of galaxies and $\sum_{k=1}^{N_c}N_k$ is normalized to be the number of galaxies 
260: brighter than $L^*$ as members of the $N_c$ clusters assumed in the model. $P(i)$ represents the 
261: predicted probability density of galaxies in a given bin, which includes both probabilities 
262: of field galaxies ($P_f$) and of cluster members ($P_c$),
263: 
264: \begin{equation}
265:   P(i) = P_f(i) + \sum_{k=1}^{N_c} P_{c}(i,k).
266: \end{equation}
267: 
268: These probabilities are the expected number densities for a given location and magnitude.
269: 
270: 
271: The cluster catalog is constructed with an iterative procedure similar to the one used in \citet{Koc03}.
272: We start our process from a density model of a smooth background with no clusters. For each galaxy 
273: position, we then evaluate the likelihood increment we would obtain by assuming that there is in fact 
274: a cluster centered on that galaxy. The likelihood is then optimized by varying the cluster galaxy 
275: number $N_k$, the redshift and cluster scale length. At each iteration, we retain the cluster candidate 
276: which resulted in the greatest likelihood increase. We incorporate it in our density model and restart 
277: the procedure. The function for finding the $k^{th}$ cluster in the whole surveyed area therefore is 
278: 
279: \begin{equation}
280: \begin{array}{ll}
281:   \Delta\ln{\cal L}(k) = -N_k + \sum_{i=1}^{N_g}\ln[{P_f(i)+\sum_{j=1}^{k}P_c(i,j) \over 
282:       P_f(i)+\sum_{j=1}^{k-1}P_c(i,j)}].
283: \end{array}
284: \end{equation}
285: 
286: 
287: A list of cluster candidates then becomes available in decreasing order of detection likelihoods.
288: For each candidate one has derived properties, including best-fit position, scale, richness, and 
289: estimated redshift. The initial cluster catalog allows us to further inspect each individual candidate 
290: for exploration of substructure and better constraints on previously fitted quantities.   
291: 
292: 
293: 
294: \subsection{Density Model} \label{sec:Density}
295: 
296: 
297: As both field and cluster galaxies are found in the survey, the probability of finding a galaxy in 
298: a given bin depends on the density of both these populations (see eq.(2)).
299: 
300: 
301: For galaxy $i$ with angular position $\vec{\theta}_i$, $r$-band apparent magnitude $m^r_i$ and 
302: redshift $z_i$ (when available), the background number density $P_f(i)$ can be directly extracted 
303: from the global number counts of the galaxy survey,  
304: 
305: \begin{equation}
306:   P_f(i) = { d N \over dm~ dz }(m^r_i, z_i),  
307: \end{equation}
308: 
309: and it has to be modified to account for the effects of galaxy redshift uncertainties if photometric 
310: redshift estimates are used. 
311: 
312: 
313: For cluster $k$ located at $\vec{\theta}_k$ with proper scale length $r_{ck}$, redshift $z_k$ and 
314: galaxy number $N_k$, the probability of galaxy $i$ being a member of it, $P_c(i,k)$, is just the 
315: product of a surface density profile $\Sigma_c$ and a luminosity function $\phi_c$ at the cluster's 
316: redshift, times a distribution function $f(z_i- z_k)$ that expresses redshift uncertainties: 
317: 
318: \begin{equation}
319:   P_c(i,k) = N_{k}\ \Sigma_c\left[D_A(z_k)\theta_{ik}\right]\ \phi_c\left[m^r_i-{\cal D}(z_k)
320:     \right]\ f(z_i-z_k),
321: \end{equation}
322: 
323: where ${\cal D}(z_k)$ is defined through 
324: 
325: \begin{equation}
326:   M^r_i = m^r_i - 5 \log (D_L(z_k)/10\mbox{pc}) - k(z_k) = m^r_i - {\cal D}(z_k),
327: \end{equation}
328: 
329: and where $D_A(z_k)$ and $D_L(z_k)$ are the angular diameter and luminosity distance at the cluster's 
330: redshift $z_k$,  and $k(z_k)$ is the $k$-correction. The conversion of units in luminosity and 
331: distance is conducted by performing proper $k$-corrections for galaxies of different spectral types 
332: and choosing the proper cosmology (see \S\ref{sec:Introduction}). 
333: 
334: 
335: 
336: \subsection{Subfilters} \label{sec:Subfilters}
337: 
338: 
339: Based on current observational studies as well as findings from dark matter halos, and for convenient 
340: comparisons to theoretical models widely used in analytical studies and N-body simulations, we assume 
341: the density profile of galaxies within a cluster follows the form of an NFW profile \citep{Nav96}, 
342: which in three dimensions is given by 
343: 
344: \begin{equation}
345:   \rho_c(r) = {1 \over 4\pi r_c^3 F(c)}{1 \over {r \over r_c}(1+{r \over r_c})^2},
346: \end{equation}
347: 
348: where $c$ is the concentration parameter and $F(c)$ is the typical normalization factor for galaxies 
349: inside the virial radius of the cluster, $r_v=cr_c$. The 3-D profile is then integrated along the 
350: line of sight to derive a projected surface density profile $\Sigma_c(r)$ which is expressible as 
351: a much more complicated analytical form (see \citealt{Bar96}). The profile is normalized so that 
352: $\int_0^{cr_c} 2\pi r \Sigma_c(r) dr = 1$.
353: 
354: 
355: The search radius for galaxies belonging to the cluster is set to be the virial radius of the 
356: cluster, or more specifically here, the radius inside which the mass overdensity is 200 times 
357: the critical density, i.e., 200$\Omega_m^{-1}$ times the average background \citep{Evr02}.
358: Since it is hard to directly measure the cluster mass overdensity in observations, we instead 
359: determine the virial radius inside which the space density of cluster galaxies is 200$\Omega_m^{-1}$ 
360: times the mean field, assuming that the galaxy distribution in a halo traces the overall dark 
361: matter distribution (see discussions in \citealt{Han05}), which has been suggested by recent 
362: observations and simulations \citep{Lin04a, Nag05, Lin07a}, and is supported by weak lensing 
363: measurements \citep{She04}. For simplicity, we use $r_{200}$ throughout this work to denote 
364: the cluster virial radius determined by galaxy overdensities. The cluster richness is then 
365: defined to be the total luminosity in units of $L^*$ inside $r_{200}$.  
366: 
367: 
368: As has been discussed before in matched-filter studies \citep{Pos96, Kim02} and also shown by 
369: our own numerical experiments, the efficiency of the filter is usually much more sensitive to 
370: the overall filter cutoff radius than to the details of its shape. Therefore the determination 
371: of appropriate values for the scale length in the cluster model is of particular importance, 
372: as it may have significant impact on the detection efficiency of the cluster-finding algorithm. 
373: Most of the previous matched filter methods have used a carefully chosen fixed value for the 
374: model cluster cutoff radii, and they compute the galaxy number or the richness of clusters within 
375: such a fixed radius in physical units. \citet{Pos96} concludes that a fixed search radius of 
376: 1 Mpc $h^{-1}$ is a near-optimal choice in their radial filter, and this value has been also 
377: adopted by \citet{Kep99, Kim02} in their method which assumes a modified Plummer law model 
378: for the surface density profile. In \citet{Whi02} and \citet{Koc03}, the authors set a fixed 
379: core radius of $r_c=200$ kpc $h^{-1}$ and concentration parameter of $c=4$ for the NFW profile 
380: in the cluster detection and mass estimates. Although we find from observations and simulations 
381: that these choices are reasonable values for typical rich clusters, a single fixed scale length 
382: for all clusters over a wide range of masses and concentrations will certainly degrade the 
383: signal-to-noise ratio, bias detection probabilities, and be responsible for at least part of 
384: the large scatter observed in previous cluster mass-richness scaling relations. In our modified 
385: adaptive matched filter algorithm, we optimize the core radius for each individual cluster over 
386: the dynamical range for typical galaxy clusters. For the core radius value that maximizes the 
387: likelihood, we then compute the normalized cluster richness according to the NFW profile with 
388: best-fit parameters within a cluster virial radius $r_{200}$ determined from galaxy overdensities. 
389: We believe this procedure is more similar to and comparable with the virial mass defined by 
390: density contrast in most theoretical studies and analyses of simulations.
391: 
392: 
393: For the magnitude filter, we adopt a luminosity profile described by a central galaxy plus
394: a standard Schechter luminosity function \citep{Sch76}
395: 
396: \begin{equation}
397: \phi(M) = {dn \over dM} = 0.4 \ln10\ n^* \left({L \over L^*} \right)^{1+\alpha} \exp(-L/L^*);
398: \end{equation}
399: 
400: the integrated luminosity function is 
401: 
402: \begin{equation}
403: \Phi(M) = \int_{-\infty}^M \phi(M') dM' = n^* \Gamma[1+\alpha, L/L^*].
404: \end{equation}
405: 
406: 
407: Parameters for the global luminosity function are obtained from the SDSS spectroscopic sample at 
408: the redshift of $z=0.1$ \citep{Bla03}. To account for the evolutionary effects at higher redshifts, 
409: we allow a passive evolution of $L^*$ which brightens about 0.8 magnitudes from $z=0$ to $z=0.5$ 
410: \citep{Lov92, Lil95b, Nag01, Bla03, Lov04, Bal05, Ilb05}. We assume that $L^*$ does not vary as a 
411: function of cluster richness, which is supported by the results of \citet{Han05}. Because the 
412: matched filter algorithm uses both a cluster galaxy luminosity function and a field galaxy 
413: luminosity function, which are expected to be different due to the morphology-density relation 
414: \citep{Dre80} and the observed dependence of luminosity function on galaxy over-densities 
415: \citep{Chr00, Mo04, Cro05}, it would be desirable to model these separately. It would also be 
416: desirable to further model the luminosity distributions according to galaxy spectral types 
417: \citep{Fol99, Lin99, Hog03}. At this stage, however, only a single function is adopted since 
418: the work on precise luminosity functions for cluster galaxies of different types has just been 
419: started. We hope to investigate this further on the basis of the first catalog we produce. 
420: Once a cluster catalog is available for galaxies in all redshift ranges, we can go back and 
421: examine the impact of our assumptions about the galaxy luminosity functions as well as their 
422: evolution for different environments and spectral types. In order to use the same range in 
423: the luminosity function at all distances and therefore avoid bias associated with errors in the 
424: assumed form of the luminosity function, we cut off the luminosity function at one magnitude 
425: below $L^*$.  We can still calculate total luminosities by integrating the assumed form, and 
426: we use this in our richness calculation, described below.
427: 
428: 
429: The existence of Brightest Cluster Galaxies (BCGs) near the cluster centers is incorporated into 
430: our cluster galaxy luminosity model as a separate component from the main Schechter function for 
431: satellites, as this distinction has been clearly seen in clusters over a range of richness \citep{RT77,Han05}. 
432: We assume a Gaussian distribution for the luminosities of these objects and adopt the results 
433: from \citet{Lin04b} for correlations between the BCG luminosity and host cluster properties. 
434: More specifically, the BCG luminosity is assumed to follow a single power law with the cluster 
435: richness, $L_{BCG} \sim \Lambda_{200}^{1\over4}$, and we take the width of the Gaussian to be 
436: $\sim0.5$ mag \citep{Lin04b, Zhe05, Han05}. The luminosity of BCGs is assumed to evolve in the 
437: same way as $L^*$ does, {\it i.e.}, the luminosity at the mean of the Gaussian has a constant 
438: ratio to $L^*$. This is almost certainly incorrect in detail, but will be explored in follow-up 
439: work once the catalog is constructed. This modification of the 
440: general Schechter function enhances the detectability of typical clusters with BCGs, especially 
441: those at higher redshifts with only a few galaxies other than the BCG to be included in the 
442: apparent magnitude-limited galaxy sample.
443: 
444: 
445: 
446: Thanks to the accurate five-band ($u$,$g$,$r$,$i$,$z$) multi-color photometry in the SDSS 
447: \citep{Yor00}, as well as the associated redshift survey for the bright main sample galaxies 
448: \citep{Str02} and Luminous Red Galaxies (LRGs, \citealt{Eis01}), it is now also possible to retrieve 
449: redshift information for most of the galaxies that we are going to use in construction of the 
450: SDSS cluster catalog, either photometrically or spectroscopically. For real SDSS data 
451: currently available from DR5, we find that galaxies with valid photometric redshift estimates make 
452: up more than $96\%$ of the whole sample in the imaging data, within which about $1\%$, mostly 
453: bright, red galaxies, have matched spectroscopic measurements from redshift surveys. 
454: Not surprisingly, the inclusion of galaxy redshift estimates greatly improves the accuracy of 
455: the cluster redshift determinations and significantly mitigates projection effects, thus allowing 
456: the detection of much poorer systems than possible in previous work with no redshift measurements.
457: 
458: 
459: The uncertainties of galaxy redshifts are assumed to follow Gaussian distributions in the 2$\half$-D 
460: and 3-D cases, where in terms of the $f(z)$ function in equation (5) we have     
461: 
462: \begin{equation}
463:   f(z_k) = {\exp\left[-(z_i-z_k)^2/2\sigma_{z_i}^2\right] \over \sqrt{2\pi}\sigma_{z_i}}.   
464: \end{equation}
465: 
466: 
467: For galaxies with computed photometric redshifts (described below), we add to the cluster galaxy 
468: density model a third subfilter based on the distribution of derived redshift uncertainties in the 
469: form of a combination of multiple Gaussian modes. These error estimates are obtained by calibrating 
470: photometric redshifts with the real redshifts in the SDSS spectroscopic galaxy sample and redshifts 
471: from other fainter (but smaller) overlapping surveys. The analysis is done for red and blue galaxies 
472: separately using the color separator by \citet{Strv01}, and it is found that a model using Gaussian 
473: modes with proper weights assigned generally provides a good description of the bias and scatter 
474: in the photometric redshifts for galaxies of both spectral types and in different apparent magnitude 
475: bins. Some of the results are shown in \S\ref{sec:Simulation}. 
476: 
477: 
478: In the 3-D case where spectroscopic redshifts of galaxies are measured, we smooth them in 
479: Gaussians with assigned cluster velocity dispersions that vary in the range from $400$ km s$^{-1}$ 
480: (proper) for poorer clusters to $1200$ km s$^{-1}$ (proper) for the richest systems in the 
481: selected cluster sample, according to several discrete estimated richness classes. The same 
482: procedure as outlined in the previous paragraph for photometric redshifts is applied to include 
483: this redshift filter in the galaxy density model. 
484: 
485: 
486: In addition, there are galaxies we find that either have invalid photometric redshifts computed 
487: or fall into the redshift and magnitude range where no good calibrations are available. Such 
488: galaxies, which are currently about $3\%$--$5\%$ of the whole sample, are assumed to have no redshift 
489: estimates and therefore no constraining filter. Hence we set up for each galaxy the appropriate 
490: scenario that adapts the matched filter algorithm to galaxy redshift estimates with varied accuracy. 
491: 
492: 
493: Finally, of course, we fit an overall amplitude, which represents the cluster richness. Since
494: its size, shape and redshift are all determined at this point, we can express the amplitude
495: however we like in physical terms. We have chosen to use the total luminosity within $r_{200}$
496: expressed as a multiple of $L^*$ (evolved to the relevant redshift using 1.6 mags of luminosity
497: evolution per unit redshift), which we denote as $\Lambda_{200}$.
498: 
499: 
500: 
501: \subsection{Implementation} \label{sec:Implementation}
502: 
503: 
504: Implementation of the matched filter algorithm starts with reading the galaxy catalog. For each 
505: galaxy $i$ in the sample, we read in the positions $\alpha_i$, $\delta_i$, the extinction-corrected 
506: five-band apparent magnitudes and their errors, and the redshift $z_i$ if it has a matched spectrum. 
507: Using the flux and color information, we compute a photometric redshift estimate using a neural 
508: network technique by \cite{Lin07} as well as $k$-corrections and estimated rest-frame colors for 
509: each galaxy, which we add as input to the cluster-finding algorithm. 
510: 
511: 
512: The next step is to define the cluster model we adopt for the filters, including the surface 
513: density profile $\Sigma_c(r)$, the luminosity function $\phi(M)$, and the assumed Gaussian 
514: modes of photometric redshift uncertainties. The field density model $P_f(m,z)$ is constructed 
515: from global number counts of the surveyed background galaxy distributions as a function of 
516: magnitude and redshift, as shown in equation (4). We then incorporate these models into the Poisson 
517: likelihood functions as discussed above. 
518: 
519: 
520: To map the likelihood distributions of the surveyed area, we grid the sky using the Healpix 
521: package of \cite{Gor05} which provides a useful hierarchical pixelization scheme of equal-area 
522: pixels. In \citet{Kep99}, the authors choose galaxy positions on an adaptive grid in calculating the 
523: likelihoods instead of the uniform grids used in the previous matched filter codes \citep{Pos96}, so 
524: that sufficient resolution in the high density regions is ensured while saving computational 
525: time and memory for less dense regions. We follow this procedure and evaluate the likelihood 
526: functions at every galaxy position to locate the peaks in the map as possible cluster centers. 
527: The cluster richness is optimized over the whole redshift range of our search at intervals that 
528: finally adapt to $\delta z=0.001$, and for a set of trial cluster scale radii ($r_c$) at $10$ 
529: kpc $h^{-1}$ steps. The derived quantities for best fit cluster richness, redshift and scale 
530: length thus correspond to the parameters that maximize the likelihood function at the grid position 
531: or candidate cluster center. 
532: 
533: 
534: This algorithm possesses several new features. First, the cluster algorithm is fully adaptive to 
535: 2-D, 2$\half$-D and 3-D cases in optical surveys, and can deal with data with these different 
536: attributes simultaneously. It can easily accommodate galaxy redshifts with uncertainties of 
537: any form and distribution, from purely single-band imaging data to a complete spectroscopic 
538: redshift survey, and works well for the intermediate case where photometric redshifts are estimated 
539: from multi-band color information. Projection effects from foreground--background contamination, 
540: which have been a long-standing problem for optically-selected clusters, are largely suppressed. 
541: This allows the detection of even poorer systems at high redshift, and shows great potential for 
542: current and future large, deeper surveys in the optical band. Second, the current adaptive matched 
543: filter uses Poisson statistics in a single-step likelihood analysis, in contrast to the two-step 
544: approach in \citet{Kep99}, which uses a ``coarse'' filter based on Gaussian likelihood for 
545: pre-selection of clusters. We write our code in Fortran-90 and by careful arrangement in computations 
546: and setting up the quick link search, the optimization of the Poisson likelihood through the 
547: whole process is now affordable in the sense of execution time and memory. For a survey field of 
548: $\sim300$ deg$^2$, which is comparable to a typical SDSS stripe \citep{Yor00}, the modified adaptive 
549: matched filter algorithm requires around 900 megabytes of memory and takes about 30 hours 
550: for a single run using one dual-processor node in a Linux Beowulf cluster with 3.06 GHz clock speed each. 
551: With no assumption necessary about sufficiently many galaxies inside each virtual bin as is necessary 
552: in the Gaussian case, the Poisson 
553: statistics remains robust in the common situation where there are too few galaxies in each 
554: cell for Gaussian statistics to apply. Third, as discussed in 
555: \citet{Whi02} and \citet{Koc03}, the current density model explicitly includes the effect of 
556: previously found clusters on the global likelihood function. The procedure automatically separates 
557: overlapping clusters and avoids multiple detections of the same system in the overdensity regions, 
558: somewhat similar to the CLEAN method used in radio astronomy to produce maps \citep{Hog74, Sch78}. 
559: We do not need to do extra cluster de-blending work afterwards. Finally, as discussed earlier, our approach to 
560: maximizing the likelihood differs from most previous cluster-finding techniques that choose a fixed 
561: cluster scale or search radius. We optimize the core radius for each individual cluster, and the 
562: cluster richness is computed within a virial radius which is determined from galaxy overdensities. 
563: This provides insights about the virial mass of such gravitational systems defined by density 
564: contrast and better corresponds to what is done in theoretical treatments.
565: 
566: 
567: \section{Tests on Mock Galaxy Catalogs}\label{sec:Simulation}
568: 
569: 
570: To evaluate the completeness and purity (the complement of the false positive rate) of our
571: cluster sample, as well as to assess how well our measured cluster
572: properties correspond to the properties of the underlying dark matter
573: halos, we have run the matched-filter algorithm on a mock galaxy catalog
574: generated from a realistic cosmological N-body simulation. Because of
575: the large redshift range we are trying to probe, it is important to do
576: this with as large a simulation volume as possible. In addition,
577: because we seek here to test the behavior of our algorithm using a
578: combination of spectroscopic and photometric redshifts, it is useful
579: to have a realistic galaxy population in both clusters and the field,
580: with luminosities, colors, and the relation between these quantities
581: and environment that are a good match to SDSS data.  Here we have used a
582: mock catalog based on a method named ADDGALS (Adding Density-Determined 
583: Galaxies to Lightcone Simulations; 
584: \citealt{Wec04} and in preparation, 2007), 
585: which is designed to model 
586: relatively bright galaxies in large volume simulations.
587: 
588: 
589: The underlying dark matter simulation used here tracks $10^9$ particles
590: of mass $2.25\times10^{12} h^{-1} M_\odot$ in a periodic cubic volume
591: with side length of $3 h^{-1}$ Gpc, using a flat $\Lambda$CDM
592: cosmology with $\Omega_m=0.3$, $\sigma_8=0.9$, and $h=0.7$
593: \citep[the Hubble Volume simulation;][]{Evr02}. Halos are identified
594: for masses above $2.7\times10^{13} h^{-1} M_\odot$. Data are collected 
595: on the past light cone of an observer at the center of the volume.  
596: The size of the simulation enables the creation of a full-sky survey out 
597: to redshift of $z=0.58$, and is thus suited to testing our
598: cluster-finding algorithm out to high redshifts using the SDSS imaging
599: data.
600: 
601: 
602: Galaxies are connected to individual dark matter particles on this
603: simulated light-cone, subject to several empirical constraints.  The
604: resolution of the simulation allows the mock catalog to include
605: galaxies brighter than about 0.4$L^*$; the number of galaxies of a
606: given brightness placed within the simulation is determined by drawing
607: galaxies from the SDSS galaxy luminosity function
608: \citep[][]{Bla03}, with 1.6 mags of luminosity evolution
609: assumed per unit redshift (the same assumption is made by our cluster
610: finding algorithm).  The choice of which particle these galaxies are
611: assigned to is determined by relating the particle overdensities (on a
612: mass scale of $\sim 10^{13} M_{\odot}$) to the two-point correlation
613: function of the particles; these particles are then chosen to
614: reproduce the luminosity-dependent correlation function as measured in
615: the SDSS by \citet[][]{Zeh04}.
616: 
617: 
618: Finally, colors are assigned to each galaxy by measuring their local
619: galaxy density (here, the fifth nearest neighbor within a redshift
620: slice), and assigning to them the colors of a real SDSS galaxy with
621: similar luminosity and local density.  The local density measure for
622: SDSS galaxies is taken from a volume-limited sample of the CMU-Pitt
623: DR4 Value Added Catalog.  This method produces mock galaxy catalogs
624: that reproduce the luminosity and color correlation function of the
625: real sky.  The created mock galaxy sample therefore provides a unique
626: tool to assess the performance of the SDSS cluster-finding algorithms
627: in terms of completeness and purity, as well as how the observables of
628: the detected clusters correspond to dark matter halos assuming galaxy
629: clusters do trace the underlying halo population in the universe.
630: 
631: 
632: Since precise spectroscopic redshift measurements are only available for the SDSS main sample 
633: galaxies \citep{Str02} and LRGs \citep{Eis01}, we must use photometric redshift estimates for 
634: most of the galaxies. In order to accurately reproduce this scenario in the simulations, we 
635: scatter the given redshifts of mock galaxies according to the error distributions of photometric 
636: redshift estimates, which are obtained by calibrating a sample of $\sim$140,000 SDSS photometric 
637: redshifts to their known corresponding spectroscopic measurements coming from the SDSS spectroscopic 
638: survey and various other sources such as CNOC2 \citep{Yee00}, CFRS \citep{Lil95a}, DEEP \citep{Wei05}, 
639: and 2SLAQ LRG \citep{Pad05}. The photometric redshifts were computed using a neural network 
640: technique by \cite{Lin07} and in preparation; see also the short discussion in the SDSS DR5 
641: data release paper, \cite{AMc07}.
642: The comparison between calculated photometric redshifts and measured spectroscopic redshifts 
643: is shown in Figure 1 for both the red and blue galaxy samples. The distributions of sampled 
644: redshift uncertainties are derived for different magnitude and redshift bins, and found to 
645: be well described by a combination of multiple Gaussian fits, as shown for example in Figure 2.
646: The resulting fitting parameters are used for the scattering of mock galaxy redshifts in 
647: the simulation. In the case of applying the cluster-finding technique to the real SDSS data, however, 
648: instead of deriving ``empirical'' error estimates collectively, we would use the photo-z errors 
649: that are computed based on the Nearest Neighbor Error estimate method (NNE) \citep{Lin07}, 
650: which makes it possible to get an estimate of the error for each individual object. This would 
651: better constrain the photometric redshift uncertainty, especially for galaxy samples with 
652: photo-z errors depending strongly on magnitudes and the actual redshifts. We find the 
653: computed errors correspond reasonably well with the empirical ones derived from statistics, 
654: with exceptions only for the catastrophic objects. More details will be discussed in a subsequent 
655: paper on the application of the modified adaptive matched-filter technique with SDSS data. 
656: 
657: 
658: To summarize, the implementation of simulating the observed galaxy redshifts in the mock sample 
659: proceeds as follows: for galaxies that satisfy the SDSS spectroscopic target selection criteria 
660: we take the given galaxy redshifts as spectroscopic measurements, while for the rest of the sample 
661: we use the scattered redshifts to mimic the photometric redshift estimates. As discussed above 
662: in \S\ref{sec:Algorithm}, a few percent of the galaxies fall into the redshift 
663: and magnitude ranges where we find no good calibrations are available. For these galaxies we 
664: just treat them as if there is no redshift information at all to put into the algorithm. We also 
665: impose on the mock galaxy catalog an apparent magnitude cut ($r<21$), as we intend to adopt in 
666: the SDSS imaging sample. The procedure described above thus provides a mock catalog 
667: with the most similar characteristics to the SDSS survey and it will allow us to explore the 
668: performance of the cluster-finding algorithm on real SDSS data.    
669: 
670: 
671: 
672: The modified matched filter algorithm is then run on the mock galaxy catalog, and the 
673: detected clusters are compared with matched known halos given in the simulation. We find that 
674: the matches are generally robust against details of the matching techniques, as pointed out 
675: by \citet[][although see also the discussion of various matching algorithms in \citealt{Roz07}]{Mil05}. 
676: Here we adopt a matching criterion of projected separation between the detection and the candidate halo 
677: within the virial radius $r_{200}$ and redshift difference $\Delta z<0.05$. 
678: To evaluate completeness of the cluster sample, we match each dark halo to the nearest detected cluster 
679: within the projected cluster $r_{200}$ and $\Delta z$ of 0.05, while in measurement of purity, we 
680: match clusters to their corresponding halos applying the same criteria. In the case of multiple matches, 
681: which are possible for the above matching algorithms, we simply assign the most massive halo within the search 
682: space as the real match. Other methods have also been tried in efforts to refine the matching process, 
683: but no significant changes are found in the final results.
684: 
685: 
686: 
687: 
688: \section{Results and Discussions}\label{sec:Results}
689: 
690: 
691: In this section we present the results of running the modified adaptive matched-filter algorithm on 
692: the simulation-based mock catalogs. These include the completeness and purity check of the detected 
693: cluster sample, the derived cluster properties such as estimated redshift and richness, and the expected 
694: scaling relations that would link the observed clusters to true halo distributions. 
695: 
696: 
697: \subsection{Completeness and Purity Check}\label{sec:Completeness}
698: 
699: 
700: We define the completeness $C$ of the selected cluster sample as a cumulative function of $M_{200}$, 
701: the mass within the virial radius inside which the overdensity is 200 times the critical density:
702: 
703: \begin{equation}
704:   C(M_{200}) = {N_{found} \over 
705:     N_{total} },
706: \end{equation}
707: 
708: where $N_{found}$ is the number of halos with mass greater than $M_{200}$ matched to clusters and 
709: $N_{total} $ is the total number of halos above that mass.
710: 
711: 
712: Figure 3 shows the completeness of the detected cluster sample as a function of redshift and the 
713: virial mass of matched dark matter halos, respectively. The cluster sample, which has a richness 
714: cut at $\Lambda_{200} > 20$, is over $95\%$ complete for objects with $M_{200} > 2.0\times10^{14} h^{-1} M_\odot$ 
715: and $\sim85\%$ complete for objects with masses above $1.0\times10^{14} h^{-1} M_\odot$ in the 
716: redshift range of $0.05<z<0.45$. As we will find in the subsequent discussion of cluster scaling 
717: relations, the richness cut we impose on the cluster sample contributes to some of the incompleteness 
718: for less massive objects because of the large scatter in the cluster 
719: richness-mass relation; many of the matched clusters at $\sim 1.0\times10^{14} h^{-1} M_\odot$ 
720: are simply scattered below the richness cut and thus not counted to compute the completeness. This 
721: can certainly be alleviated by lowering the richness cut of the cluster sample, although we choose 
722: to stick to this cut for the purity considerations below. 
723: 
724: 
725: Also from Figure 3a, the completeness level of the cluster sample remains almost flat out to 
726: $z\sim0.45$, beyond which it suffers a significant decline. This is at least partly due to the 
727: volume limit of the mock catalog which only extends to $z=0.58$. When we scatter 
728: the given galaxy redshifts with photometric redshift errors, which become large around $z\sim0.5$, 
729: many of the galaxies near the far edge of the light cone are scattered away while fewer galaxies 
730: would be shifted into that range, since they are absent from the simulation. The apparent magnitude 
731: cut we have applied to the mock galaxy sample may also contribute to incompleteness at high redshift. 
732: Taking into consideration the necessary $k$-corrections, the galaxy sample is no longer complete 
733: down to the luminosity of $0.4 L^*$, which is the limit assumed throughout the simulation tests. 
734: The matched filter therefore loses some power in detecting less rich systems at redshifts of $z\sim0.5$ 
735: and beyond since many fewer galaxies would be bright enough to be observable at that distance in 
736: the current survey. We have not investigated these effects in detail, though the onset of clear 
737: incompleteness corresponds well to the distance at which they become important. 
738:  
739: 
740: 
741: We similarly define the purity $P$ of the selected cluster sample as a cumulative function of 
742: cluster richness $\Lambda_{200}$, which is the total cluster luminosity in units of $L^*$ inside 
743: its virial radius $r_{200}$:
744: 
745: \begin{equation}
746:   P(\Lambda_{200}) = { N_{match} \over 
747:     N_{tot,\Lambda}},
748: \end{equation}
749: 
750: where $ N_{match} $ is the number of clusters with richness greater than $\Lambda_{200}$ matched 
751: to halos and $N_{tot,\Lambda}$ is the total number of clusters with richness above $\Lambda_{200}$.
752: 
753: 
754: The results of the purity check for the obtained cluster catalog are shown in Figure 4. The sample 
755: is over $95\%$ pure for clusters with $\Lambda_{200} > 30$ and around $90\%$ pure for clusters 
756: with $\Lambda_{200} > 20$ over the whole redshift range out to $z\sim0.45$. As will be shown 
757: in the richness-mass relationship below, these two thresholds in richness correspond 
758: to $M_{200} \sim 6.0\times10^{13} h^{-1} M_\odot$ and $M_{200} \sim 4.0\times10^{13} h^{-1} 
759: M_\odot$, respectively. It is worth noting that the lower purity for $\Lambda_{200} > 20$ 
760: is clearly going to be affected by halo incompleteness in the simulation, since some of the 
761: matched halos for this richness will fall below the mass resolution of the halo catalog, which 
762: means the purity we have derived above is in fact probably a lower limit, in similar logic to 
763: the completeness arguments.
764: 
765: 
766: To ensure a reasonably high purity of selected clusters, we therefore apply a 
767: $\Lambda_{200} > 20$ cut for the cluster catalog, which is used for analysis of completeness 
768: as well as cluster derived properties and scaling relations. The purity measurement 
769: shows a slight but notable uptrend in the last redshift bin of $z\sim0.45$--$0.5$, which could 
770: be similarly explained by the arguments above in the completeness discussions. This reflects 
771: a shift in the richness-mass scaling relation at the high-redshift end, where clusters with the 
772: same richness measurements may correspond to actually richer and more massive systems because 
773: of the under-representation of galaxies that are observable in that redshift range. It is 
774: therefore wise to limit the current cluster catalog to a redshift of $z=0.45$ in order to 
775: extract a uniform sample for statistical use, though the catalog using real SDSS data may well 
776: go deeper reliably.
777:  
778: 
779: 
780: \subsection{Derived Cluster Properties and Scaling Relations}\label{sec:Scaling}
781: 
782: 
783: As is discussed in \S\ref{sec:Algorithm}, for each selected cluster a redshift estimate is 
784: found for the system by the matched filter that optimizes the detection likelihood at the 
785: given galaxy position as cluster center. This measurement is then taken as the estimated 
786: redshift for the cluster. Since all the halos have known redshifts in the simulation, by 
787: matching the detected clusters to halos following the procedure described in 
788: \S\ref{sec:Simulation} we can compare the derived cluster redshifts with the true 
789: redshifts of associated halos.    
790: 
791: 
792: Figure 5 illustrates the comparison between estimated cluster redshifts and known halo 
793: redshifts. For clusters with redshifts below $z=0.25$ where spectroscopic redshift 
794: measurements are often available for member galaxies, the derived cluster redshift 
795: estimates precisely reproduce the true redshifts of corresponding dark halos. 
796: The inclusion of spectroscopic information of input galaxies markedly sharpens the 
797: cluster detection likelihood in the line-of-sight dimension and thus provides accurate 
798: measurements of the cluster redshifts. In the higher redshift range where spectroscopic 
799: measurements become rare and photometric estimates dominate, the plot illustrates a larger 
800: dispersion while the matched filter still gives robust determinations of cluster redshifts 
801: even with only photometric galaxy redshift information for inputs. We find that the 
802: accuracy of the redshift estimates does increase with cluster richness as expected, although 
803: this is mostly accounted for by the higher fraction of cluster galaxy members with spectroscopic 
804: measurements inside these systems. There is a slight uptrend bias seen at the redshift 
805: of $z\sim0.45$, which we see as a similar indication of incompleteness of the input 
806: galaxy sample near the high end of the redshift range for this mock catalog because of 
807: the volume limit and magnitude cut. The estimated cluster redshift determined from maximum 
808: likelihood tends to drift towards smaller values in some cases since the detection 
809: probability at higher redshift is suppressed by such effects. We also note the existence 
810: of a few serious outliers, which probably represent the occasional scenario when there 
811: exists a mismatch between relevant clusters and dark halos due to the projection effects 
812: or false positive detections.    
813: 
814: 
815: The normalized cluster richnesses $\Lambda_{200}$ are also compared with the virial mass 
816: $M_{200}$ of matched halos. The results are shown in Figure 6. We find that the 
817: richness-mass scaling relation follows 
818: 
819: \begin{equation}
820:   \Lambda_{200} = (47.2\pm4.1)\times \left( M_{200}  \over {10^{14} h^{-1} M_\odot} \right )^{1.03\pm0.04} , 
821: \end{equation}
822: 
823: which is roughly a linear fit. Whether this is correct or not, clearly, depends upon the details
824: of the simulation input, and the way the simulation was constructed gives no easy clue to
825: what the results should be. What is important in this test, however, is that we recover
826: what is present in the simulations, not what might or might not be present in the real universe.
827: To that end, we have constructed three more plots. The first, Figure 7, compares the cluster
828: richness determined by the present algorithm with the total three dimensional luminosity of the
829: matched halos; the agreement is very good, with no bias evident at either the sparse or the
830: rich end. Given this agreement and the results of Figure 6, the next plot, Figure 8, of the 3-D halo 
831: luminosity vs the 3-D halo mass, contains no surprises. The simulated halo mass is, in fact,
832: linear with its total luminosity, and we recover this relationship. 
833: 
834: 
835: Figure 9 compares the derived cluster virial radius $r_{200}$ from the cluster-finding algorithm 
836: and the $r_{200}$ determined from 3-dimensional galaxy overdensities. The agreement is excellent 
837: at small virial radii, though there is a strong hint that the
838: algorithm slightly overestimates large virial radii, by seven percent or thereabouts. This is
839: almost certainly due to the assumption of a single NFW profile to describe the cluster; neighboring
840: halos have rather different effects in the cylinder to which the algorithm is sensitive and the
841: corresponding sphere in the simulations, but it is gratifying that the effects are this small.
842: These results further justify our choice to refer our richness measurements 
843: to the commonly-used virial radius determined from galaxy overdensities.
844: 
845: 
846: It is, however, clear that the scatter in the richness--mass relation derived from the cluster 
847: finding algorithm (Figure 6) is somewhat larger than that of the intrinsic richness-mass relation 
848: in the simulations (Figure 8), 
849: % {\bf Elena: can we quantify this?} 
850: which can be read as an indication of complications in the 
851: cluster-halo matching process, e.g., the inevitable difference between the cluster finder 
852: and halo finder regarding fragmentation and merging, differing shapes between the galaxy 
853: and mass distributions, and, even further, the variable mass-to-light ratios inside the systems 
854: incorporated in the current dark matter simulations. Despite these intrinsic 
855: dispersions, the richness-mass scaling relation shows a strong linear correspondence 
856: between the observables and the mass, and thus makes it possible to extract 
857: the true halo distribution in the Universe from the observed cluster abundance 
858: and correlation functions. It is important to note that the simulation from which
859: the catalog was made is a dark-matter-only simulation, and thus effects which
860: may well exist in real clusters and can affect the baryon fraction in the
861: intracluster gas and galaxies (see, for example, \citealt{Kra05}) as a function of
862: cluster mass are absent here, but the fact that we recover the relation found from 
863: input 3-D simulations, here just linear, indicates that we should be able to investigate
864: a possibly more complex relationship in the real universe.
865: 
866: 
867: 
868: 
869: \section{Conclusions}\label{sec:Conclusion}
870: 
871: 
872: We present a modified matched filter algorithm which is designed to construct a 
873: comprehensive cluster catalog from the Sloan Digital Sky Survey, but is applicable
874: to any deep photometric survey. The technique is fully adaptive to 2-D, 2$\half$-D 
875: and 3-D optical surveys, as well as to various cluster scales and substructures. 
876: 
877: 
878: The cluster-finding algorithm has been tested against a realistic mock SDSS catalog from 
879: a large N-body simulation. The results suggest that the selected cluster sample is 
880: $\sim 85\%$ complete and over $90\%$ pure for systems more massive than 
881: $1.0\times10^{14} h^{-1}$ M$_\odot$ with redshifts up to $z=0.45$. The estimated 
882: cluster redshifts derived from maximum likelihood analysis show small errors 
883: with $\Delta z < 0.01$, and the normalized cluster richness measurements fit 
884: linearly with the virial mass of matched halos, the correct relation in this simulation.
885: This offers hope that the (very likely nonlinear) relation between richness
886: and halo mass which exists in the real universe can be investigated with these
887: techniques.
888: 
889: 
890: 
891: \acknowledgments F.D. thanks H. Lin, H. Oyaizu, and the SDSS photo-$z$
892: group for providing the photometric redshifts which allowed us to
893: derive the statistics of the photo-$z$ calibration to the
894: spectroscopic redshifts. E.P. is an ADVANCE fellow (NSF grant
895: AST-0649899), also supported by NASA grant NAG5-11489.  R.H.W. was
896: supported in part by the U.S. Department of Energy under contract
897: number DE-AC02-76SF00515.  This research used 
898: computational facilities supported by NSF grant AST-0216105.
899: 
900: \bibliography{ms}
901: \bibliographystyle{apj}
902: 
903: 
904: 
905: \clearpage
906: 
907: %% Use the figure environment and \plotone or \plottwo to include 
908: %% figures and captions in your electronic submission.
909: 
910: 
911: \epsscale{1.00}
912: \begin{figure}
913: \plottwo{f1a.eps}{f1b.eps}
914: \caption{Calculated photometric redshifts versus corresponding spectroscopic measurements 
915: for early type galaxies (or red galaxies, left), and late type galaxies (or blue galaxies, 
916: right). Here, red means $g-r >$ 1.3 and blue means $g-r <$ 1.3.
917: \label{f1}}
918: \end{figure}
919: 
920: 
921: \epsscale{0.75}
922: \begin{figure}
923: \plottwo{f2a.eps}{f2b.eps}
924: \caption{Examples of multiple Gaussian fits for the error distributions of computed 
925: photometric redshifts. The derived fitting parameters are used to scatter the known 
926: redshifts of mock galaxies in order to simulate the practice with real SDSS data. 
927: \label{f2}}
928: \end{figure}
929: 
930: 
931: \epsscale{1.0}
932: \begin{figure}
933: \plottwo{f3a.eps}{f3b.eps}
934: \caption{Completeness of the detected cluster sample as a function of redshift 
935: and the virial mass of matched halos, respectively. The sample is 
936: consistently $>95\%$ complete for halos with 
937: $M_{200} > 2.0\times10^{14} h^{-1} M_\odot$ and is $\sim85\%$ complete for 
938: halos with $M_{200} > 1.0\times10^{14} h^{-1} M_\odot$ in the redshift range of 
939: $0.05<z<0.45$. Note that the annotations in the figures should read $h^{-1} M_\odot$ 
940: instead of $M_\odot$.
941: \label{f3}}
942: \end{figure}
943: 
944: 
945: \epsscale{1.0}
946: \begin{figure}
947: \plottwo{f4a.eps}{f4b.eps}
948: \caption{Purity of the detected cluster sample as a function of redshift 
949: and the cluster richness, respectively. The derived catalog is over $95\%$ pure for 
950: clusters with $\Lambda_{200} > 30$ and around $90\%$ pure for $\Lambda_{200} > 20$ 
951: in the redshift range of $0.05<z<0.45$.
952: \label{f4}}
953: \end{figure}
954: 
955: 
956: \epsscale{1.00}
957: \begin{figure}
958: \plotone{f5.eps}
959: \caption{Comparison between estimated cluster redshifts and known redshifts of 
960: matched halos.
961: \label{f5}}
962: \end{figure}
963: 
964: 
965: \epsscale{0.87}
966: \begin{figure}
967: \plotone{f6.eps}
968: \caption{Comparison between derived cluster richness and the virial mass of matched 
969: halos. The cluster richness $\Lambda_{200}$ is the total luminosity 
970: of the cluster in units of $L^*$ inside its virial radius $r_{200}$.  
971: \label{f6}}
972: \end{figure}
973: 
974: 
975: \epsscale{0.87}
976: \begin{figure}
977: \plotone{f7.eps}
978: \caption{Comparison between derived cluster richness and the total luminosity of matched 
979: halos in units of $L^*$. The cluster richness $\Lambda_{200}$ is the total luminosity 
980: of the cluster in units of $L^*$ inside its virial radius $r_{200}$.  
981: \label{f7}}
982: \end{figure}
983: 
984: 
985: \epsscale{0.87}
986: \begin{figure}
987: \plotone{f8.eps}
988: \caption{Comparison between the virial mass of matched halos and their luminosities in 
989: units of $L^*$. The dashed line is the best-fit cluster richness-mass scaling relation 
990: given in Figure~\ref{f6}. 
991: \label{f8}}
992: \end{figure}
993: 
994: 
995: \epsscale{0.87}
996: \begin{figure}
997: \plotone{f9.eps}
998: \caption{Comparison between derived cluster virial radius $r_{200}$ and the halo 
999: $r_{200}$ determined by galaxy overdensities.
1000: \label{f9}}
1001: \end{figure}
1002: 
1003: 
1004: 
1005: 
1006: \end{document}  
1007: