0804:0804.1532/ms.tex

1: \documentclass[10pt,preprint2]{aastex}

2:

3: \newcommand{\Mks}{\ifmmode M_{K_{\rm s}} \else $M_{K_{\rm s}}$ \fi}

4: \newcommand{\ff}{\ifmmode f_{{\rm gm}} \else $f_{{\rm gm}}$ \fi}

5: \newcommand{\ffo}{\ifmmode f_{{\rm gm},0} \else $f_{{\rm gm},0}$ \fi}

6: \newcommand{\ffk}{\ifmmode f_{{\rm gm},k} \else $f_{{\rm gm},k}$ \fi}

7: \newcommand{\ffkr}{\ifmmode f_{{\rm gm},k}^{\rm in} \else $f_{{\rm gm},k}^{\rm in}$ \fi}

8: \newcommand{\ffkml}{\ifmmode f_{{\rm gm},k}^{\rm ML} \else $f_{{\rm gm},k}^{\rm ML}$ \fi}

9: \newcommand{\ffkclass}{\ifmmode f_{{\rm gm},k}^{\rm class} \else $f_{{\rm gm},k}^{\rm class}$ \fi}

10: \newcommand{\sffk}{\ifmmode \sigma_{f_{{\rm gm},k}} \else $\sigma_{f_{{\rm gm},k}}$ \fi}

11: \newcommand{\sffkml}{\ifmmode \sigma_{f_{{\rm gm},k}^{\rm ML}} \else $\sigma_{f_{{\rm gm},k}^{\rm ML}}$ \fi}

12: \newcommand{\sffkmlp}{\ifmmode \sigma^{+}_{f_{{\rm gm},k}^{\rm ML}} \else $\sigma^{+}_{f_{{\rm gm},k}^{\rm ML}}$ \fi}

13: \newcommand{\sffkmlm}{\ifmmode \sigma^{-}_{f_{{\rm gm},k}^{\rm ML}} \else $\sigma^{-}_{f_{{\rm gm},k}^{\rm ML}}$ \fi}

14: \newcommand{\sffkclass}{\ifmmode \sigma_{f_{{\rm gm},k}^{\rm class}} \else $\sigma_{f_{{\rm gm},k}^{\rm class}}$ \fi}

15:

16: \newcommand{\zp}{\ifmmode z_{{\rm phot}} \else $z_{{\rm phot}}$ \fi}

17: \newcommand{\zspec}{\ifmmode z_{{\rm spec}} \else $z_{{\rm spec}}$ \fi}

18:

19: \newcommand{\pkl}{\ifmmode p_{kl} \else $p_{kl}$ \fi}

20: \newcommand{\pklp}{\ifmmode p'_{kl} \else $p'_{kl}$ \fi}

21: \newcommand{\spkl}{\ifmmode \sigma_{p_{kl}} \else $\sigma_{p_{kl}}$ \fi}

22: \newcommand{\spklp}{\ifmmode \sigma_{p'_{kl}} \else $\sigma_{p'_{kl}}$ \fi}

23:

24: \newcommand{\pklrc}{\ifmmode p_{kl,{\rm ML}} \else $p_{kl,{\rm ML}}$ \fi}

25: \newcommand{\pklrcp}{\ifmmode p'_{kl,{\rm ML}} \else $p'_{kl,{\rm ML}}$ \fi}

26: \newcommand{\pklr}{\ifmmode p_{kl, {\rm in}} \else $p_{kl, {\rm in}}$ \fi}

27: \newcommand{\pklrp}{\ifmmode p'_{kl, {\rm in}} \else $p'_{kl, {\rm in}}$ \fi}

28: \newcommand{\mpklrc}{\ifmmode \overline{p_{kl, {\rm ML}}} \else $\overline{p_{kl, {\rm ML}}}$ \fi}

29: \newcommand{\mpklrcp}{\ifmmode \overline{p'_{kl, {\rm ML}}} \else $\overline{p'_{kl, {\rm ML}}}$ \fi}

30:

31: \newcommand{\spklrc}{\ifmmode \sigma_{p_{kl,{\rm ML}}} \else $\sigma_{p_{kl,{\rm ML}}}$ \fi}

32: \newcommand{\spklrcp}{\ifmmode \sigma_{p'_{kl,{\rm ML}}} \else $\sigma_{p'_{kl,{\rm ML}}}$ \fi}

33: \newcommand{\smpklrc}{\ifmmode \sigma_{\overline{p_{kl,{\rm ML}}}} \else $\sigma_{\overline{p_{kl,{\rm ML}}}}$ \fi}

34: \newcommand{\smpklrcp}{\ifmmode s_{p'_{kl,{\rm ML}}} \else $s_{p'_{kl,{\rm ML}}}$ \fi}

35: \newcommand{\mspklrc}{\ifmmode \overline{\sigma_{p_{kl,{\rm ML}}}} \else $\overline{\sigma_{p_{kl,{\rm ML}}}}$ \fi}

36: \newcommand{\mspklrcp}{\ifmmode \overline{\sigma_{p'_{kl,{\rm ML}}}} \else $\overline{\sigma_{p'_{kl,{\rm ML}}}}$ \fi}

37: \newcommand{\spklML}{\ifmmode \sigma_{p_{kl,{\rm iter}}} \else $\sigma_{p_{kl,{\rm iter}}}$ \fi}

38: \newcommand{\spklMLp}{\ifmmode s_{p'_{kl,{\rm iter}}} \else $s_{p'_{kl,{\rm iter}}}$ \fi}

39: \newcommand{\covpklrcp}{\ifmmode {\rm cov}(p'_{mn,{\rm ML}},p'_{st,{\rm ML}}) \else ${\rm cov}(p'_{mn,{\rm ML}},p'_{st,{\rm ML}})$ \fi}

40:

41: \newcommand{\pklclass}{\ifmmode p_{kl, {\rm class}} \else $p_{kl, {\rm class}}$ \fi}

42: \newcommand{\pklclassp}{\ifmmode p'_{kl, {\rm class}} \else $p'_{kl, {\rm class}}$ \fi}

43: \newcommand{\mpklclass}{\ifmmode \overline{p_{kl, {\rm class}}} \else $\overline{p_{kl, {\rm class}}}$ \fi}

44: \newcommand{\mpklclassp}{\ifmmode \overline{p'_{kl, {\rm class}}} \else $\overline{p'_{kl, {\rm class}}}$ \fi}

45: \newcommand{\smpklclass}{\ifmmode s_{p_{kl, {\rm class}}} \else $s_{p_{kl, {\rm class}}}$ \fi}

46: \newcommand{\smpklclassp}{\ifmmode s_{p'_{kl, {\rm class}}} \else $s_{p'_{kl, {\rm class}}}$ \fi}

47: \newcommand{\mspklclass}{\ifmmode \overline{\sqrt{p_{kl, {\rm class}}}} \else $\overline{\sqrt{p_{kl, {\rm class}}}}$ \fi}

48: \newcommand{\mspklclassp}{\ifmmode \overline{p'_{kl, {\rm theo}}}  \else $\overline{p'_{kl, {\rm theo}}}$ \fi}

49: \newcommand{\spklclassp}{\ifmmode p'_{kl, {\rm theo}} \else $p'_{kl, {\rm theo}}$ \fi}

50:

51: \shorttitle{Galaxy merger fractions using maximum likelihood techniques}

52: \shortauthors{L\'opez-Sanjuan et al.}

53:

54: \begin{document}

55:

56: \title{A maximum likelihood method for bidimensional experimental distributions, and its application to the galaxy merger fraction}

57: \author{Carlos L\'opez-Sanjuan, C\'esar Enrique Garc\'{\i}a-Dab\'o, Marc Balcells}

58: \affil{Instituto de Astrof\'{\i}sica de Canarias, Calle V\'{\i}a L\'actea s/n, La Laguna, Tenerife, 38200 Spain}

59: \email{clsj@iac.es, enrique.garcia@gtc.iac.es, balcells@iac.es}

60:

61: \begin{abstract}

62: The determination of galaxy merger fraction of field galaxies using automatic morphological indices and photometric redshifts is affected by several biases if observational errors are not properly treated. Here, we correct these biases using maximum likelihood techniques. The method takes into account the observational errors to statistically recover the real shape of the bidimensional distribution of galaxies in redshift - asymmetry space, needed to infer the redshift evolution of galaxy merger fraction. We test the method with synthetic catalogs and show its applicability limits. The accuracy of the method depends on catalog characteristics such as the number of sources or the experimental error sizes.  We show that the maximum likelihood method recovers the real distribution of galaxies in redshift and asymmetry space even when binning is such that bin sizes approach the size of the observational errors. We provide a step-by-step guide to applying maximum likelihood techniques to recover any one- or bidimensional distribution subject to observational errors.

63: \end{abstract}

64:

65: \keywords{Data Analysis and Techniques}

66:

67: \section{INTRODUCTION}

68:

69: The currently popular hierarchical $\Lambda$CDM models are successful at explaining the structure build-up of the cold dark matter component of the Universe \citep{springel05}. But such models have difficulties when explaining the evolution of the baryonic component, even with modeling that incorporates star formation, active galactic nuclei and supernova feedback, and the multiphase nature of the interstellar medium  \citep[][and references therein]{delucia07}. An open question is the role of the galaxy mergers in the formation of today's galaxies, especially the most massive ellipticals. The observational determination of the merger rate, $\Re_{{\rm m}}$, and its evolution with redshift, provide empirical clues on the amount and the timing of the merger activity.  They also constitute key inputs for semi-analytic models of galaxy formation and evolution.

70:

71: The merger rate, defined as the number density of merger systems at given redshift, depends on the merger time $\tau_{m}$, which can only be estimated by N-body simulations and simplified models \citep[]{mihos95,patton00,conselice06}. On the other hand, the galaxy merger fraction $\ff$, defined as the number of merger galaxies in a given galaxy sample in a redshift interval, is a direct observational quantity. Many works have determined the galaxy merger fraction, usually parametrized as $\ff = \ffo \cdot (1+z)^m$, using different sample selection and methods, like morphological criteria \citep[]{conselice03, lavery04, cassata05, lotz06, bridge07, depropris07}, kinematic close companions \citep[]{patton00, patton02, lin04, depropris05, depropris07}, spatial close pairs \citep[]{lefevre00, bundy04, bridge07, kartaltepe07} or correlation function \citep[]{bell06, masjedi06}. In these works the value of the merger index varies in the range $m = 0 - 4$. $\Lambda$CDM models predict $m \sim 3$ \citep[]{kolatt99, governato99, gottlober01}.

72:

73: The morphological criterion for determining the galaxy merger fraction \citep[see][hereafter C03]{conselice03}, is based on the fact that, just after a merger is complete, the galaxy image shows strong geometrical distortions, in particular asymmetric distortions. Hence, high values in the automatic asymmetry index $A$ (\citealt{abraham96}; C03) are assumed to identify merger systems. Other automatic morphological indices, such as $M_{20}$ and $G$, have also been used to determine the evolution of galaxy merger fraction with redshift \citep[]{lotz06}. The determination of morphological indices, which must be done on \facility{HST} images, is affected by surface brightness dimming and K-corrections, so the errors of the indices grow with redshift and are more important for faint galaxies.

74:

75: In this paper, we present a method based on the maximum likelihood (ML) technique, to handle the effects of the large errors on the determination of the galaxy merger fraction. Galaxy merger fraction determinations using morphological criteria are generally done on large photometric surveys such as AEGIS \citep{davis07}, COMBO-17 \citep{wolf03}, COSMOS \citep{scoville07}, GOODS \citep{giavalisco04}, or SWIRE \citep{lonsdale03}.  We therefore address the effects of errors in the galaxy asymmetry indices as well as errors on the photometric redshifts.

76:

77: In Section \ref{methodology} we review the maximum likelihood method for determining bidimensional distributions. Its application to the galaxy merger fraction determination is given in Section \ref{teofmg}. These sections have a high mathematical content, and a statistics background is recommended. Then, in Section \ref{simulations} we summarize the simulations made to test the general method and, in Section \ref{FmgComp}, how it improves the galaxy merger fraction determination. In Section \ref{MLsteps} we provide an outline for the application of the ML method to any one- or bidimensional experimental distribution subject to observational errors. Our conclusions are presented in Section \ref{conclusions}.

78:

79: \section{METHODOLOGY}\label{methodology}

80: Following \cite{conselice06}, we define the galaxy merger fraction by morphological criteria as

81: \begin{equation}\label{fmg}

82: \ff = \frac{\kappa \cdot N_{\rm m}}{N_{\rm tot} + (\kappa -1) N_{\rm m}},

83: \end{equation}

84: where $N_{\rm m}$ is the number of the distorted sources in the sample, classified as the systems with a value in the asymmetry index $A$ higher than a limiting value $A_{\rm m}$ (see C03 for details), $N_{\rm tot}$ is the total number of sources in the sample, and $\kappa$ is the average number of galaxies that merged to produce the $N_{\rm m}$ merger systems. We use $\kappa = 2$ throughout this paper.

85:

86: In order to compute the galaxy merger fraction and its redshift evolution we must know the underlying distribution of the $z$ and $A$ values, that we assume is represented by a bidimensional histogram in redshift and asymmetry space. This bidimensional histogram is defined by the number of sources in each redshift-asymmetry bin. Normalizing to unity the histogram yields a bidimensional probability distribution defined now by $p_{kl}$, the probability that a source has redshift in bin $k$ and asymmetry in bin $l$. Index $k$ scans the redshift bins of size $\Delta z$ and index $l$ scans the asymmetry bins of size $\Delta A$. In our case we just need two asymmetry bins separated by $A_{\rm m}$: the $l=0$ bin represents normal sources and the $l=1$ bin represents merger systems. Now, the galaxy merger fraction in redshift bin $[z_{k}, z_{k+1})$ is

87: \begin{equation}\label{fmgf}

88: \ffk = \frac{2p_{k1}}{p_{k0}+2p_{k1}}.

89: \end{equation}

90:

91: The accuracy with which the $p_{kl}$ can be obtained degrades significantly when photometric redshifts, $\zp$, are used, and for typical errors of $A$ in deep \facility{HST} surveys. This introduces strong biases in the determination of the galaxy merger fraction.

92:

93: \subsection{The maximum likelihood method}

94: The maximum likelihood method (ML method) developed here is based on \citet{garciadabo02}, who used this technique to determine unbiased luminosity functions. ML methods have been used in a wide range of topics in astrophysics.  \citet{arzner07} use it to improve the determination of faint X-ray spectra; \citet{sheth07} to obtain redshift and luminosity distributions in photometric surveys; \citet{naylor06} to fit colour-magnitude diagrams; \citet{makarov06} to improve distance estimates using Red Giant Branch stars; and, \citet{efstathiou04} to analyze low cosmic microwave background multipoles from the Wilkinson Microwave Anisotropy Probe. ML methods are based on the estimation of the most probable values of a set of parameters which define the probability distribution that describes an observational sample \citep{davidson93,penha01}.

95:

96: The general ML method operates as follows. Throughout the paper we denote as ${\it P}({\bf a}|{\bf b})$ the probability to obtain the values ${\bf a}$, given parameters ${\bf b}$. With ${\bf{x}}_i$ being a vector containing all the measured values for source $i$ in the data set and $\theta$ the parameters of the underlying multidimensional distribution that we want to estimate, we may express the joint likelihood function as

97: \begin{equation}\label{MLdef}

98: L({\bf x}_i| \theta ) \equiv -\ln \big[ \prod_i {\it P}( {\bf x}_i | \theta) \big] = - \sum_i \ln \big[{\it P}({\bf x}_i|{\bf \theta})\big],

99: \end{equation}

100: where ${\it P}({\bf x}_i|\theta)$ is the probability to obtain ${{\bf x}_i}$ for a given $\theta$. If we are able to express ${\it P}({\bf x}_i|\theta)$ analytically, we can minimize Equation \ref{MLdef} to obtain the best estimation of parameters $\theta$, denoted as $\theta_{ML}$. In our case, ${\bf x}_i$ are the observed values of $z$ and $A$ for source $i$, ${\bf x}_i \equiv (z_ {{\rm obs},i}, A_{{\rm obs},i})$, while $\theta \equiv (p_{kl},\alpha)$, where $p_{kl}$ are the probabilities which we defined in the paragraph previous to Equation \ref{fmgf}, and $\alpha$ denotes any other fixed parameters of the distribution.

101:

102: Sources are assumed to have real redshift and asymmetry values $z_{{\rm real},i}$ and $A_{{\rm real},i}$ (not affected by observational errors) which define a bidimensional distribution $p_{kl}$ such that

103:

104: \begin{displaymath}

105: P_{2D}(z_{{\rm real},i},A_{{\rm real},i}|p_{kl})\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \

106: \end{displaymath}

107: \begin{equation}

108: = \{p_{kl} ,\forall z_k \leq z_{{\rm real},i} < z_{k+1} , A_l \leq A_{{\rm real},i} < A_{l+1}\}.\label{bidi}

109: \end{equation}

110:

111: Observational errors cause the observed $z_{{\rm obs},i}$ and $A_{{\rm obs},i}$ to differ from their respective real values  $z_{{\rm real},i}$ and $A_{{\rm real},i}$.

112: The observed $z_{{\rm obs},i}$ are assumed to be extracted from a Gaussian distribution with mean $z_{{\rm real},i}$ and standard deviation $\sigma_{z_{{\rm obs},i}}$,

113: \begin{eqnarray}\label{zgauss}

114: \lefteqn{P_G(z_{{\rm obs},i}|z_{{\rm real},i},\sigma_{z_{{\rm obs},i}})}\nonumber\\

115: && \ \ \ \ \ = \frac{1}{\sqrt{2\pi}\sigma_{z_{{\rm obs},i}}}{\rm e}^{-\frac{(z_{{\rm obs},i}-z_{{\rm real},i})^2}{2\sigma_{z_{{\rm obs},i}}^2}}.

116: \end{eqnarray}

117:

118: \noindent Similarly, the observed asymmetry values $A_{{\rm obs},i}$ are assumed to be extracted from a Gaussian distribution with mean $A_{{\rm real},i}$ and standard deviation $\sigma_{A_{{\rm obs},i}}$,

119: \begin{eqnarray}\label{agauss}

120: \lefteqn{P_G(A_{{\rm obs},i}|A_{{\rm real},i},\sigma_{A_{{\rm obs},i}})} \nonumber\\

121: && = \frac{1}{\sqrt{2\pi}\sigma_{A_{{\rm obs},i}}}{\rm e}^{-\frac{(A_{{\rm obs},i}-A_{{\rm real},i})^2}{2\sigma_{A_{{\rm obs},i}}^2}}.

122: \end{eqnarray}

123:

124: While the $\zp$ errors may not be strictly Gaussian, this is the best analytical approximation of the errors that we can make. We obtain the probability ${\it P}({\bf x}_i| \theta )$ of each source by the total probability theorem:

125: \begin{eqnarray}\label{tprob}

126: \lefteqn{P(z_{{\rm obs},i},A_{{\rm obs},i}|p_{kl},\sigma_{z_{{\rm obs},i}},\sigma_{A_{{\rm obs},i}})} \nonumber\\

127: && = \int P_G(z_{{\rm obs},i}|z_{{\rm real},i},\sigma_{z_{{\rm obs},i}})\nonumber\\

128: && \times P_G(A_{{\rm obs},i}|A_{{\rm real},i},\sigma_{A_{{\rm obs},i}})\nonumber\\

129: && \times P_{2D}(z_{{\rm real},i},A_{{\rm real},i}|p_{kl}){\rm d}z_{{\rm real},i}{\rm d}A_{{\rm real},i},

130: \end{eqnarray}

131: where ${{\bf x}_i} \equiv (z_{{\rm obs},i},A_{{\rm obs},i})$ and $\theta$ $\equiv (p_{kl},$ $\sigma_{z_{{\rm obs},i}},$ $\sigma_{A_{{\rm obs},i}})$ in Equation \ref{MLdef}, with $\alpha \equiv (\sigma_{z_{{\rm obs},i}},\sigma_{A_{{\rm obs},i}})$. Note that the values of $\sigma_{z_{{\rm obs},i}}$ and $\sigma_{A_{{\rm obs},i}}$ are the measured uncertainties for each source, so the only unknowns are the probabilities $p_{kl}$, which we want to estimate. Note also that we integrate over the variables $z_{{\rm real},i}$ and $A_{{\rm real},i}$, so we are not able to estimate them individually, but only the underlying bidimensional distribution $p_{kl}$ that describes the sample.

132:

133: In order to ensure that the probabilities \pkl are not negative, we change variables, $\pkl = {\rm exp}(\pkl')$; this change keeps our problem analytic. With these new variables and after integrating Equation \ref{tprob}, our likelihood function, defined in Equation \ref{MLdef}, becomes

134: \begin{displaymath}

135: L(z_{{\rm obs},i},A_{{\rm obs},i}|p^{\prime}_{kl},\sigma_{z_{{\rm obs},i}},\sigma_{A_{{\rm obs},i}})\ \ \ \ \ \ \ \ \ \ \ \ \ \ \

136: \end{displaymath}

137: \begin{equation}

138: = -\sum_i \biggl[ \ln \bigg\{ \sum_k\sum_l \frac{{\rm e}^{p'_{kl}}}{4}{\rm ERF}(z,i,k){\rm ERF}(A,i,l) \bigg\}\biggr]\label{MLfunc},

139: \end{equation}

140: where

141: \begin{displaymath}

142: {\rm ERF}(\eta,i,k)\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \

143: \end{displaymath}

144: \begin{equation}

145: = {\rm erf}\bigg(\frac{\eta_{{\rm obs},i} - \eta_{k+1}}{\sqrt{2} \sigma_{\eta_{{\rm obs},i}}}\bigg) - {\rm erf}\bigg(\frac{\eta_{{\rm obs},i} - \eta_{k}}{\sqrt{2} \sigma_{\eta_{{\rm obs},i}}}\bigg),\label{ERF}

146: \end{equation}

147: and ${\rm erf}(x)$ is the error function. We must observe that in the minimization of Equation \ref{MLfunc} the variables $\pklp$ are not independent. This is due to the normalization of the distribution: the integration over the whole parameter space must be one. This imposes the following condition on $\pklp$:

148: \begin{equation}\label{norm}

149: {\rm \bf{g}}(\pklp) \equiv \sum_k\sum_l  {\rm e}^{p'_{kl}} (z_{k+1} - z_{k}) (A_{l+1} - A_{l}) - 1 = 0.

150: \end{equation}

151: The method for finding the extrema of a function of several variables subject to one or more constraints is known as the method of Lagrange multipliers \citep[see e.g.,][for details]{marsden96}. It states that the function to minimize is not the target function, Equation \ref{MLfunc}, but a related one:

152: \begin{eqnarray}\label{lagrange}

153: G(\pklp,\lambda) =  L(z_{{\rm obs},i},A_{{\rm obs},i}|p^{\prime}_{kl},\sigma_{z_{{\rm obs},i}},\sigma_{A_{{\rm obs},i}})\nonumber\\

154: + \lambda {\rm \bf{g}}(\pklp),

155: \end{eqnarray}

156: where $\lambda$ is an auxiliary variable. Minimizing Equation \ref{lagrange} we obtain the best \pklp values, denoted as $\pklrcp$.

157:

158: The minimization of Equation \ref{lagrange} can be performed with any numerical minimization code. We used \texttt{AMOEBA}, which is based on the commonly used algorithm of Nelder-Mead \citep{nelder65} and coded in C \citep[][pp. 408--412]{press95}.

159:

160: At this point we have the probabilities $\pklrcp$. However, our goal is to obtain not only the best probabilities estimation, but also their associated uncertainties. The ML method states that we can obtain all the parameter covariances using an expansion of the function $G(\pklp,\lambda)$ in a Taylor series of its variables $\theta = (\pklp,\lambda)$ around the minimization point $\theta_{ML} = (\pklrcp,\lambda_{ML})$ if the probability distributions of \pklrcp are Gaussian, which we assume. The previous minimization process made the first derivative of $G$ null at $\theta = \theta_{ML}$ and we obtain

161: \begin{equation}\label{taylor}

162: G = G(\theta_{ML}) + \frac{1}{2}(\theta - \theta_{ML})^T H (\theta - \theta_{ML}),

163: \end{equation}

164: where $H = h_{xy}$ is the Hessian matrix and $T$ denotes the transpose of a vector. The inverse of the Hessian matrix gives us an estimate of the 68\% confidence intervals of $\pklrcp$, denoted as $[\pklrcp - \spklrcp, \pklrcp + \spklrcp]$, and the covariances between each $\pklrcp$, denoted as $\covpklrcp$, because maximum likelihood theory states that ${\rm cov}(\theta_{x},\theta_{y}) \geq h^{-1}_{xy}$ and $\sigma^2_{\theta_{x}} \geq h^{-1}_{xx}$. In our case, the Hessian matrix is

165: \begin{equation}\label{hessian}

166: H = \left(

167: \begin{array}{cc}

168:   \frac{\partial^2 G}{\partial p'_{mn} \partial p'_{st}} & \triangledown g \\

169:   \triangledown g & 0

170: \end{array}\right),

171: \end{equation}

172: where

173: \begin{eqnarray}

174: \frac{\partial^2 G}{\partial p'_{mn} \partial p'_{st}} = -\sum_i \frac{{\rm ERF}(z,i,m){\rm ERF}(A,i,n)}{16} \nonumber\\

175: \times \frac{{\rm ERF}(z,i,s){\rm ERF}(A,i,t){\rm e}^{p'_{mn}}{\rm e}^{p'_{st}}}{\sum_l\sum_k \frac{{\rm e}^{p'_{kl}}}{4}{\rm ERF}(z,i,k){\rm ERF}(A,i,l)}\label{sigmap}

176: \end{eqnarray}

177: \begin{equation}\label{sigmag}

178: \triangledown g = \frac{\partial^2 G}{\partial \lambda \partial p'_{mn}} = (z_{m+1} - z_{m}) (A_{n+1} - A_{n}){\rm e}^{p'_{mn}}.

179: \end{equation}

180:

181: Finally, the $\pklrc$ probabilities simply are:

182: \begin{equation}\label{pML}

183: \pklrc = {\rm e}^{\pklrcp}.

184: \end{equation}

185: Assuming that the $\pklrcp$ follow a Gaussian distribution, which is assured by the ML theory for large number of sources, the $\pklrc$ follow a log-normal distribution:

186: \begin{eqnarray}

187: \lefteqn{P_{LN}(\pkl|\pklrcp,\spklrcp)} \nonumber\\

188: && = \frac{{\rm e}^{-(\ln \pkl - \pklrcp)^2 / 2 \spklrcp^2}}{\sqrt{2 \pi}\pkl \cdot \spklrcp},

189: \end{eqnarray}

190: which is highly asymmetric and whose 68\% confidence interval is $[\sigma^{-}_{\pklrc},\sigma^{+}_{\pklrc}]$, where

191: \begin{equation}\label{spMLmin}

192: \sigma^{-}_{\pklrc} = {\rm e}^{-\spklrcp} \pklrc,

193: \end{equation}

194: \begin{equation}\label{spMLmax}

195: \sigma^{+}_{\pklrc} =  {\rm e}^{\spklrcp} \pklrc.

196: \end{equation}

197: Furthermore, each $p'_{k0}$ and $p'_{k1}$ are connected by the covariance ${\rm cov}(p'_{k0,{\rm ML}},p'_{k1,{\rm ML}})$, so the confidence intervals of $p_{k0}$ and $p_{k1}$ are not independent. In the next section we explain how to obtain the confidence interval of the galaxy merger fraction taking this into account.

198:

199: \subsection{The galaxy merger fraction}\label{teofmg}

200: Expressing the galaxy merger fraction in the range $[z_k, z_{k+1})$ (Equation \ref{fmgf}) as a function of the output variables of the ML method we obtain:

201: \begin{equation}\label{fmgrc}

202: \ffkml = \frac{2p_{k1,{\rm ML}}}{p_{k0,{\rm ML}}+2p_{k1,{\rm ML}}}.

203: \end{equation}

204:

205: However, we cannot obtain the 68\% confidence interval of $\ffkml$, defined as $[\sffkmlm, \sffkmlp]$, applying the usual error theory, which is based on Gaussianity of variables, because the probability distribution of each $\pklrc$ is log-normal. Furthermore, the problem is not analytic and we cannot obtain a mathematical description of the $\ffk$ probability distributions. We made Monte Carlo simulations to characterize the probability distribution of each $\ffk$. The simulations showed that the $\ffk$ distributions can be fit with a log-normal:

206: \begin{equation}

207: P_{LN}(\ffk|\ffkml, \sigma) = \frac{{\rm e}^{-(\ln \ffk - \ln \ffkml )^2 / 2 \sigma^2}}{\sqrt{2 \pi} \ffk \cdot \sigma},

208: \end{equation}

209: where $\sigma$ is the only free parameter of the fit. Finally, the 68\% confidence interval of $\ffkml$ is given by

210: \begin{equation}\label{ffsigm}

211: \sffkmlm = {\rm e}^{-\sigma}\ffkml,

212: \end{equation}

213: \begin{equation}\label{ffsigp}

214: \sffkmlp =  {\rm e}^{\sigma} \ffkml.

215: \end{equation}

216:

217: \section{SIMULATIONS WITH SYNTHETIC CATALOGS}\label{simulations}

218: The accuracy and reliability of the ML method can be tested using synthetic catalogs. This is an important step since ML theory warns that the estimated parameters may suffer from biases; convergence is only assured for large number of sources. The approach is to create catalogs with predefined underlying distribution parameters and compare with the estimated ML parameters. Note that the inputs of the ML method are the same whether we have a real catalog or a synthetic one. In the following paragraphs, we first explain how we created the synthetic catalogs in a general case, and later define and justify the input parameters used for the synthetic catalogs in this paper. Given the high number of variables used in the following discussion, we provide their precise definitions in Table \ref{defvar}.

219:

220: We created the synthetic catalogs as follows: first we took $n$ sources distributed in redshift and asymmetry space following a bidimensional distribution defined by the input probabilities $\pklr$. This process yielded the $z_{{\rm in},i}$ and $A_{{\rm in},i}$ values of the $n$ sources of our synthetic catalogs, which play the role of $z_{{\rm real},i}$ and $A_{{\rm real},i}$ in Equation \ref{bidi}. Next, we applied the experimental errors: following Equation \ref{zgauss} we obtained the simulated $z_{{\rm sim},i}$ values, which play the role of $z_{{\rm obs},i}$, as drawn from a Gaussian distribution with mean $z_{{\rm in},i}$ and standard deviation $\sigma_{z_{{\rm sim},i}}$; the latter plays the role of $\sigma_{z_{{\rm obs},i}}$. The value of $\sigma_{z_{{\rm sim},i}}$ is a positive value obtained also from a Gaussian distribution with mean $\overline{\sigma_z}$ and standard deviation $\sigma_{\sigma_z}$.  The process was repeated following Equation \ref{agauss} to obtain the simulated $A_{{\rm sim},i}$ and its standard deviation $\sigma_{A_{{\rm sim},i}}$. Finally, we applied the ML method over the synthetic catalog to obtain $\pklrcp$ and $\spklrcp$. Summarizing, the input parameters of our simulations were the bidimensional distribution $\pklr$, $n$, $\overline{\sigma_z}$, $\sigma_{\sigma_z}$, $\overline{\sigma_A}$, and $\sigma_{\sigma_A}$, and the output parameters were $\pklrcp$ and $\spklrcp$.

221:

222: We defined three intervals in redshift ($k = 0,1,2$) with $\Delta z = 0.4$ and $z \in [0,1.2)$, and two in asymmetry ($l = 0,1$) with $\Delta A = 0.7$ and $A \in [-0.35,1.05)$. Distorted sources with $A > A_{\rm m} = 0.35$ (see C03 for details about the determination of this limit value) are described by $p'_{k1,{\rm in}}$, while normal sources by $p'_{k0,{\rm in}}$. We list in Table \ref{realbidih} the redshift and asymmetry intervals, as well as the probabilities $\pklr$ and $\pklrp = \ln \pklr$, that define the input bidimensional distribution of our synthetic catalogs.

223: The $\pklrp$ values in Table \ref{realbidih} do not match any particular observational determination of these quantities, but they follow the general behavior inferred from observed galaxy merger fractions: highly asymmetric galaxies are less frequent than low-asymmetry galaxies up to $z = 1.2$ \citep[][]{conselice03ff, cassata05, bridge07, kamp07}, so the $p'_{k1,{\rm in}}$ are lower than the $p'_{k0,{\rm in}}$. The number of highly asymmetric galaxies increases with redshift in the range $z \in [0,1.2)$ \citep[][]{conselice03ff}, so $p'_{k1,{\rm in}}$ increase with redshift. Several studies present a maximum at intermediate $z$ in the redshift distribution of galaxies in optically selected samples \citep[e.g.,][]{grazian06}, so $p'_{k0,{\rm in}}+p'_{k1,{\rm in}}$ values have a maximum in the interval $z = [0.4,0.8)$. We can check that the $\pklrp$ are normalized following Equation \ref{norm}. Although we present here this particular bidimensional distribution we carried out the same study with other distributions, and the results were similar.

224:

225: For convenience we express the experimental  dispersions using the dimensionless variables

226: \begin{eqnarray}

227: \sigma_{{\rm bin},z} = \frac{\overline{\sigma_z}}{\Delta z},\label{sbinz}\\

228: \sigma_{{\rm bin},A} = \frac{\overline{\sigma_A}}{\Delta A}.\label{sbina}

229: \end{eqnarray}

230: We used the same value of both variables in each simulation, that is, we used $\sigma_{\rm bin} = \sigma_{{\rm bin},z} = \sigma_{{\rm bin},A}$. Because we fixed the values of $\Delta z = 0.4$ and $\Delta A = 0.7$, $\sigma_{\rm bin}$ unequivocally defines $\overline{\sigma_z}$ and $\overline{\sigma_A}$. It is important to notice that, when we work with observational data, the situation is the opposite: our data define  $\overline{\sigma_z}$ and $\overline{\sigma_A}$, and we should choose the most appropriate values of $\Delta z$ and $\Delta A$. We made simulations for $\sigma_{\rm bin}$ = 0 as a check corresponding to null experimental errors, $\sigma_{\rm bin}$ = 0.25 and 0.5 as typical observational cases, and $\sigma_{\rm bin}$ = 1.0 as extreme case to explore the applicability limits of the ML method. The values of $\sigma_{\sigma_z}$ and $\sigma_{\sigma_A}$ were a half of $\overline{\sigma_z}$ and $\overline{\sigma_A}$ respectively in all cases.

231:

232: We ran models with $n$ = 50, 100, and 1000 to check catalog size effects. We took these values because we expect experimental catalogs of a few hundred sources or more and we are interested in the applicability limits of the method to small samples.

233:

234: In order to study how the ML parameters compare with the input parameters, we must perform several simulations and study how the parameters $\pklrcp$ are distributed. Hence, for each $n$ and $\sigma_{\rm bin}$ case we create a simulation set of $N=1000$ independent synthetic catalogs.

235:

236: The results of the simulations are shown in Figure \ref{hist1000}, and in Tables \ref{simresult50}, \ref{simresult100}, and \ref{simresult1000}. Figure \ref{hist1000} shows $\pklrcp$ for samples of $n=1000$ sources (crosses), with error bars showing their 68\% confidence intervals; for comparison, the input probabilities $\pklrp$ are shown as black circles, and the $\pklclassp$, obtained by drawing a classical histogram (as defined below in Section \ref{class}), are shown as gray triangles, also for $n=1000$ catalogs. In Figure \ref{hist1000}, panels $a$, $b$, and $c$ correspond to increasing values of the experimental errors, defined in Equations \ref{sbinz} and \ref{sbina} and shown in the legend; panels $a,b,c$ may be taken to respectively describe `good', `typical', and `bad' observational errors as compared to the $z$ and $A$ bin sizes. The top/bottom panels show $\pklp$ for the low/high-asymmetry bins. Within each panel, values for the three redshift bins are shown, as labeled

237:  on the horizontal axes. We provide the results in tabular format in Tables \ref{simresult50}, \ref{simresult100}, and \ref{simresult1000}, corresponding to simulations with sample sizes of $n=50, 100$, and $1000$, respectively.

238:

239: \begin{figure*}[t]

240: \begin{center}

241: \includegraphics[width = 0.3\linewidth]{f1a.eps}

242: \includegraphics[width = 0.3\linewidth]{f1b.eps}

243: \includegraphics[width = 0.3\linewidth]{f1c.eps}

244: \caption{Results of running the ML method over $N=1000$ synthetic catalogs with $n=1000$ sources each for different experimental errors: (a) $\sigma_{\rm bin} = 0.25$, (b) $\sigma_{\rm bin} = 0.5$, and (c) $\sigma_{\rm bin} = 1$. In all figures black circles are the input bidimensional probabilities $\pklrp$, gray triangles are the classical bidimensional probabilities $\mpklclassp$ and crosses are the ML bidimensional probabilities $\mpklrcp$. The error bars are the 68\% confidence intervals given by ML method, $[\mpklrcp - \mspklrcp, \mpklrcp + \mspklrcp]$.}

245: \label{hist1000}

246: \end{center}

247: \end{figure*}

248:

249: \subsection{Classical bidimensional distribution}\label{class}

250: Before presenting the results of the ML method, we analyze the estimation of the $\pklp$ parameters using the classical bidimensional histogram of the $z_{{\rm sim},i}$ and $A_{{\rm sim},i}$ data. We translate the histogram occupation numbers $n_{kl}$ to probabilities $\pklclassp$ using

251: \begin{equation}\label{classhist}

252: \pklclassp = \ln \bigg( \frac{n_{kl}}{\Delta z \Delta A \sum_{k} \sum_{l} n_{kl} }\bigg),

253: \end{equation}

254: where $n_{kl}$ is the number of sources with $z_{{\rm sim},i}$, $A_{{\rm sim},i}$ within the $[z_k, z_{k+1})$ $\times$ $[A_l, A_{l+1})$ bin. We want to study how the classical method compares with the input parameters. The distribution of the $N$ values of $\pklclassp$ in one simulation set can be

255: represented by its median $\mpklclassp$ and standard deviation $\smpklclassp$. In Tables \ref{simresult50} - \ref{simresult1000} we can see that the classical bidimensional distribution recovers the input probabilities in the case of null experimental errors and $n$ large as expected. However, the shape of the input bidimensional distribution begins to deviate when $\sigma_{\rm bin}$ increases, as we can also see in Figure \ref{hist1000}: the classical bidimensional distribution (gray triangles) is smoothed by experimental errors and does not estimate well the underlying bidimensional distribution (black circles). We study this in detail in Section \ref{qpkl}.

256:

257: \subsection{The ML method in absence of experimental errors}

258: We first test that the ML method, in the case of null experimental errors, recovers the input bidimensional distribution, i.e., that it reduces to the classical method. We can see in Tables \ref{simresult50} - \ref{simresult1000} that the values of $\mpklclassp$ and the median of the $N$ values recovered by the ML method, denoted as $\mpklrcp$, are the same in all cases. This also happens with the values of $\smpklclassp$ and the standard deviations of the $N$ values recovered by the ML method, denoted as $\smpklrcp$. This indicates that the ML method does not introduce systematic effects on the results.

259:

260: \begin{figure}[t]

261: \epsscale{1.0}

262: \plotone{f2.eps}

263: \caption{Variation of $T_{\rm ML}$ (black symbols) and $T_{\rm class}$ (gray symbols) with dimensionless experimental error size $\sigma_{\rm bin}$. Triangles are for $n = 50$, circles for $n = 100$, and squares for $n = 1000$ source catalogs. The solid line is the 99\% confidence limit $T = 2.6$.}

264: \label{Tkl}

265: \end{figure}

266:

267: \subsection{The ML method with non-null experimental errors}\label{qpkl}

268: We now examine how well the ML and classical methods recover the input probabilities $\pklrp$ when non-null experimental errors are included in the synthetic catalogs. We use the $n=1000$ source catalogs as an example, which are representative of the general trends. The results are shown in Figure \ref{hist1000}, and are tabulated in Table \ref{simresult1000}. It is clear from Figure \ref{hist1000} that the $\pklrcp$ (crosses) recover the input probabilities $\pklrp$ (black circles) in all cases, including those in which the inserted errors are as large as the bin size (panels $c$).

269: From Table \ref{simresult1000} we see that the values of $\pklrp$ always lie within the 68\% confidence interval of the ML method, defined by $[\mpklrcp - \smpklrcp, \mpklrcp + \smpklrcp]$. This shows that the ML method is reliable. In contrast, the probabilities $\pklclassp$ derived from the classical histogram (gray triangles in Figure \ref{hist1000}) systematically deviate from the input probabilities. Probabilities are systematically underestimated/overestimated in the low/high-asymmetry bins (upper/lower panels), due to a spill-over from the most populated bins (low asymmetries) to the least populated, high-asymmetry bins. Such deviations increase for larger experimental errors.  When the errors are as large as the bin size, spill-over is so pronounced that the probabilities in the high-asymmetry sample (lower right panel) are nearly equal for the three redshift bins, and all information on the redshift variation of the galaxy merger fractions is lost.

270:

271: We conclude that the ML method is an unbiased estimator of the input distribution.  To put this statement on a more quantitative basis, we carry out a Student's t-test \citep[][p. 232]{collins90}. We define our estimator as

272: \begin{equation}

273: T_{kl,{\rm ML}} = \frac{\sqrt{N}\left| \pklrp - \mpklrcp \right|}{\smpklrcp},

274: \end{equation}

275: and accept that $\pklrp = \mpklrcp$ with 99\% confidence when $T_{kl,{\rm ML}} \leq 2.6$. We define in the same way the variable $T_{kl,{\rm class}}$ to study the accuracy of the $\pklclassp$ as an estimator of the $\pklrp$. We calculate the median of the $T_{kl,{\rm ML}}$ and $T_{kl,{\rm class}}$ for each simulation set, denoted as $T_{\rm ML}$ and $T_{\rm class}$ respectively, to make a comparison between different $n$ and $\sigma_{\rm bin}$.

276:

277: The results are summarized in Tables \ref{simresult50} - \ref{simresult1000}, and in Figure \ref{Tkl}. We can see that $T_{\rm ML}$ is below the confidence level for all $n$ and $\sigma_{\rm bin}$: the $\pklrcp$ are good estimators of the $\pklrp$, as desired. In contrast, the classical method is far from the confidence condition even in the $\sigma_{\rm bin} = 0.25$ case, and $T_{\rm class}$ increases with $\sigma_{\rm bin}$. Besides, having a large $n$ does not improve the results of the classical method: the $\pklclassp$ values are similar for every $n$, but the errors are reduced when increasing $n$, making $T_{\rm class}$ higher. That is, having a large observational sample affected by experimental errors does not improve the estimation of $\pklrp$, and the $\pklclassp$ errors are underestimated. This bias affects the galaxy merger fractions obtained from $\pklclassp$, as we will see in Section \ref{FmgComp}.

278:

279: \begin{figure}[t]

280: \epsscale{1.0}

281: \plotone{f3.eps}

282: \caption{Variation of $F$ with dimensionless experimental error size $\sigma_{\rm bin}$. Triangles are for $n = 50$, circles for $n = 100$, and squares for $n = 1000$ source catalogs. The solid line is the 99\% confidence limit $F = 1.18$.}

283: \label{Fkl}

284: \end{figure}

285:

286: \subsection{Study of $\spklp$}\label{qspkl}

287: When we apply the ML method to an observational sample we obtain an estimation of the $\pklrcp$ 68\% confidence intervals, $[\pklrcp - \spklrcp, \pklrcp + \spklrcp]$, and we want to know if these confidence intervals are representative of the $\pklp$ probability distributions. They are representative if the median of the $N$ values of $\spklrcp$, denoted as $\mspklrcp$, is similar to $\smpklrcp$. To study this issue we perform a Fisher's variance test \citep[][p. 234]{collins90}. We define our estimator as

288: \begin{equation}

289: F_{kl} = \frac{\max (\mspklrcp,\smpklrcp)^2}{\min (\mspklrcp,\smpklrcp)^2},

290: \end{equation}

291: and accept that $\smpklrcp = \mspklrcp$ with 99\% confidence when $F_{kl} \leq 1.18$. We calculate the median of the $F_{kl}$ for each simulation set, denoted as $F$, to make a comparison between different $n$ and $\sigma_{\rm bin}$. The results are summarized in Tables \ref{simresult50} - \ref{simresult1000}, and in Figure \ref{Fkl}. We can see that $\smpklrcp = \mspklrcp$ for all $n$ when $\sigma_{\rm bin} = 0.25, 0.5$. Only when $\sigma_{\rm bin} = 1.0$ and the samples are small ($n=50,100$) does $F$ lie above the confidence limit.

292:

293: These results imply that the ML method supplies reliable confidence intervals of $\pklrcp$ with samples of a thousand sources or, with fewer sources, if the experimental errors are at most half of the histogram bin size.

294:

295: The differences between $\smpklrcp$ and $\mspklrcp$ have two origins. The main effect comes from the fact that the probability distributions of $\pklrcp$ are not perfectly Gaussian, and we had assumed Gaussianity to obtain $\spklrcp$ analytically. We study this issue in the next section. The other effect is that we evaluated the theoretical values of $\spklrcp$ at $\pklrcp$: the minimization method \texttt{AMOEBA} is not perfect and we may have estimated a local minimum of Equation \ref{lagrange} instead of the absolute minimum (see Section \ref{sigML}).

296:

297: \subsection{Probability distributions of $\pklp$}\label{gauss}

298: In the analytical estimation of the $\pklrcp$ covariances we assumed that the $\pklrcp$ probability distributions are Gaussian. To check this assumption we made a histogram of the $N$ values of $\pklrcp$ to obtain the shape of the $\pklrcp$ probability distribution, which we want to approximate by a Gaussian with mean $\mpklrcp$ and standard deviation $\smpklrcp$. We tested this Gaussian approximation with a Kolmogorov-Smirnov test \citep[][p. 235]{collins90}.

299:

300: We saw that the Gaussian distribution approximation was valid for all $\sigma_{\rm bin}$ in the $n=1000$ simulation sets. The situation of the $n=50$ and 100 simulation sets was more complicated. For $n=100$ the $p'_{k0,{\rm ML}}$ Gaussian approximation was valid for all $\sigma_{\rm bin}$, while the $p'_{k1,{\rm ML}}$ started to be non-Gaussian for $\sigma_{\rm bin} = 0.5$, and we could not assume Gaussianity for $\sigma_{\rm bin} = 1.0$. For the $n=50$ simulations we could not assume the Gaussian approximation from $\sigma_{\rm bin} = 0.25$ onwards for the $p'_{k1,{\rm ML}}$ and from $\sigma_{\rm bin} = 0.5$ onwards for the $p'_{k0,{\rm ML}}$.

301:

302: These results emphasize that one must check the Gaussian approximation of the $\pklrcp$ probability distributions in each case. That is, when applying the ML method to an experimental catalog it is essential to make special simulations aimed at verifying the Gaussianity of the recovered probabilities.

303:

304: \subsection{The standard deviation of the ML method due to iterative minimization}\label{sigML}

305: The iterative minimization method \texttt{AMOEBA} used to obtain the minimum of Equation \ref{lagrange} can introduce an error in the determination of $\pklrcp$ if the method converges to a local minimum. Besides, increasing the experimental errors relaxes the conditions over the absolute minimum and makes it more probable that the method converges onto one such local minimum.

306: To study this effect and its importance, we apply the ML method $N=100$ times over the same catalog, one per simulation set. We define the variable $\spklMLp$ as the dispersion of the $N$ values of the recovered probabilities $\pklrcp$. We find that the values of $\spklMLp$ depend on the tolerance and the maximum number of iterations of the minimization method. We take a $10^{-15}$ tolerance and 5000 iterations as optimal values: a smaller tolerance or more iterations do not reduce $\spklMLp$, but increase the computational time. All final simulations presented in this paper were made with these optimal values. We also find that $\spklMLp$ increases with $\sigma_{\rm bin}$, but is $\sim 5$ times smaller than $\smpklrcp$ in the worst experimental error case, so the standard deviations of the probabilities are only slightly affected by this effect. Therefore, when applying the ML method to an experimental catalog, it is safe practice to apply it more than once, as a precaution against local solutions and iteration bias.

307:

308: \subsection{The galaxy merger fraction}\label{FmgComp}

309: In the previous sections we have seen that the experimental errors modify the input bidimensional distribution, biasing the classical method estimations, whereas the ML method is able to recover the input bidimensional distribution. In this section we study the general effect and trends that the experimental errors introduce on the galaxy merger fraction determination.

310: To obtain the galaxy merger fraction by the ML method we follow Section \ref{teofmg}. First we determine the galaxy merger fraction $\ffkml$ applying Equation \ref{pML} to the $\pklrcp$ probabilities in Tables \ref{simresult50} - \ref{simresult1000}. Next, we perform Monte Carlo simulations with these $\ffkml$ values and the $\pklrcp$ and $\spklrcp$ in Tables \ref{simresult50} - \ref{simresult1000} to characterize the probability distribution of $\ffk$, obtaining the 68\% confidence interval $[\sffkmlm, \sffkmlp]$ with Equations \ref{ffsigm} and \ref{ffsigp}.

311:

312: The galaxy merger fraction by the classical method is, applying Equation \ref{fmgf},

313: \begin{equation}

314: \ffkclass = \frac{2{\rm e}^{p'_{k1,{\rm class}}}}{{\rm e}^{p'_{k0,{\rm class}}}+2{\rm e}^{p'_{k1,{\rm class}}}}\label{ffclass},

315: \end{equation}

316: while its 68\% confidence interval $[\ffkclass - \sffkclass,$ $\ffkclass + \sffkclass]$ is obtained applying the usual error theory to Equation \ref{ffclass},

317: \begin{eqnarray}\label{sffclass}

318: \sffkclass = \frac{2{\rm e}^{p'_{k0,{\rm class}}}{\rm e}^{p'_{k1,{\rm class}}}}{({\rm e}^{p'_{k0,{\rm class}}}+2{\rm e}^{p'_{k1,{\rm class}}})^2} \nonumber\\

319: \times \sqrt{s_{p'_{k1,{\rm class}}}^2 + s_{p'_{k0,{\rm class}}}^2}.

320: \end{eqnarray}

321:

322: Because of the experimental error limits of the ML method which we noticed in the previous sections, we only made this study with the $n=1000$ simulation sets. We summarize the results in Table \ref{ffstudy}, and Figure \ref{ffstudyfig}. We can see that the classical method gives worse estimates of the input galaxy merger fraction when the experimental errors increase. We may take as observational reference the $\sigma_{\rm bin} = 0.25$ case (for example, in \citealt{conselice03ff} we have $\sigma_{\rm bin} \sim 0.2$). In this case, the difference between the input and the classical estimation is $\sim 0.1$ in the first and second redshift intervals, which have the lower input galaxy merger fraction, and $\sim 0.05$ in the third interval. Furthermore, the experimental errors tend to smooth the galaxy merger fraction values. An extreme case is $\sigma_{\rm bin} = 1$, where the dependence on $z$ has been lost. In addition, the confidence intervals are underestimated and are $\sim 0.035$ in every case. In contrast, the differences between the input and ML method galaxy merger fractions are $\sim 0.01$ in every redshift bin and experimental error case. Furthermore, the 68\% confidence intervals are more realistic: in the $\sigma_{\rm bin} = 0.25, 0.5$ cases they are $\sim 0.05$, while in the $\sigma_{\rm bin} = 1.0$ case they increase to $\sim 0.1$.

323:

324: Finally, we also determined the classical galaxy merger fraction in the $n=50$ and $100$ cases, and noticed that the values of $\ffkclass$ were similar in each $\sigma_{\rm bin}$ case: having large samples does not improve the results and we must take into account the experimental errors in our analysis to avoid the bias.

325:

326: \begin{figure}[t]

327: \epsscale{1.0}

328: \plotone{f4.eps}

329: \caption{Galaxy merger fraction estimations by classical (gray symbols) and ML method (black symbols). In the two cases triangles are for $\sigma_{\rm bin} = 0.25$, circles for $\sigma_{\rm bin} = 0.5$, and squares for $\sigma_{\rm bin} = 1$. The black solid lines are the input galaxy merger fraction in each redshift bin. We can take $\sigma_{\rm bin} = 0.25$ as observational reference.}

330: \label{ffstudyfig}

331: \end{figure}

332:

333: \section{DETERMINATION OF ANY ONE- OR BIDIMENSIONAL DISTRIBUTION BY THE ML METHOD}\label{MLsteps}

334:

335: The method outlined here may easily be applied to the unbiased determination of any bidimensional distribution in the presence of observational errors. For example, the automatic indices $M_{20}$ and $G$ are used in \citet{lotz06} to determine the galaxy merger fraction by morphological criteria.  We could apply the ML method by defining the variable $MG = G + 0.14M_{20} - 0.33$ and by calling merger systems all sources with $MG > 0$. Similarly, we may apply the ML method to obtain the density of sources in color-color diagrams, especially when we have some condition that separates populations, or to determine the one-dimensional histogram of any observational magnitude.

336:

337: For reference, we provide an outline for the application of the ML method to any one- or bidimensional experimental distribution subject to observational errors:

338: \begin{enumerate}

339: \item Define the observational catalog. This catalog cannot be restricted to the interval of interest, e.g., $[z_0, z_k]$, because there are sources both with $z_i < z_0$ and $z_i > z_k$ that could belong to a real bidimensional distribution bin within the range of interest due to the observational errors. In general one should include in the sample those sources with $z_i + 2\sigma_{i} > z_0$ and $z_i - 2\sigma_{i} < z_k$ to avoid incompleteness effects.

340: \item Apply the ML method to the observational catalog. First, define the bidimensional distribution bins taking into account the size of the observational errors. Next, minimize Equation \ref{lagrange} to obtain the most probable values of $p'_{kl}$, $\pklrcp$. To determine their confidence intervals, calculate the Hessian matrix, Equation \ref{hessian}, with the observational data and the previous $\pklrcp$ values. The diagonal elements of the inverse Hessian matrix provide the variances, i.e., the squares of $\spklrcp$. Notice that we assumed Gaussian experimental errors, Equations \ref{zgauss} and \ref{agauss}, in the development of the ML method. If you need to assume other experimental error distributions, you need to recalculate Equations \ref{lagrange}, \ref{sigmap} and \ref{sigmag} with the new error distributions.

341: \item Check the results with representative synthetic catalogs. Run simulations with synthetic catalogs to test the accuracy and Gaussianity limits of the method in each particular case following the methodology of Sections \ref{qpkl}, \ref{qspkl} and \ref{gauss}. These synthetic catalogs should have the previous $\pklrcp$ as bidimensional distribution input, that is, as $\pklrp$, and similar characteristics to the experimental ones to fix the other input parameters. For example, synthetic and experimental catalogs should have the same number of sources $n$, and  $\overline{\sigma_z}$ may be given by the median of the photometric redshift errors in each redshift bin, while, for $\sigma_{\sigma_z}$, one may use the dispersions of these photometric redshift errors. Besides, it is important to take into account special cases, e.g., the number of sources with $\zspec$, which have $\sigma_z \sim 0$, in each bin, or to avoid unphysical values, e.g., negative redshifts.

342: \item Determine $p_{kl}$, Equation \ref{pML}, and their confidence intervals, Equations \ref{spMLmin} and \ref{spMLmax}, in the reliable cases.

343: \end{enumerate}

344:

345: \section{CONCLUSIONS}\label{conclusions}

346: We have presented a maximum likelihood method to recover bidimensional distributions of experimental data subject to measurement errors, and applied it to the determination of the galaxy merger fraction based on asymmetry criteria from C03.

347:

348: The Gaussianity of $\pklrcp$ is the strongest condition on the reliability of the method. From the results, taking into account that typical observational catalogs usually have a few hundred sources, and that the probabilities $p'_{k1}$ would be small, we conclude that the bin size of the bidimensional distribution must be at least twice the typical error in redshift in the observational catalog. Within this quality limit, the ML method can recover with accuracy and reliability the information lost due to the experimental errors. Besides, our results have realistic errors with known shapes, which the classical histograms cannot provide.

349:

350: The ML method presented here may in principle be extended to as many dimensions as required by the astrophysical problem we are addressing. For instance, if we wish to determine variations in the galaxy merger fraction as a function of galaxy mass, errors in the galaxy mass determination would make objects spill over from one mass bin

351: to the next, biasing the classical histogram approach.  The ML method with an added mass axis would solve the problem.  Even if we are not seeking to determine the variation of the galaxy merger fraction with mass, our parent sample unavoidably has a boundary (e.g., luminosity; mass; color), and observational errors make objects jump in and out of the sample, hence potentially modifying the shape of the distribution we are trying to determine. This extension to higher dimensions is straightforward only when the third variable is independent from the other two. In the

352: case of a third luminosity or mass axis, this is unfortunately not the case: luminosity and mass depend on galaxy redshift, introducing covariances between the variables. Furthermore, luminosity and mass are affected by incompleteness

353: functions, making our problem non-analytic. We leave the treatment of this problem for future work.

354:

355: \acknowledgments

356: We dedicate this paper to the memory of our six IAC colleagues and friends who met with a fatal accident in Piedra de los Cochinos, Tenerife, in February 2007, with a special thanks to Maurizio Panniello, whose teachings of python were so important for this paper.

357:

358: This work was supported by the Spanish Programa Nacional de Astronom\'{\i}a y Astrof\'{\i}sica through project number AYA2006-12955.

359:

360: \begin{thebibliography}{}

361: \bibitem[Abraham et al.(1996)]{abraham96} Abraham, R. G., Tanvir, N. R., Santiago, B. X., Ellis, R. S., Glazebrook, K., \& van den Bergh, S. 1996, \mnras, 279, L47

362: \bibitem[Arzner et al.(2007)]{arzner07} Arzner, K., et al. 2007, \aap, 468, 501

363: \bibitem[Bell et al.(2006)]{bell06} Bell, E. F., et al. 2006, \apj, 652, 270

364: \bibitem[Bridge et al.(2007)]{bridge07} Bridge, C. R., et al. 2007, \apj, 659, 931

365: \bibitem[Bundy et al.(2004)]{bundy04} Bundy, K., Fukugita, M., Ellis, R. S., Kodama, T., \& Conselice, C. J. 2004, \apj, 601, L123

366: \bibitem[Cassata et al.(2005)]{cassata05} Cassata, P., et al. 2005, \mnras, 357, 903

367: \bibitem[Collins(1990)]{collins90} Collins, G. W. 1990, Fundamental numerical methods and data analysis, by George W. Collins, II.

368: \bibitem[Conselice et al.(2003)]{conselice03ff} Conselice, C. J., Bershady, M. A., Dickinson, M., \& Papovich, C. 2003, \aj, 126, 1183

369: \bibitem[Conselice(2003)]{conselice03} Conselice, C. J. 2003, \apjs, 147, 1

370: \bibitem[Conselice(2006)]{conselice06} ---. 2006, \apj, 638, 686

371: \bibitem[Davidson \& Mackinnon(1993)]{davidson93} Davidson, R., \& Mackinnon, J. 1993, Estimation and inference in econometrics (Ed. Oxford University Press, New York)

372: \bibitem[Davis et al.(2007)]{davis07} Davis, M., et al. 2007, \apjl, 660, L1

373: \bibitem[De Lucia \& Blaizot(2007)]{delucia07} De Lucia, G., \& Blaizot, J. 2007, \mnras, 375, 2

374: \bibitem[De Propris et al.(2005)]{depropris05} De Propris, R., Liske, J., Driver, S. P., Allen, P. D., \& Cross, N. J. G. 2005, \aj, 130, 1516

375: \bibitem[De Propris et al.(2007)]{depropris07} De Propris, R., et al. 2007, \apj, 666, 212

376: \bibitem[Efstathiou(2004)]{efstathiou04} Efstathiou, G. 2004, \mnras, 348, 885

377: \bibitem[Garc\'{\i}a-Dab\'o(2002)]{garciadabo02} Garc\'{\i}a-Dab\'o, C. E. 2002, Estudio estad\'{\i}stico de la formaci\'on estelar en el universo local (Tesis Doctoral, Universidad Complutense de Madrid)

378: \bibitem[Giavalisco et al.(2004)]{giavalisco04} Giavalisco, M., et al. 2004, \apjl, 600, L93

379: \bibitem[Gottl\"ober et al.(2001)]{gottlober01} Gottl\"ober, S., Klypin, A., \& Kravtsov, A. V. 2001, \apj, 546, 223

380: \bibitem[Governato et al.(1999)]{governato99} Governato, F., Gardner, J. P., Stadel, J., Quinn, T., \& Lake, G. 1999, \aj, 117, 1651

381: \bibitem[Grazian et al.(2006)]{grazian06} Grazian, A., et al. 2006, \aap, 449, 951

382: \bibitem[Kampczyk et al.(2007)]{kamp07} Kampczyk, P., et al. 2007, \apjs, 172, 329

383: \bibitem[Kartaltepe et al.(2007)]{kartaltepe07} Kartaltepe, J. S., et al. 2007, \apjs, 172, 320

384: \bibitem[Kolatt et al.(1999)]{kolatt99} Kolatt, T. S., et al. 1999, \apj, 523, L109

385: \bibitem[Lavery et al.(2004)]{lavery04} Lavery, R. J., Remijan, A., Charmandaris, V., Hayes, R. D., \& Ring, A. A. 2004, \apj, 612, 679

386: \bibitem[Le F\`evre et al.(2000)]{lefevre00} Le F\`evre, O., et al. 2000, \mnras, 311, 565

387: \bibitem[Lin et al.(2004)]{lin04} Lin, L., et al. 2004, \apj, 617, L9

388: \bibitem[Lonsdale et al.(2003)]{lonsdale03} Lonsdale, C. J., et al. 2003, \pasp, 115, 897

389: \bibitem[Lotz et al.(2008)]{lotz06} Lotz, J. M., et al. 2008, \apj, 672, 177

390: \bibitem[Makarov et al.(2006)]{makarov06} Makarov, D., et al. 2006, \aj, 132, 2729

391: \bibitem[Marsden \& Tromba(1996)]{marsden96} Marsden, J.E. \& Tromba, A.J. 1996, Vector Calculus (W.H. Freeman and Company, New York)

392: \bibitem[Masjedi et al.(2006)]{masjedi06} Masjedi, M., et al. 2006, \apj, 644, 54

393: \bibitem[Mihos(1995)]{mihos95} Mihos, J. C. 1995, \apj, 438, L75

394: \bibitem[Naylor \& Jeffries(2006)]{naylor06} Naylor, T., \& Jeffries, R. D. 2006, \mnras, 373, 1251

395: \bibitem[Nelder \& Mead(1965)]{nelder65} Nelder, J.A. \& Mead, R. 1965, Computer Journal, 7(4), 308

396: \bibitem[Patton et al.(2000)]{patton00} Patton, D. R., Carlberg, R. G., Marzke, R. O., Pritchet, C. J., da Costa, L. N., \& Pellegrini, P. S. 2000, \apj, 536, 153

397: \bibitem[Patton et al.(2002)]{patton02} Patton, D. R., et al. 2002, \apj, 565, 208

398: \bibitem[Pe\~na(2001)]{penha01} Pe\~na, D. 2001, Fundamentos de estad\'{\i}stica (Alianza Editorial, Madrid)

399: \bibitem[Press(1995)]{press95} Press, W.H. 1995, Numerical Recipes in C, second edition (Cambridge University Press, New York)

400: \bibitem[Scoville et al.(2007)]{scoville07} Scoville, N., et al. 2007, \apjs, 172, 1

401: \bibitem[Sheth(2007)]{sheth07} Sheth, R. K. 2007, \mnras, 378, 709

402: \bibitem[Springel et al.(2005)]{springel05} Springel, V., Di Matteo, T., \& Hernquist, L. 2005, \apj, 620, L79

403: \bibitem[Wolf et al.(2003)]{wolf03} Wolf, C., Meisenheimer, K., Rix, H. -W., Borch, A., Dye, S., \& Kleinheinrich, M. 2003, \aap, 401, 73

404: \end{thebibliography}

405:

406: \clearpage

407:

408: \include{tab1}

409: \include{tab2}

410: \include{tab3}

411: \include{tab4}

412: \include{tab5}

413: \include{tab6}

414:

415: \end{document}

416: