0806.0730/MN.tex
1: \documentclass{JHEP3} 
2: 
3: \usepackage{graphicx}
4: 
5: \usepackage{amsmath}
6: \usepackage{amssymb}
7: 
8: 
9: \def\p{\partial}
10: \def\a{\alpha}
11: \def\b{\beta}
12: \def\d{\delta}
13: \def\e{\epsilon}
14: \def\k{\kappa}
15: \def\l{\lambda}
16: \def\L{\Lambda}
17: \def\r{\rho}
18: \def\rr{\varrho}
19: \def\g{\gamma}
20: \def\G{\Gamma}
21: \def\o{\omega}
22: \def\O{\Omega}
23: \def\th{\theta}
24: \def\z{\zeta}
25: \def\s{\sigma}
26: \def\ra{\rightarrow}
27: \def\Ra{\Rightarrow}
28: \def\Lra{\Leftrightarrow}
29: 
30: \def\Mfunction#1{\mathop{\rm #1}\nolimits}
31: 
32: \def\bm#1{\mbox{\boldmath{$#1$}}}
33: 
34: 
35: \title{Fractal analysis of the dark matter and gas distributions in the
36: Mare-Nostrum universe} 
37: \author{Jos\'e Gaite\\ Instituto de Microgravedad IDR,
38: EIAE,\\ Universidad Polit\'ecnica de Madrid, E-28040 Madrid, Spain;\\
39: jose.gaite@upm.es }
40: 
41: \preprint{\today}
42: 
43: \abstract{ We develop a method of multifractal analysis of $N$-body
44: cosmological simulations that improves on the customary counts-in-cells method
45: by taking special care of the effects of discreteness and large scale
46: homogeneity.  The analysis of the Mare-Nostrum simulation with our method
47: provides strong evidence of self-similar multifractal distributions of dark
48: matter and gas, with a halo mass function that is of Press-Schechter type but
49: has a power-law exponent $-2$, as corresponds to a multifractal.  Furthermore,
50: our analysis shows that the dark matter and gas distributions are
51: indistinguishable as multifractals.  To determine if there is any gas biasing,
52: we calculate the cross-correlation coefficient, with negative but inconclusive
53: results. Hence, we develop an effective Bayesian analysis connected with
54: information theory, which clearly demonstrates that the gas is biased in a
55: long range of scales, up to the scale of homogeneity.  However, entropic
56: measures related to the Bayesian analysis show that this gas bias is small (in
57: a precise sense) and is such that the fractal singularities of both
58: distributions coincide and are identical. We conclude that this common
59: multifractal cosmic web structure is determined by the dynamics and is
60: independent of the initial conditions.}
61: 
62: \keywords{cosmic web, cosmological simulations, superclusters}
63: 
64: % \keywords{ cosmology: large-scale structure of Universe -- galaxies:
65: %  clusters:general -- methods: statistical }
66: 
67: 
68: \begin{document}
69: 
70: \section{Introduction}
71: 
72: The large scale structure of the Universe can be described as a ``cosmic web''
73: formed by matter sheets, filaments and nodes.  This type of structure was
74: initially proposed in connection with simplified but insightful models of the
75: cosmic dynamics \cite{Shan-Zel} and has been since confirmed by galaxy surveys
76: and $N$-body cosmological simulations \cite{Rien}.  Cosmological simulations
77: have been especially helpful in testing models of structure formation. In a
78: sense, they have been complementary to observations, since observations are
79: biased towards the luminous matter, while simulations have fully considered
80: the evolution of the dark matter, which is actually the dominant component. In
81: fact, many simulations {\em only} consider dark matter, in particular,
82: non-baryonic cold dark matter, whose dynamics is simplest to simulate and
83: gives rise to cosmic structure that is in accord with observations.  However,
84: due to the advances in parallel computing, the development of efficient codes,
85: and the availability of more powerful computers, the scope of $N$-body
86: simulations has recently changed: now it is possible to simulate the combined
87: dynamics of the non-baryonic dark matter and the baryon gas in large
88: cosmological volumes and with relatively good resolution.
89: 
90: We analyse here the data output of a recent large cosmological simulation of
91: the combined dark matter and gas dynamics, namely, a simulation of the cosmic
92: evolution of $1024^3$ dark-matter particles and an equal number of gas
93: particles carried out by the Mare-Nostrum supercomputer in Barcelona. This
94: dataset has already been analysed by the researchers in charge of the
95: Mare-Nostrum universe project \cite{Gott1,Falten,Gott2}.  Here, we are
96: interested in a particular aspect of the dark matter and gas distributions:
97: their geometry and, specifically, their fractal geometry.
98: 
99: Fractal geometry \cite{Mandel} is the geometry of sets or distributions that
100: have noticeable geometrical features on ever decreasing scales. It is related
101: to scale invariance and indeed appears in nonlinear dynamical systems in which
102: the dynamics is characterized by the absence of reference scales. This is the
103: case of the dynamics of collision-less cold dark matter (CDM), subject only
104: to the gravitational interaction.  Therefore, the cosmic web produced by this
105: type of dynamics has fine structure and it is, arguably, statistically
106: self-similar. We can reasonably assume that the cosmic web is a multifractal
107: attractor of the gravitational dynamics.  This model is supported by the
108: results of CDM simulations \cite{Valda,Colom,Yepes,I4,I5}.  Although the gas
109: dynamics is more complex (due to the gas pressure, etc.), the gas takes part in
110: the nonlinear dynamics of structure formation and can also have a multifractal
111: attractor. Indeed, scaling laws in the distribution of galaxies have a long
112: history, which has been reviewed in
113: Refs.~\cite{Borgani,Jones-RMP,Sylos-Pietro}.  Therefore, it is interesting to
114: compare the scaling laws in the distribution of gas with the scaling laws in
115: the distribution of dark matter.
116: 
117: 
118: Fractal models of the cosmic structure can only be valid in a range of scales,
119: whose upper cutoff is the scale of homogeneity. Its value has been the subject
120: of considerable debate and is still controversial
121: \cite{Jones-RMP,Sylos-Pietro}. In contrast, the lower cutoff to scaling has
122: attracted less attention.  In fact, the CDM gravitational dynamics does not
123: introduce any small reference scale that can play the r\^ole of a lower
124: cutoff, but the gas dynamics introduces the Jeans length.  This length is not
125: a fixed reference scale, for it depends on the local thermodynamical
126: parameters.  In any event, one should expect that the lower cutoff to scaling
127: in the dark matter distribution is smaller than the lower cutoff appropriate
128: for the distribution of galaxies.  However, the opposite seems to be true if
129: one compares galaxy surveys with the results of cosmological simulations,
130: since the latter exhibit reduced scaling ranges, even in dark matter only
131: simulations.  Peebles has included this problem in his list of anomalies in
132: standard cosmology \cite{Pee}.  In his words: ``scale-dependent biasing seems
133: an awkward way to account for the power-law forms of the low order galaxy
134: position correlation functions.''
135: 
136: One can be inclined to place more trust in the scaling range found in galaxy
137: surveys: cosmological simulations allow one to obtain better statistics but
138: they are not free of systematic errors that affect an important range of the
139: smaller scales. Indeed, it has been long known that $N$-body simulations are
140: not fully reliable on scales smaller than the mean particle spacing $N^{-1/3}$
141: \cite{KMS,KMSS}.  In spite of the ever-growing value of $N$, the range of
142: scales between the scale $N^{-1/3}$ and the homogeneity scale is still rather
143: small.  In the Mare-Nostrum universe, this scale range spans a factor of 64
144: (see Sects.~\ref{anal} and \ref{MF}).  Our goal is to demonstrate
145: multifractality of the dark matter and gas distributions in the valid scale
146: range.  Furthermore, given that this scale range is small, we devise a method
147: to correct for discreteness effects and thus extend the valid range to smaller
148: scales, obtaining a reasonable scaling range.  We also intend to test if the
149: dark matter and gas distributions constitute a unique distribution or to what
150: extent they differ. Hence, we make a model of {\em fractal biasing}.
151: 
152: We describe our method of coarse multifractal analysis by counts in cells and
153: define the basic objects (halos) in Sect.~\ref{anal}.  In our method, the
154: scale of homogeneity is explicitly introduced to calculate the multifractal
155: spectrum (Sect.~\ref{anal_w_homo}).  In Sub-sect.~\ref{features}, we show how
156: to obtain the main features of this spectrum and how they are influenced by
157: discreteness and large scale homogeneity.  In Sect.~\ref{MF}, we apply our
158: method to the zero-redshift particle distributions of the Mare-Nostrum
159: universe: (i) we obtain the halo mass functions and discuss their relation to
160: the Press-Schechter mass function in Sect.~\ref{mfun}; (ii) we obtain the
161: multifractal spectra and discuss their relevance in regard to other
162: geometrical studies of the cosmic web in Sect.~\ref{MFsp}; and (iii) we
163: demonstrate scaling and compute sound values of the correlation dimensions in
164: Sect.~\ref{frac_dist}.  The similarity of the results corresponding to the gas
165: and the dark matter suggests that both distributions are identical and shows
166: the need of precise statistical methods to discriminate between them
167: (Sect.~\ref{bias}).  Since the cross-correlations cannot give a definite
168: answer (Sub-sect.~\ref{cross}), we develop an effective Bayesian analysis
169: (Sub-sect.~\ref{Bayes-sect}) which we apply to various cell distributions
170: (Sub-sect.~\ref{appl_Bayes}). This analysis connects with the thermodynamic
171: entropy of mixing (Sub-sect.~\ref{G-DM_entropy}).  Therefore, we study the
172: application of entropic measures to discriminating between mass distributions,
173: and we study the connection of entropies in the continuum limit with the
174: multifractal spectrum (Sect.~\ref{entropies}).  Finally, we discuss our
175: results (Sect.~\ref{discuss}).
176: 
177: A note on notation: we frequently use the asymptotic signs $\sim$ and
178: $\approx$; for example, $f(x) \sim g(x)$ or $f(x) \approx g(x)$ (often without
179: making explicit the independent variable $x$). The former means that the limit
180: of $f(x)/g(x)$ is finite and non-vanishing when $x$ approaches some value
181: (which can be zero or infinity), while the latter means, in addition, that the
182: limit is one.  We also use the sign $\simeq$, which only refers to imprecise
183: numerical values (with unspecified errors).
184: 
185: \section{Methods of data analysis}
186: \label{anal}
187: 
188: The Mare-Nostrum cosmological simulation is described by Gottl\"ober et
189: al.~\cite{Gott1}.  It assumes a spatially flat concordance model with parameters
190: $\O_\L=0.7$, $\O_{\rm m}=0.3$, $\O_{\rm bar}=0.045$, Hubble parameter $h
191: = 0.7$, and initial spectrum with spectral index $n=1$, in a comoving cube of
192: 500 $h^{-1}$ Mpc edges.  The Gadget-2 code \cite{Springel} simulated the
193: evolution of dark matter and gas from redshift $z=40$ to $z=0$. Both dark
194: matter and gas are resolved by $1024^3$ particles, respectively, which results
195: in a mass of $8.24\cdot 10^9\,h^{-1}\, M_\odot$ per dark-matter particle and a
196: mass of $1.45\cdot 10^9\,h^{-1}\, M_\odot$ per gas particle. The Gadget-2 code
197: implements polytropic (adiabatic) evolution of the gas. It can also include
198: dissipation due to radiation or conduction, but these processes have not been
199: included in the Mare-Nostrum simulation.  Nevertheless, the code always
200: includes an artificial viscosity to take care of shock waves.
201: 
202: The Mare-Nostrum universe consists of 135 evenly spaced snapshots.  For our
203: statistical analysis, we only need the $z=0$ snapshot, in which the
204: homogeneity scale is largest and the structures are most developed.  The large
205: size of a Mare-Nostrum universe snapshot makes it unwieldy, so it is
206: convenient (and almost necessary) to analyse it in terms of compound
207: structures, namely, halos, rather than analysing the full particle
208: distributions.  The Mare-Nostrum universe researchers
209: \cite{Gott1,Falten,Gott2} use a friends-of-friends algorithm to define halos,
210: and then they study the distribution and features of those halos. However, we
211: prefer the method of counts in cells, more suitable for studying the continuum
212: limit and the scaling properties of particle distributions.  Therefore, our
213: elementary objects (halos) are cells with constant size but variable mass. The
214: definition of elementary objects in distributions with fine structure
215: (fractals) is arbitrary to a high degree, being actually tied to the
216: measurement or analysis technique.  The definition of elementary objects by
217: coarse graining and, in particular, their definition as cells in a mesh, is
218: very convenient \cite{I4}.  In the absence of a reference scale, the appropriate
219: cell size (the coarse-graining scale) is arbitrary, and there is no clear
220: distinction between inner and outer structure.  However, an $N$-point fractal
221: sample, as a {\em finite} point distribution, has a reference scale, namely,
222: the discreteness scale $N^{-1/3}$, which allows us to properly define the
223: size of elementary objects.
224: 
225: At any rate, the cell size must be considered a running scale.  The use of a
226: running cell size is useful, for example, to distinguish the nonlinear scales
227: where structure formation takes place from the linear scales where the initial
228: conditions are preserved: as the cell size enters in the range of the latter
229: scales, the fluctuations of the counts in cells are reduced to small Gaussian
230: fluctuations. This homogeneity scale is actually the only real scale in the
231: cosmic CDM dynamics, although it is not a sharp scale and, besides, it grows
232: with time.
233: 
234: The method of counts in cells is also suitable for comparing the gas
235: distribution with the dark matter distribution, by comparing the respective
236: counts, for a given cell size.  Of course, we must devise methods to provide
237: these comparisons with statistical meaning.  We defer further description of
238: our methods to Sect.~\ref{bias}. However, we advance that our main procedure
239: naturally connects with the description of multifractals in terms of R\'enyi
240: dimensions.
241: 
242: In summary, our basic assumption is that the Mare-Nostrum particle
243: distributions represent continuous mass distributions with fine structure but
244: which are homogeneous on the large scales.  In particular, we expect
245: continuous distributions of cosmic web type, which have various kinds of
246: density singularities produced by gravitational collapse.  The properties of
247: these singularities can be deduced by suppressing the effects of discreteness.
248: We introduce in the next sub-section methods of multifractal analysis
249: geared to the relevant type of singular distributions. In sub-section
250: \ref{features}, we study the influence of the discreteness scale and the
251: homogeneity scale on the features of the coarse multifractal spectrum.
252: 
253: 
254: \subsection{Counts in cells and coarse multifractal analysis}
255: \label{anal_w_homo}
256: 
257: Let us assume that a mesh of cells is placed in the sample region (the
258: simulation cube).  In the method of counts in cells, (fractional) statistical
259: moments are defined as
260: \begin{equation}
261: M_q = \sum_i \left(\frac{n_i}{N}\right)^{q} = 
262: \sum_{n>0} N(n)\left(\frac{n}{N}\right)^{q},
263: \label{Mq}
264: \end{equation}
265: where the index $i$ refers to non-empty cells, $n_i$ is the number of points
266: (particles) in the cell $i$, $N= \sum_i n_i$ is the total number of points,
267: and $N(n)$ is the number of cells with $n$ points.%
268: \footnote{Central moments are defined by subtracting from $n/N$ its
269: average. In the strongly nonlinear regime, central moments are less
270: convenient.}  The second expression involves a sum over cell populations and
271: it is more useful than the sum over individual cells, because the range of $n$
272: is much smaller (when the cell size is small).  $M_0$ is the number of
273: non-empty cells and $M_1 = 1$. We understand the latter as a mass
274: normalization, namely, the mass in cell $i$ is $n_i/N$ and the total mass is
275: one, such that the mass distribution can be interpreted as a probability
276: distribution (the physical masses of gas or dark-matter particles play no
277: r\^ole in the statistical analysis).  There is an alternate definition of
278: $q$-moments:
279: \begin{equation}
280: \mu_q = \langle \r^q \rangle 
281: = \sum_{n>0} \frac{N(n)}{M_0} \left(\frac{n}{NV}\right)^{q}
282: = \frac{M_q}{V^q M_0} \,,
283: \label{muq}
284: \end{equation}
285: where $V$ is the cell's volume, $N(n)/M_0$ is the fraction of cells that
286: contain $n$ points, and $\r=n/(NV)$ is the density in those cells. With this
287: definition, $\mu_0 =1$ while $\mu_1$ is not fixed.  We notice that the moments
288: with positive integer $q$ ($M_q$ or $\mu_q$, $q \in \mathbb{N}$) are
289: sufficient for regular distributions, but we cannot impose this restriction
290: here ($q \in \mathbb{R}$).
291: 
292: In regular distributions, the mass contained in any cell is proportional to
293: its volume $V$, in the continuum limit $V \ra 0$. Therefore, $M_q \sim
294: V^{q-1}$.  However, we consider singular distributions such that their
295: $q$-moments are non-trivial power laws of $V$ in the continuum limit, namely,
296: distributions such that one can define \cite{Harte} the exponents
297: \begin{equation}
298: \tau(q) = 3\lim_{V\ra 0}\frac{\log M_q}{\log V}\,,\;q \in \mathbb{R}\,.
299: \label{tauq}
300: \end{equation}
301: These distributions are called multifractals.%
302: \footnote{The mathematical definition of a multifractal distribution only
303:   requires the existence of $\tau(q)$, which is a mild condition on the type
304:   of singularities and does not necessarily imply self-similarity. For
305:   example, an isolated power-law singularity or a massive particle in a
306:   uniform background both give rise to non-trivial ``bifractal'' functions
307:   $\tau(q)$.  Nonetheless, physically relevant distributions with non-trivial
308:   $\tau(q)$ usually exhibit some kind of self-similarity, albeit in a
309:   statistical sense.}
310: % 
311: Of course, the numerical evaluation of the limit in Eq.~(\ref{tauq}) is not
312: feasible and one must be satisfied with finding a constant value of the
313: quotient for sufficiently small $V$, that is, in a sufficiently long range of
314: negative values of $\log V$ (a range of scales).  In fact, the exponent is
315: normally defined as the slope of the plot of $\log M_q$ versus $\log V$, and
316: its value is found by numerically fitting that slope, supposing that a
317: meaningful fit is possible.
318: 
319: A multifractal is also characterized by a set of {\em local} dimensions: the
320: local dimension at one point says how the mass grows from that point
321: outwards. Every set of points with a given local dimension $\a$ constitutes a
322: fractal set with dimension $f(\a)$.  In terms of $\tau(q)$, the spectrum of
323: local dimensions is given by
324: \begin{equation}
325: \a(q)= \tau'(q)\,,\quad q \in \mathbb{R}\,,
326: \label{aq}
327: \end{equation}
328: and the spectrum of fractal dimensions $f(\a)$ is given by the Legendre
329: transform
330: \begin{equation}
331: f(\a) = q\,\a - \tau(q)\,.
332: \label{fa}
333: \end{equation}
334: The spectrum of fractal dimensions is convex upwards and fulfills $f(\a) \leq
335: \a$.  The fractal dimension $f(\a)$ reaches the local dimension $\a$ at $q=1$
336: [note that Eq.~(\ref{tauq}) gives $\tau(1)=0$].  The set of singularities with
337: $f(\a_1) = \a_1$ contains the bulk of the mass and is called the ``mass
338: concentrate.''
339: 
340: In addition to the exact exponent $\tau(q)$ (\ref{tauq}), we define, for a
341: given cell size, the {\em coarse} exponent
342: \begin{equation}
343: \tau(q) = 3\frac{\log (M_q/V_0^{q-1})}{\log (V/V_0)}\,,
344: \label{ctauq}
345: \end{equation}
346: where $V$ is the cell size and $V_0$ is the homogeneity scale, such that the
347: density is homogeneous and $M_q \approx V^{q-1}$ for $V > V_0$.  The coarse
348: exponent depends on both $V$ and $V_0$, but this dependence vanishes if $V \ll
349: V_0$ (assuming that the limit $V \ra 0$ exists).  The introduction of the
350: homogeneity scale in Eq.~(\ref{ctauq}) improves the definition used in
351: Ref.~\cite{I4} for the GIF2 simulation, where no $V_0$ is introduced
352: (equivalent to setting $V_0 = 1$).  Given that the Mare-Nostrum universe cube
353: has 500 $h^{-1}\!$ Mpc edges, much longer than the 110 $h^{-1}\!$ Mpc edges of
354: the GIF2 simulation cube, it is important now to take the transition to
355: homogeneity into account in the definition of the coarse exponent, if we want
356: it to be a good approximation of the limit (\ref{tauq}) for moderately small
357: $V$.
358: 
359: The homogeneity scale $V_0$ can be found as the scale of crossover to
360: homogeneity in the scaling of statistical moments (Sect.~\ref{frac_dist}).  We
361: can also estimate it as the coarse-graining scale such that the mass
362: fluctuations are smaller than, say, 10\%; namely, we define it as the scale
363: such that $\mu_2 = 1.1$.  Thus, we find that the scale of homogeneity is about
364: 1/16th of the edge of the cube, namely, about 30 $h^{-1}$ Mpc.  This value is
365: similar to the value of the GIF2 homogeneity scale found in Ref.~\cite{I4},%
366: \footnote{The value found in Ref.~\cite{I4},
367: $r_0 \simeq 14\, h^{-1}\!$ Mpc, is
368: roughly equivalent to {\em half} the edge of the cube such that $\mu_2 <
369: 1.1$.}  
370: % Since we work with normalized units, we set $V_0 = 2^{-12}$.
371: where it is calculated from the crossover in the scaling of moments.
372: 
373: Besides the multifractal spectrum $f(\a)$, it is useful to define the spectrum
374: of R\'enyi dimensions \cite{Harte}
375: \begin{equation}
376: D_q= \frac{\tau(q)}{q-1}\,.
377: \label{Dq}
378: \end{equation}
379: They have an information-theoretic meaning, which will be explained in detail
380: in Sect.~\ref{entropies}.  In particular, the dimension of the mass
381: concentrate $\a_1 = f(\a_1) = D_1$ is also called the entropy dimension.
382: $D_0$ coincides with the maximum value of $f(\a)$ and with the box-counting
383: dimension of the distribution's support, while $D_2 = \tau(2)$ is the
384: correlation dimension.  In the homogeneous regime, $M_q \approx V^{q-1}$ and
385: $D_q= 3$ for any $q$.  In a uniform fractal (a {\em unifractal} or {\em
386: monofractal}) $D_q$ is also constant but smaller than three. In general, $D_q$
387: is a non-increasing function of $q$.
388: 
389: \subsection{Features of the coarse multifractal spectrum}
390: \label{features}
391: 
392: Here, we examine the features of the multifractal spectrum obtained from the
393: coarse exponent defined by Eq.~(\ref{ctauq}).  
394: 
395: In a multifractal, the cell size $V$ is, of course, irrelevant, as long as $V$
396: is sufficiently smaller than the homogeneity scale $V_0$. However, the
397: intrinsic discreteness of a multifractal sample (a finite point distribution)
398: gives rise to another scale, namely, the size of the cell such that there is
399: one point per cell on average ($V =N^{-1}$).  This scale represents the
400: minimal scale at which the distribution can be consistently considered
401: continuous.  In the initial stages of an $N$-body simulation, when there are
402: only very small deviations from the one-particle-per-cell average, it is
403: obvious that it makes no sense to consider smaller scales.  Furthermore, the
404: dynamics of gravitational collapse is deeply distorted on volumes $V <
405: N^{-1}$, so the resulting particle clusters do not represent the structures
406: that result from the collapse of a continuous medium \cite{KMS,KMSS}.
407: %Moreover, it is also the minimal scale at which the dynamical
408: %effects of discreteness in cosmological simulations can be neglected
409: %\cite{KMS,KMSS}.  
410: As a coarse-graining scale, the volume $V =N^{-1}$ produces
411: the largest variety of masses of coarse-grained objects in $N$-body
412: cosmological simulations \cite{I4}.  Thus, this cell size provides us with a
413: sort of {\em master cell distribution} that characterizes the multifractal
414: sample.  Whenever we mention halos, we refer to non-empty master cells,
415: preferably with a considerable number of particles.  Since the number of
416: dark-matter or gas particles in the Mare-Nostrum universe is a perfect cube
417: and, indeed, a power of two, the master cell distributions are easily
418: obtained.
419: 
420: Ref.~\cite{I4} shows that the mass function of halos in the GIF2 simulation
421: follows the power law $N(m) \sim m^{-2}$, except at the large mass end, where
422: it decays faster.  This power law derives from an approximation of the
423: multifractal spectrum, namely, $f(\a) \approx \a$, and therefore represents
424: the mass concentrate of the multifractal.  In contrast, the master cell
425: distribution contains no information of the matter distribution in voids
426: (zones with $\a > 3$), because they are empty \cite{I4,I5}. Hence, a part of
427: the multifractal spectrum is missing even at this scale.  As $V$ shrinks, the
428: multifractal spectrum is reduced further.
429: 
430: The length scale that corresponds to $V =N^{-1}$ in the Mare-Nostrum
431: simulation, namely, $l = N^{-1/3} = 2^{-10}$, is only a factor $2^{6}=64$
432: smaller than $l_0 = V_0^{1/3} = 2^{-4}$.  This is the largest scaling range
433: that could be attainable in principle, despite the large number of particles.
434: In fact, close to the large scale end, at $l_0 = 2^{-4}$, the coarse
435: multifractal spectrum is influenced by homogeneity, whereas close to the
436: opposite end it is influenced by discreteness. Surely, the best estimation of
437: the real spectrum is to be found somewhere in between. Let us study in detail
438: the change of the features of the coarse multifractal spectrum with scale.
439: 
440: For a given coarse-graining scale, we calculate with Eqs.~(\ref{Mq}) and
441: (\ref{ctauq}) the exponent $\tau(q)$, and hence we calculate the coarse
442: multifractal spectrum through the Legendre transform given by (\ref{aq}) and
443: (\ref{fa}).  The lower end of this spectrum corresponds to the limit $q\ra
444: \infty$, that is to say, to the cell(s) with maximum number of particles:
445: \begin{eqnarray}
446: \a_{{\rm min}}=\lim_{q\ra \infty}\a(q) = 3\frac{\log [n_{{\rm
447: max}}/(NV_0)]}{\log (V/V_0)}\,,
448: \label{amin}
449: \\ 
450: f(\a_{{\rm min}}) = -3\frac{\log [N(n_{{\rm max}})\,V_0]}{\log (V/V_0)}\,.
451: \label{famin}
452: \end{eqnarray}
453: Since $\a_{{\rm min}}$ is the local dimension of the strongest singularity, it
454: changes little with the scale, unless we approach homogeneity ($V \to V_0$),
455: which implies that $\a_{{\rm min}} \to 3$.  Usually, $N(n_{{\rm max}})=1$,
456: namely, there is only one cell with the maximum number of
457: particles. Therefore, the choice $V_0=1$, which disregards the effect of
458: homogeneity, implies that $f(\a_{{\rm min}}) =0$. However, any $V_0 < 1$, like
459: our present setting $V_0 = 1/4096$, implies that the fractal dimension
460: $f(\a_{{\rm min}})$ is negative!
461: 
462: Intuitively, negative fractal dimensions seem meaningless, but they often
463: arise in the study of random multifractals. The origin of negative fractal
464: dimensions has been discussed by Mandelbrot \cite{Mandel2}.  In brief, the
465: coarse fractal dimension of a set of singularities in a random multifractal is
466: proportional to the logarithm of their number, but the expected value of this
467: number can be smaller than one. Therefore, sets of singularities with negative
468: fractal dimension are probably empty.  In our case, by setting $V_0$ to a
469: fraction of the total volume, the number of singularities with given $\a$ in
470: cubes of size $V_0$ fluctuates and these fluctuations are more important for
471: values of $\a$ such that there are few singularities with that $\a$ in the
472: whole simulation box.  Thus, it is convenient to ``average'' over the
473: $V_0^{-1}=4096$ cubes and consider at once the 4096 singularities with
474: smallest $\a$, truncating the negative values of the multifractal spectrum.
475: 
476: In analogy with the lower end of the spectrum of local dimensions, we can
477: deduce that its upper end corresponds to the limit $q\ra -\infty$, that is, to
478: the set of cells with one particle (assuming that $V$ is not so large that
479: there are none). In fact,
480: \begin{eqnarray}
481: \a_{{\rm max}}=\lim_{q\ra -\infty}\a(q) = -3\frac{\log (N V_0)}{\log(V/V_0)}\,,
482: \label{amax}
483: \\
484: f(\a_{{\rm max}}) = -3\frac{\log [N(1) V_0]}{\log (V/V_0)}\,.
485: \label{famax}
486: \end{eqnarray}
487: Notice that the master cell distribution has $\a_{{\rm max}}=3$ and,
488: therefore, its spectrum is limited to non-void zones ($\a \leq 3$).  The value
489: of $\a_{{\rm max}}$ increases for cell sizes $V > 1/N$, as voids begin to be
490: sampled.  For sufficiently large $V$, $N(1)$ decreases and approaches $1/V_0 =
491: 4096$ (only one cell with one particle per each cube of size $V_0$).  Then,
492: $f(\a_{{\rm max}})$ decreases to zero.  At this scale, we have the complete
493: (positive) multifractal spectrum in the region $\a > 3$, corresponding to
494: voids, and the distribution can be considered continuous over the entire range
495: of $\a$ [we always discard the negative values of $f(\a)$].
496: 
497: The total span of the spectrum is
498: $$ \a_{{\rm max}} - \a_{{\rm min}} = -3\frac{\log (n_{{\rm max}}/n_{{\rm
499: min}})}{\log (V/V_0)}\,,
500: $$ 
501: % 
502: where $n_{{\rm min}} \equiv 1$ in the relevant range of $V$.  Naturally, the
503: largest span is reached when the spectrum is complete in the region $\a > 3$.
504: For the Mare-Nostrum universe, we indeed show in Sect.~\ref{MFsp} that we
505: obtain, by choosing $V$ to be the largest value such that $f(\a_{{\rm max}})
506: \geq 0$, the largest span of dimensions $\a$ and a good estimate of the full
507: multifractal spectrum.  For larger values of $V$, as the transition to
508: homogeneity begins, $n_{{\rm min}}$ grows and approaches $n_{{\rm max}}$, with
509: the consequent contraction of the span of the spectrum.
510: 
511: Scale invariance implies that the multifractal spectra at different
512: coarse-graining scales coincide in their respective ranges $[\a_{{\rm
513: min}},\a_{{\rm max}}]$, where $\a_{{\rm min}}$ is roughly constant but
514: $\a_{{\rm max}}$ increases with the scale.  However, the under-sampling of low
515: density regions that causes the truncation of the spectrum at $\a_{{\rm max}}$
516: also causes deviations from the true spectrum close to $\a_{{\rm max}}$. These
517: deviations must be corrected. We see how to do it for the Mare-Nostrum
518: multifractal spectra in Sects.~\ref{MFsp} and \ref{frac_dist}.
519: 
520: Regarding the master cell distribution and assuming for it the simple mass
521: function $N(n) = N(1)/n^2$, we can deduce interesting consequences about the
522: corresponding coarse multifractal spectrum.  First, we calculate, according to
523: Eq.~(\ref{Mq}),
524: $$
525: M_0 = \sum_{n=1}^{n_{{\rm max}}} N(n) \approx N(1) 
526: \sum_{n=1}^{\infty} \frac{1}{n^2} = N(1)\,\frac{\pi^2}{6}\;.
527: $$ 
528: % 
529: Since this sum is just the number of non-empty cells, we deduce that the
530: fraction of non-empty cells containing one particle is $N(1)/M_0 \approx
6/\pi^2 = 0.61$.  Thus, the full distribution $N(n)$ is determined by just the
number of non-empty cells (or, equivalently, of empty cells, since the total
number of cells is fixed).  Furthermore, from the expression
533: \begin{equation}
534: M_1 = \sum_{n=1}^{n_{{\rm max}}} N(n) \frac{n}{N} \approx 
535: \frac{N(1)}{N} \ln n_{{\rm max}}\;,
536: \label{M1}
537: \end{equation}
538: and the condition $M_1 \equiv 1$ we can determine $n_{{\rm max}}$.  Then, the
539: dimension of the mass concentrate $\a_1 = f(\a_1)$ is
540: \begin{eqnarray*}
541: \a_1 &=& \tau'(1) = \frac{3}{\ln (V/V_0)} 
542: \left(\left.\frac{dM_q}{dq}\right|_{q=1} - \ln V_0 \right) \\
543: &=& \frac{3}{\ln (V/V_0)}\left( \sum_{n=1}^{n_{{\rm max}}} N(n) \frac{n}{N} 
544: \ln \frac{n}{N} - \ln V_0 \right) \\
545: &\approx& \frac{3}{\ln (V/V_0)}\,
546: \left(\frac{\ln n_{{\rm max}}}{2} - \ln (NV_0) \right). 
547: \end{eqnarray*}
548: This dimension is the arithmetic mean of the general values of $\a_{{\rm
549: min}}$ in Eq.~(\ref{amin}) and $\a_{{\rm max}}$ in Eq.~(\ref{amax}).
550: 
551: 
552: \section{Multifractal analysis of the dark matter and gas distributions}
553: \label{MF}
554: 
555: We now present the results of the multifractal analysis of the Mare-Nostrum
556: universe $z=0$ snapshot, beginning with the halo mass functions given by the
557: counts in the master cell distributions (Sect.~\ref{mfun}).  In
558: Sect.~\ref{MFsp}, we study the multifractal spectra in the range of scales
559: covering several powers of two, namely, from $l = 2^{-12}$ to $l =
560: 2^{-7}$. The latter scale is the smallest scale (among the powers of two) such
561: that $N(1) < 4096$ and therefore the spectrum corresponding to voids is
562: complete.  On smaller scales, namely, between $l = 2^{-12}$ and $l = 2^{-8}$,
563: the high-$\a$ ends of the coarse spectra deviate from the true spectrum due to
564: under-sampling of the low density regions.  In Sect.~\ref{frac_dist}, we
565: propose to correct for under-sampling by removing the erroneous ends of the
566: spectrum.  Thus, we can demonstrate scale invariance in the longest possible
567: range.
568: 
569: \subsection{Mass functions}
570: \label{mfun}
571: 
572: In Fig.\ \ref{P-S} are plotted the halo mass functions of dark-matter and gas,
573: obtained from the counts in the master cell distributions.  The mass $m$ is
574: actually defined as the number of particles, for simplicity.  Both mass
575: functions follow the power law $N(m) \sim m^{-2}$ over a considerable range of
576: $m$: least-squares fits in the $\log_2 m$ range from 0 to 9 yield slopes
577: $-2.07$, for the dark matter, and $-2.12$, for the gas.
578: 
579: \begin{figure}
580: \centering{\includegraphics[width=7.5cm]{PS_DM.eps}}
581: \centering{\includegraphics[width=7.5cm]{PS_G.eps}}
582: \caption{Log-log plots of the number of halos $N$ versus
583: their mass $m$ (number of particles)
584: at coarse-graining scale $1024^{-1}$, in the case of dark matter 
585: (left) and gas (right).
586: }
587: \label{P-S}
588: \end{figure}
589: 
590: There are $156\hspace{1pt}272\hspace{1pt}463$ cells with one dark-matter
591: particle and $170\hspace{1pt}546\hspace{1pt}782$ cells with one gas particle
592: in the master cell distribution.  According to Eq.~(\ref{famax}), the fractal
593: dimensions of the sets with $\a_{{\rm max}}=3$ are $f(\a_{{\rm max}})=2.54$
594: and $2.56$, for the dark matter and gas, respectively.  The cell with the
595: largest proportion of dark matter has $20\hspace{1pt}658$ dark-matter
596: particles and it also has the largest proportion of gas, namely,
597: $19\hspace{1pt}200$ gas particles; all the particles together form the most
598: massive halo.  The corresponding values of $\a_{{\rm min}}$, according to
599: Eq.~(\ref{amin}), are $0.61$ and $0.63$, respectively. However,
600: Eq.~(\ref{famin}) yields negative values of $f(\a_{{\rm min}})$, which we do
601: not consider.  We compute directly from Eqs.~(\ref{aq}), (\ref{fa}) and
602: (\ref{ctauq}) that the values of $\a$ such that $f(\a) = 0$ are $0.91$ (dark
603: matter) and $0.95$ (gas).
604: 
605: We have seen in the preceding section that the value of $\a_1$ corresponding
606: to the master cell distribution can be estimated as the arithmetic mean of
607: $\a_{{\rm min}}$ and $\a_{{\rm max}}=3$.  Whether we use $\a_{{\rm min}}
608: \simeq 0.6$ or $\a_{{\rm min}} \simeq 0.9$, this estimation yields smaller
609: values than the actual values, which are $2.22$ (dark matter) and $2.29$
610: (gas).  On the other hand, the estimation $m_{{\rm max}} = \exp[N/N(1)]$,
611: deduced by making $M_1 =1$ in Eq.~(\ref{M1}), yields 967 and 542,
612: respectively, well below the real values (see Fig.\ \ref{P-S}).  The problem
613: is that the power law is modified at the large mass end, as we can perceive in
614: Fig.\ \ref{P-S}. On the one hand, at the large mass end, the values of $N(m)$
615: are so small that there are many values of $m$ for each value of $N$; on the
616: other hand, $N$ as a function of the average of the corresponding values of
617: $m$ decays faster than a power law.  In fact, the above estimated values of
618: $m_{{\rm max}}$ actually mark the ends of the power laws, instead of the ends
619: of the large masses.
620: 
621: We can improve the fit of the mass function by modelling the large mass end of
622: the power law. For this, we can take inspiration from the Press-Schechter mass
623: function,
624: \begin{equation}
625: N(m) \propto \left(\frac{m}{m_*}\right)^{n/6 - 3/2}
626: \exp\left[-\left(\frac{m}{m_*}\right)^{n/3 + 1}\right],
627: \label{P-S_MF}
628: \end{equation}
629: where $n > -3$ is the spectral index of the initial power spectrum and $m_*$
630: stands for the large-mass cutoff.  In fact, 
631: %agreement with the power-law part of the spectrum demands 
632: the agreement between the power-law parts of Eq.~(\ref{P-S_MF}) and of the
633: found mass function demands  
634: $n \ra -3$. Therefore, we take
635: \begin{equation}
636: N(m) \approx N(1)\, m^{-2}
637: \exp\left[-\left(\frac{m}{m_*}\right)^{\!\e}\,\right],
638: \label{Nm}
639: \end{equation}
640: where $\e >0$ is to be fitted, as well as $m_*$.  The latter can be deduced
641: from the condition $M_1 = 1$; namely,
642: \begin{eqnarray*}
643: M_1 &\approx& \frac{N(1)}{N} 
644: \sum_{m=1}^{\infty} \frac{\exp[-(m/m_*)^\e]}{m} \\
645: &\approx&
\frac{N(1)}{N} 
647: \int_{m=1}^{\infty} dm\,\frac{\exp[-(m/m_*)^\e]}{m} \approx 
648: \frac{N(1)}{N} \,\ln m_* \,.
649: \end{eqnarray*}
650: It is independent of $\e$, and coincides with the value given by
651: Eq.~(\ref{M1}) if we identify $m_{{\rm max}}$ there with $m_*$. This
652: identification is natural, because the exponential form (\ref{Nm}) is just one
653: way of introducing a mass cutoff that is more adequate than the sharp cutoff
654: used in Eq.~(\ref{M1}).  We can see why the above quoted values of $m_{{\rm
655: max}}$, below 1000, actually mark the end of the power laws. The new values of
656: $m_{{\rm max}}$ are obtained from expression (\ref{Nm}) by requiring
657: $N(m_{{\rm max}})=1$. Thus, this model raises the estimations of $m_{{\rm
658: max}}$, but the new values depend on $\e$.  For $\e =1$, $m_{{\rm max}}$ is
659: equal to 2849 (dark matter) or 2022 (gas). Naturally, better estimations are
660: obtained by taking smaller $\e$. In fact, the Press-Schechter mass function
661: must be substituted by a lognormal mass function \cite{I4}, in which the power
662: $(m/m_*)^\e$ becomes $[\ln(m/m_*)]^2$.
663: 
664: \subsection{Multifractal spectra and cosmic web structure}
665: \label{MFsp}
666: 
667: The coarse multifractal spectrum is easily computed from the counts in cells,
668: through Eq.~(\ref{Mq}) and Eqs.~(\ref{aq}), (\ref{fa}) and (\ref{ctauq}).  We
669: plot in Fig.\ \ref{MFspec} the multifractal spectra of the dark-matter and gas
670: distributions at scales from $l=2^{-12}$ up to $l=2^{-7}$. We stop at this
671: scale because we already have the full spectrum, and on larger scales it
672: begins to show signs of a transition to homogeneity.  For comparison, we also
673: plot the spectra corresponding to the distributions at $l=2^{-3}$, which are
674: homogeneous [we have computed them using Eq.~(\ref{ctauq}) with $V_0 =1$].
675: 
676: The six multifractal spectra at successive scales coincide closely in their
677: respective ranges, except near $\a_{{\rm max}}$, and the spectra corresponding
678: to the dark matter are almost identical to the ones corresponding to the gas
679: (Fig.\ \ref{MFspec}).  In addition, they all are similar to the multifractal
680: spectra of the GIF2 simulation obtained in Ref.~\cite{I4}, although they span
681: a slightly larger range of local dimensions.  By increasing the reference
682: scale $V_0$ in Eq.~(\ref{ctauq}), we observe that the span of $\a$ at a given
683: scale shrinks, and thus we deduce that the slightly smaller spans in the GIF2
684: simulation are due to having set there no homogeneity scale ($V_0 =1$).  The
685: universal multifractal spectrum of cosmological distributions that all these
686: results suggest is typical of statistically self-similar multifractals.
687: 
688: The dimension of the mass concentrate in the spectra of Fig.\ \ref{MFspec}
689: slightly rises as the coarse-graining length grows; taking all the spectra
690: into account, we estimate $\a_1 \simeq 2.4$. This value agrees with the value
691: obtained from the GIF2 simulation. It is a remarkably high value, which makes
692: the mass concentrate relatively homogeneous.  It is interesting to consider
693: the meaning of this high dimension for a cosmic web structure. This type of
694: structure presumably possesses singularities of the three possible kinds,
695: namely, singular points, curves and surfaces, called nodes, filaments and
696: sheets, respectively.  At first sight, the high value of $D_1=\a_1$ may
697: suggest that the mass concentrates in the highest dimensional structures,
698: namely, sheets (Zel'dovich's ``pancakes''). However, self-similar
699: distributions of filaments or even of nodes can also reach fractal dimensions
700: higher than two.  Therefore, detailed morphological studies are necessary to
701: decide the relative weight of sheets, filaments and nodes in the cosmic web.
702: 
703: Morphological studies of multifractal distributions are by no means easy. In
704: fact, fractal dimensions do not reveal whether a distribution consists of
705: points, curves or surfaces. This information is given by the {\em topological
706: dimension}, whereas the fractal dimension informs about the clustering of
707: objects of given topological dimension.%
708: \footnote{The topological dimension is a topological invariant, unlike the
709: Hausdorff-Besicovitch (fractal) dimension. The topological dimension can be
710: defined in several equivalent ways and is always an integer: it is zero for
711: a point, one for a curve, two for a surface, etc.
712: The Hausdorff-Besicovitch dimension is bounded below by the topological
713: dimension. Actually, Mandelbrot \cite{Mandel} defines a fractal as a
714: set with Hausdorff-Besicovitch dimension strictly higher than its topological
715: dimension. Therefore, the degree of fractal clustering is measured by the
716: difference between both dimensions.}
717: % 
718: Unfortunately, topological dimensions are very difficult to estimate from
719: finite samples of singular distributions.  One method of studying the topology
of a cosmic web finite sample has been devised by Sheth et al.~\cite{S4}. Their
721: method is based on a surface modelling algorithm (``SurfGen'').  Other methods
722: are described by van de Weygaert \& Schaap \cite{vW-Sch}, e.g., the method
723: based on the Delaunay tessellation field estimator.  Many morphological
724: studies of the cosmic web have focused on its voids, for the boundaries of
725: voids define the matter sheets (or vice versa); but there is no unique
726: definition of voids in finite samples.  Cosmic foams with self-similar
727: distributions of voids have relatively simple structures, with well defined
728: distributions of sheets, filaments and nodes.  Besides, the scaling of voids
729: is easily demonstrated in finite samples of these distributions.  However, the
730: cosmic web seems to be better described as a non-lacunar multifractal with
731: much more complex geometry \cite{I5}.%
732: \footnote{Note that this statement strictly applies to the full matter
733: distribution, whereas the cosmic web of galaxies could have a low lacunarity,
734: as discussed in Ref.~\cite{I5}.}
735: 
736: The dimension of the multifractal mass concentrate $\a_1 \simeq 2.4$ that we
737: find differs from standard determinations of the fractal dimension of the
738: galaxy distribution, which yield values close to two but usually smaller
\cite{Jones-RMP,Sylos-Pietro}.  However, the latter dimension is determined from the
740: two-point correlation function and, therefore, it corresponds to the
741: correlation dimension $\tau(2)=D_2$, which must be smaller than $\a_1=D_1$ (in
742: a multifractal). We determine $D_2$ in Sect.~\ref{frac_dist}.
743: 
744: \begin{figure}
745: \centering{\includegraphics[width=7.5cm]{mf-spec_dm.eps}}
746: \centering{\includegraphics[width=7.5cm]{mf-spec_gas.eps}}
747: \caption{ Multifractal spectra at scales 
748: $l=2^{-12},2^{-11}, \ldots, 2^{-7}$, plotted with
solid lines in successively lighter tones of grey.
750: The light dashed lines are the spectra of the homogeneous distributions at
751: $l=2^{-3}$.}  
752: \label{MFspec}
753: \end{figure}
754: 
755: Another interesting dimension is $D_0$, the box-counting dimension of the
756: distribution's support.  Since it coincides with the maximum of $f(\a)$, Fig.\
757: \ref{MFspec} shows that a reliable value of $D_0$ can only be obtained from
758: the scale $l=2^{-8}$ upwards. This value is 3, confirming the conclusion that
759: the cosmic web is a non-lacunar multifractal \cite{I5}.  Note that having
760: $D_0=3$ implies that 
761: %the increasing numbers of empty cells that appear for $l \leq 2^{-8}$ are 
762: %actually empty because they are under-sampled.  
763: the empty cells that appear in increasing numbers for $l \leq 2^{-8}$ are
764: actually empty because they belong to under-sampled zones.
765: Moreover, the high-$\a$ ends of 
766: %the spectra at scales $l \leq 2^{-8}$ are given by scarcely occupied cells
767: %and, naturally, deviate from the true spectrum (best represented at
768: %$l=2^{-7}$). In particular, the maximum of $f(\a)$ is depressed if 
769: %$l \leq 2^{-9}$, creating
770: the spectra at scales $l < 2^{-8}$ are given by scarcely occupied cells and,
771: naturally, deviate from the true spectrum (best represented at $l=2^{-7}$); in
772: particular, the maximum of $f(\a)$ is depressed, creating
773: the false impression of lacunarity. In fact, it
774: is necessary to suppress scarcely occupied cells to fully demonstrate scale
775: invariance, as we show next.
776: 
777: \subsection{Scaling of second order moments and correlation dimensions}
778: \label{frac_dist}
779: 
780: The superposition of the coarse spectra at $l=2^{-12}, \ldots, 2^{-7}$ in
781: their respective $\a$ ranges that is shown in Fig.~\ref{MFspec} constitutes a
782: proof of multifractality. However, the standard proof of scale invariance for
783: multifractals is based on the definition of $\tau$-exponents in
784: Eq.~(\ref{tauq}): scale invariance demands the scaling of $M_q$ in a range of
785: cell sizes that is sufficient to calculate a meaningful $\tau(q)$ and, hence,
786: $D_q$.  The exponent $\tau(q)$ is normally calculated by fitting the slope of
787: the plot of $\log M_q$ versus $\log l$.  We now follow this procedure.
788: 
789: First of all, we need to select the values of $q$ for which we calculate $M_q$
790: and also select the appropriate range of cell sizes. The available range of
$q$ is bounded above by the condition that $f(\a) \geq 0$ (non-negative fractal
792: dimensions). This bound can be perceived in Fig.\ \ref{MFspec}, for the slopes
793: of the spectra do not become vertical at their left-hand ends, that is to say,
794: the respective values of $q=f'(\a)$ are bounded above.  The bound depends
795: somewhat on the particular spectrum and, in fact, becomes smaller as $l$
796: grows.  An examination of the numerical values of $q$ for the spectra plotted
797: in Fig.\ \ref{MFspec} reveals that the largest integer value of $q$ that is
798: common to all the spectra is $q=2$.  Note that the values of $f'(\a)$ differ
799: much more than the respective values of $f(\a)$.
800: 
801: The possible values of $q$ must also be bounded below: although most spectra
802: in Fig.\ \ref{MFspec} can be nominally extended to $q=f'(\a) \ra -\infty$,
803: this extension is inside their unreliable high-$\a$ ends. For example, it is
804: obvious from Fig.\ \ref{MFspec} that the spectrum at $l=2^{-12}$ (for either
805: dark matter or gas) does not represent well the mass concentrate,
806: corresponding to the point of contact with the diagonal.  Therefore, that
807: spectrum is not valid even down to $q=1$.  Consequently, when we combine the
808: upper and lower bounds, the only integer value allowed is $q=2$, so we must
809: restrict ourselves to examining the scaling of $M_2$ and calculating the
810: correlation dimension $D_2 = \tau(2)$.  Notice that this dimension is
811: of special interest, since it is the one that is usually measured in galaxy
812: surveys.
813: 
814: As regards the range of cell sizes in which to look for scaling, the natural
815: range lies between the homogeneity scale $l=2^{-4}$ and the discreteness scale
816: $l=2^{-10}$. However, we have seen above that the effects of under-sampling can
817: already be perceived at $l=2^{-8}$ and become more evident at $l=2^{-9}$.  On
818: the other hand, the cells that are well populated on scales $l \leq 2^{-8}$
819: are surely not affected by under-sampling, as proved by the superposition of
820: the spectra along their left-hand sides.  A sensible way to avoid the effects
821: of under-sampling in the computation of $M_2$ is to suppress for each $l$ the
822: scarcely occupied cells that contribute to the deviant piece of the
823: corresponding multifractal spectrum.  Thus, we set a lower cell-mass cutoff $m
824: = m_0 (l/l_0)^\a$, where $l_0=2^{-4}$, $m_0= N l_0 ^3 = 2^{18}$ and, for each
825: $l$, we choose the value of $\a$ that marks the beginning of the deviant
826: spectrum.  To be definite, we assume that the deviant pieces of the spectra
827: begin at their respective maxima (see Fig.\ \ref{MFspec}).  Thus, we can
828: proceed to $l<2^{-10}$, but we stop at $l = 2^{-12}$ because lower scales
829: present several problems: (i) the spectrum hardly represents the mass
830: concentrate; (ii) the value of $M_2$ becomes very sensitive to the precise
831: value of the $m$-cutoff; (iii) the gas distribution begins to noticeably
832: depart from the dark-matter distribution.
833: 
834: \begin{figure}
835: \centering{\includegraphics[width=7.5cm]{corr-dim_dm.eps}}
836: \centering{\includegraphics[width=7.5cm]{corr-dim_gas.eps}}
837: \caption{Log-log plots for the correlation dimension $D_2$ of
838: the distributions of dark-matter (left) and gas (right), 
839: showing the fractal scaling range and the transition to homogeneity.}
840: \label{D2-range}
841: \end{figure}
842: 
843: Therefore, we compute $M_2$ from the scale $l = 2^{-12}$ upwards, and actually
844: we do not stop at $l=2^{-4}$ but at $l=2^{-2}$, to study the full transition
845: to homogeneity.  The log-log plots of $M_2$ versus scale $l$ are displayed in
846: Fig.~\ref{D2-range}. The dashed straight lines correspond to the least-squares
847: fits. The two fits in the fractal ranges between $l = 2^{-12}$ and $l =
848: 2^{-6}$ yield the following dimensions: (i) $D_2=1.255 \pm 0.012$ for the
849: dark-matter; (ii) $D_2=1.30 \pm 0.02$ for the gas.  The two fits in the
850: homogeneity ranges yield values of $D_2$ very close to three, of course.
851: 
852: In each plot, the scale at which the straight line of the fractal fit meets
853: the straight line of the homogeneity fit is a measure of the scale of
854: transition to homogeneity $l_0$. Thus, we deduce that $l_0 = 2^{-5}$,
855: approximately, for both the dark matter and the gas.  Notice that this measure
856: of the scale of homogeneity yields a smaller value than the one that we have
857: been using, $l_0 = 2^{-4}$. In fact, the transition to homogeneity is not very
858: sharp but takes place between $l = 2^{-6}$ and $l = 2^{-4}$, as
859: Fig.~\ref{D2-range} shows.
860: 
861: The value $D_2=1.30\pm 0.02$ for the gas is definitely smaller than the galaxy
862: correlation dimension $D_2= 2.0\pm 0.1$ obtained by Sylos Labini and
863: Pietronero \cite{Sylos-Pietro} but agrees with conventional values of $D_2$
864: \cite{Jones-RMP,Sylos-Pietro}. Sylos Labini and Pietronero's value stems from
865: their criticism of the treatment of finite size effects and, in particular,
866: from questioning the classical value of the scale of homogeneity $r_0 \simeq 5
867: \, h^{-1}\!$ Mpc. Indeed, they extend the scaling range of the correlation
868: function up to $30 \, h^{-1}\!$ Mpc and, consequently, $D_2$ grows. Other
869: authors also find $D_2 \simeq 2$, especially when they use long scale ranges
870: to compute it (see Table I in Ref.~\cite{Jones-RMP}). In our case, the end of
871: the scaling range at $l = 2^{-6}$ is quite clear, although we define as
872: homogeneity scale $l_0 = 2^{-5}$ (a fit up to this scale would hardly raise
873: $D_2$, anyway). The scale $l = 2^{-6}$ is $7.8 \, h^{-1}\!$ Mpc in physical
874: units.
875: 
876: To summarize the results of Sect.~\ref{MF}, the tests for scale invariance can
877: be considered successful, given the limitations imposed by the data.  Of
878: course, the scaling range necessary to affirm scale invariance is a matter of
opinion. A factor of $2^{6}=64$ is reasonably good.  In addition to the
880: extent of a scaling range, one must also consider the quality of the
881: corresponding least-squares fit, namely, its standard error.  In this regard,
882: the fits for the dark-matter and gas distributions are both remarkably good.
883: We refrain from affirming that we have proved that these distributions are
884: (samples of) statistically self-similar multifractals, but we assert that
885: there is strong evidence of it.  Furthermore, there is good evidence that the
886: dark-matter and gas distributions are indistinguishable multifractals.  One
887: could object that the confidence intervals for the $D_2$ do not overlap and
888: that there are minute differences between the respective plots in Figs.\
889: \ref{P-S} and \ref{MFspec}.  To assess the statistical significance of the
numerical differences between the distributions of dark-matter and gas
891: particles, we carry out next a detailed study.
892: 
893: \section{Relation between the gas and dark-matter distributions}
894: \label{bias}
895: 
896: We see that the multifractal properties of the dark-matter and gas
897: distributions are very similar (along a considerable range of scales), which
898: suggests that the distributions could actually be identical.  In general, one
899: may ask if two finite samples of continuous distributions can come from the
900: same continuous distribution.  In particular, it is possible that the
901: differences between the distributions of gas and dark matter particles are
902: only due to statistical sample variance, while the continuous gas distribution
903: is unbiased with respect to the total mass distribution (dominated by the dark
904: matter).  We know that the gas dynamics is different from the collisionless
905: dark-matter dynamics, with the likely result of bias, but we need to ascertain
906: the existence of bias from the actual particle distributions by means of
907: statistical tests.
908: 
909: The first test that comes to one's mind is based on the cross-correlation
910: function of gas and dark-matter particles, in particular, the
911: cross-correlation coefficient, useful to measure the similarity of two
912: distributions. Indeed, this test confirms that both distributions are very
913: similar, as we show in sub-section \ref{cross}.  However, this test cannot
914: {\em prove} that the samples actually come from the same continuous
915: distribution. In fact, it is easy to see that there is no way to prove it and
916: we must satisfy ourselves with obtaining a probability of its being
917: true. Rather, assuming a Bayesian point of view, we can quantify the ``degree
918: of belief'' in the hypothesis that there is a common continuous distribution
919: (sub-section \ref{Bayes-sect}). The application of this method in
920: sub-sect.~\ref{appl_Bayes} allows us to confidently conclude that the gas
921: distribution is biased on nonlinear scales. Then, we study the nature of that
922: bias in sub-section \ref{G-DM_entropy}.
923: 
924: To compare the two distributions at several scales, we use counts in cells,
925: like in the multifractal analysis.  Thus, we assume that two independent
926: continuous distributions define the probabilities of the respective counts in
927: cells of given size.  In other words, we assume that the dark-matter and gas
928: distributions are both samples of respective multinomial distributions, each
929: one given by a set of probabilities defined in the cells.  The
930: cross-correlation can be easily expressed in terms of counts in cells.  The
Bayesian method seeks the probability (degree of belief) that two multinomial
samples come from the same multinomial distribution
(sub-section~\ref{Bayes-sect}).
933: 
934: \subsection{Cross-correlations}
935: \label{cross}
936: 
937: Given a mass distribution coarse-grained with volume scale
938: $V$, its auto-correlation is measured by the second order cumulant
939: $${\bar\xi}_2 = \frac{1}{V^2} \int_V d^3x_1\, d^3x_2\, \xi_2(x_1,x_2),$$ 
940: % 
941: where ${\xi}_2$ is the two-point correlation function of the fine grain
942: distribution.  In the nonlinear regime,
943: $${\bar\xi}_2 = \frac{\langle\r^2\rangle}{\langle\r\rangle^2}-1 \approx
944: \frac{\langle\r^2\rangle}{\langle\r\rangle^2} = \frac{\mu_2}{\mu_1^2} \gg 1.$$
945: We can define the {\em cross-correlation} coefficient of gas (g) and
946: dark-matter (m) at scale $V$ as
947: \begin{eqnarray*}
948: {c}_{\rm gm} &=& \frac{{\bar\xi}_{\rm gm}} {\left(\bar\xi_{\rm 2g}
949: \,\bar\xi_{\rm 2m}\right)^{1/2}} = \frac{\langle\r_{\rm g}\,\r_{\rm m}\rangle}
950: {\left(\langle\r_{\rm g}^2\rangle \,\langle\r_{\rm m}^2\rangle \right)^{1/2}}
951: = \frac{\sum_{i=1}^M n_{{\rm g}\,i}\, n_{{\rm m}\,i}} {\left(\sum_{i=1}^M
952: n_{{\rm g}\,i}^2\right)^{1/2}\left(\sum_{i=1}^M n_{{\rm
953: m}\,i}^2\right)^{1/2}}\,,
954: \end{eqnarray*}
955: where the last expression refers to counts in volume-$V$ cells and $M$ denotes
956: the number of these cells. The cross-correlation coefficient can be viewed as
957: the cosine of the angle formed by the two $M$-dimensional vectors $\{n_{{\rm
958:     g}\,i}\}$ and $\{n_{{\rm m}\,i}\}$.
959: 
960: Given the cell counts $\{n_{{\rm g}\,i}\}$ and $\{n_{{\rm m}\,i}\}$, we can
961: compute ${c}_{\rm gm}$ at once, but we follow instead a more elaborate
962: procedure to discern the influence of the cell masses.  We first rank the
963: cells in order of decreasing {\em physical} mass, for physical mass determines
964: the importance of cells in regard to gravity.  Then, we compute the
965: cross-correlation coefficient of the ordered cells up to successive rank
966: values.  In Fig.~\ref{correl}, we plot the cross-correlation coefficient of
967: the gas and dark-matter in massive halos (taken from the master cell
968: distribution), computed in that way.  This coefficient is stably above 0.99,
969: that is to say, the correlation between both distributions is very strong.
970: Moreover, the cross-correlation coefficient increases with the coarse-graining
971: scale $l$. For example, it reaches 0.9999 at $l=2^{-5}$.  However, we have no
972: way of knowing how strong the correlation must be for allowing us to affirm
973: that both samples come from the same distribution.
974: 
975: \begin{figure}
976: \centering{\includegraphics[width=8cm]{correlation.eps}}
977: \caption{Cross-correlation coefficient of gas and dark matter in massive
978: halos, as a function of the number of halos, ranked in order of decreasing
979: mass.}
980: \label{correl}
981: \end{figure}
982: 
983: \subsection{Bayesian comparison of multinomial distributions}
984: \label{Bayes-sect}
985:  
986: Bayes' theory of probability interprets the concept of probability as a
987: measure of a state of knowledge. Bayes' theorem tells us how to adjust
probabilities in regard to new evidence. It reads
989: $$P(H|E) = \frac{P(E|H)\,P(H)}{P(E)}\,,$$ where $H$ is a hypothesis with {\em
990: prior} probability $P(H)$, $E$ is an event that provides new evidence for $H$,
991: and $P(E|H)$ is the conditional probability of having $E$ if the hypothesis
992: $H$ happens to be true. $P(E)$ is the a priori probability of observing the
993: event $E$ under all possible hypotheses.  $P(H|E)$ adjusts $P(H)$ and is
994: called the posterior probability of $H$ given $E$.  Bayesian analysis is
995: routinely employed for model selection in many scientific areas.
996: 
997: In Bayes' theorem, the hypothesis $H$ can belong to a continuum of
998: possibilities. For example, if we are given the results of $N$ trials of a
999: binomial experiment, we can analyse the information gained from them on the
1000: probability $p$ of ``success'' (``success'' is defined arbitrarily as one of
1001: the two possible outcomes).  This probability is a number $0 < p < 1$ (while
1002: the probability of ``failure'' is $1-p$). For a given value of $p$, the
1003: probability of $n$ successes in $N$ trials is given by the binomial
1004: distribution
1005: $$P(N,n|p) = \binom{N}{n}\, p^n (1-p)^{N-n}.$$
1006: Since $N$ and $n$ are given and $p$ is unknown, we can apply 
1007: Bayes' theorem in the form
1008: \begin{eqnarray*}
1009: P(p|N,n) &=& \frac{P(N,n|p)\,P(p)}{\int_0^1 P(N,n|p)\,P(p)\,dp} 
1010: = 
1011: \frac{p^n (1-p)^{N-n}\,P(p)} {\int_0^1 p^n (1-p)^{N-n}\,P(p)\,dp}\,.
1012: \end{eqnarray*}
1013: It yields the probability of $p$ given the data in terms of the prior
1014: probability of $p$. If no prior information about $p$ is available, we must
1015: assume that $P(p)=1$ (according to the principle of insufficient reason).
1016: Then, the posterior probability $P(p|N,n)$ is the {\em beta distribution} with
1017: parameters $n+1$ and $N-n+1$. It is trivial to check that it reaches its
1018: maximum at $p=n/N$ (mode value) and that its variance is proportional to $1/N$
1019: (for fixed $n/N$ and large $N$).
1020: 
1021: This example is, in fact, relevant to our problem, namely, to estimating the
1022: probability that the given gas and dark-matter samples belong to the same
1023: distribution. If we choose one cell, with $n_{{\rm m}}$ dark-matter particles,
1024: say, the probability that the mass fraction in that cell is $p_{{\rm m}}$ is
1025: given by the beta distribution with parameters $n_{{\rm m}}+1$ and $N_{{\rm
1026: m}}-n_{{\rm m}}+1$ ($N_{{\rm m}}$ being the total number of dark-matter
1027: particles in the sample). Analogously, the probability of a gas mass fraction
1028: $p_{{\rm g}}$ in that cell is given by the beta distribution with parameters
1029: $n_{{\rm g}}+1$ and $N_{{\rm g}}-n_{{\rm g}}+1$. We can obtain the probability
1030: of the difference $p_{{\rm m}}-p_{{\rm g}}$ by taking the product $P(p_{{\rm
1031: m}}|n_{{\rm m}},N_{{\rm m}}) P(p_{{\rm g}}|n_{{\rm g}},N_{{\rm g}})$,
1032: performing the change of the variables $p_{{\rm m}}$ and $p_{{\rm g}}$ to
1033: $p_{{\rm m}}-p_{{\rm g}}$ and $(p_{{\rm m}}+p_{{\rm g}})/2$, and integrating
1034: over the second variable (within the appropriate limits).  However, the
1035: difference $p_{{\rm m}}-p_{{\rm g}}$ is a continuous variable and its
1036: probability is a probability {\em density}; therefore, the probability that
1037: $p_{{\rm m}}=p_{{\rm g}}$ vanishes. Nevertheless, we expect to get some
1038: information from the value of the probability density at $p_{{\rm m}}=p_{{\rm
1039: g}}$. Thus, we calculate
1040: \begin{eqnarray}
1041: \lefteqn{
1042: \int_0^1 P(p|n_{{\rm m}},N_{{\rm m}})\, P(p|n_{{\rm g}},N_{{\rm g}})\, dp =}
1043: \nonumber\\
1044: & & \phantom{aaaaaaaa}
1045: \frac{B(n_{{\rm m}}+n_{{\rm g}}+1, N_{{\rm m}}-n_{{\rm m}}+N_{{\rm g}}-
1046: n_{{\rm g}}+1)}{B(n_{{\rm m}}+1, N_{{\rm m}}-n_{{\rm m}}+1)\,B(n_{{\rm g}}+1, 
1047: N_{{\rm g}}-n_{{\rm g}}+1)}\,,
1048: \label{b}
1049: \end{eqnarray}
1050: where ${B(x,y)=\G(x)\,\G(y)/\G(x+y)}$ is the Euler beta function.  The value
1051: of the integral is enhanced when the maxima of $P(p|n_{{\rm m}},N_{{\rm m}})$
1052: and $P(p|n_{{\rm g}},N_{{\rm g}})$ coincide, namely, when $n_{{\rm m}}/N_{{\rm
1053: m}} = n_{{\rm g}}/N_{{\rm g}}$. For fixed $N_{{\rm m}} = N_{{\rm g}}$, the
1054: function on the right-hand side of Eq.~(\ref{b}) is a symmetric function of
1055: $\{n_{{\rm m}},n_{{\rm g}}\}$. Therefore, for a fixed value of $n_{{\rm
1056: m}}+n_{{\rm g}}$, it is just a symmetric function of the difference $n_{{\rm
1057: m}}-n_{{\rm g}}$ and has its maximum when $n_{{\rm m}}=n_{{\rm g}}$.
1058: 
1059: One could criticize the preceding approach for only focusing on the value of
1060: the probability density at $p_{{\rm m}}=p_{{\rm g}}$, while values of $p_{{\rm
1061: m}}-p_{{\rm g}}$ close to zero might also be relevant.  We can avoid the
1062: problem of having to deal with a continuous probability by singling out the
1063: value $p_{{\rm m}}=p_{{\rm g}}$ from the outset. Thus, we formulate a Bayesian
1064: analysis with this hypothesis and the event $E=\{n_{{\rm m}},N_{{\rm
1065: m}},n_{{\rm g}},N_{{\rm g}}\}$:
1066: \begin{eqnarray*}
1067: P(p_{{\rm m}}=p_{{\rm g}} | E)
1068: = 
1069: \frac{P(p_{{\rm m}}=p_{{\rm g}})\,P(E | p_{{\rm m}}=p_{{\rm g}})}
1070: {P(p_{{\rm m}}=p_{{\rm g}})\,P(E | p_{{\rm m}}=p_{{\rm g}}) +
1071: P(p_{{\rm m}} \neq p_{{\rm g}})\,P(E | p_{{\rm m}} \neq p_{{\rm g}})}\,.
1072: \end{eqnarray*}
1073: Here, $P(E | p_{{\rm m}} \neq p_{{\rm g}})$ is just the probability of $E$
1074: given any values of $p_{{\rm m}}$ and $p_{{\rm g}}$, because the event
1075: $p_{{\rm m}}=p_{{\rm g}}$ has probability zero; namely,
1076: \begin{eqnarray*}
1077: P(E | p_{{\rm m}} \neq p_{{\rm g}}) = 
1078: \binom{N_{{\rm m}}}{n_{{\rm m}}} \binom{N_{{\rm g}}}{n_{{\rm g}}} 
1079: \int_0^1 dp_{{\rm m}} 
1080: \int_0^1 dp_{{\rm g}} \,
1081: p_{{\rm m}}^{n_{{\rm m}}} (1-p_{{\rm m}})^{N_{{\rm m}}-n_{{\rm m}}}\,
1082: p_{{\rm g}}^{n_{{\rm g}}} (1-p_{{\rm g}})^{N_{{\rm g}}-n_{{\rm g}}}.
1083: \end{eqnarray*}
1084: On the other hand,
1085: \begin{eqnarray*}
1086: P(E | p_{{\rm m}} = p_{{\rm g}}) &=&
1087: \binom{N_{{\rm m}}}{n_{{\rm m}}} \binom{N_{{\rm g}}}{n_{{\rm g}}} 
1088: \int_0^1 dp\,
1089: p^{n_{{\rm m}}+n_{{\rm g}}} (1-p)^{N_{{\rm m}}-n_{{\rm m}} + N_{{\rm
1090:       g}}-n_{{\rm g}}}.
1091: \end{eqnarray*}
1092: Computing the integrals and substituting, we obtain
1093: \begin{eqnarray*}
1094: P(p_{{\rm m}}=p_{{\rm g}} | E) = \frac{P(p_{{\rm m}}=p_{{\rm g}})\, b(n_{{\rm
1095: m}},N_{{\rm m}};n_{{\rm g}},N_{{\rm g}})} {P(p_{{\rm m}}=p_{{\rm g}})\,
1096: b(n_{{\rm m}},N_{{\rm m}};n_{{\rm g}},N_{{\rm g}}) + P(p_{{\rm m}} \neq
1097: p_{{\rm g}})\, }\,,
1098: \end{eqnarray*}
1099: where
1100: \begin{eqnarray}
1101: b(n_{{\rm m}},N_{{\rm m}};n_{{\rm g}},N_{{\rm g}}) &=& 
1102: \frac{P(E | p_{{\rm m}}=p_{{\rm g}})}{P(E | p_{{\rm m}} \neq p_{{\rm g}})} 
1103: \label{Bf1}\\  &=&
1104: \frac{B(n_{{\rm m}}+n_{{\rm g}}+1, N_{{\rm m}}-n_{{\rm m}}+N_{{\rm g}}-
1105: n_{{\rm g}}+1)}
1106: {B(n_{{\rm m}}+1, N_{{\rm m}}-n_{{\rm m}}+1)\,B(n_{{\rm g}}+1, 
1107: N_{{\rm g}}-n_{{\rm g}}+1)}\,.
1108: \label{Bf2}
1109: \end{eqnarray}
1110: This function of $\{n_{{\rm m}},N_{{\rm m}},n_{{\rm g}},N_{{\rm g}}\}$
1111: coincides with the value of the probability density of $p_{{\rm m}}-p_{{\rm
1112: g}}$ at 0 given by Eq.~(\ref{b}). Therefore, this approach is consistent with
1113: the preceding one: if $b(n_{{\rm m}},N_{{\rm m}};n_{{\rm g}},N_{{\rm g}})$ is
1114: large, then $P(p_{{\rm m}}=p_{{\rm g}} | E)$ tends to one, independently of
1115: the prior probability $P(p_{{\rm m}}=p_{{\rm g}})$. However, we have no way of
1116: estimating this prior probability.
1117: 
1118: The assignment of prior probabilities is a usual problem in Bayesian
1119: analyses, to the extent that Bayes' theory of probability has been deemed
1120: subjective. However, there is no subjectivity if we indeed understand
1121: Bayes' theory as a way of adjusting probabilities in regard to new evidence. 
1122: The {\em Bayes factor} defined in Eq.~(\ref{Bf1}) is such that
1123: \begin{eqnarray*}
1124: \log 
1125: \frac{P(p_{{\rm m}}=p_{{\rm g}} | E)}{P(p_{{\rm m}} \neq p_{{\rm g}} | E)} =
1126: \log b(n_{{\rm m}},N_{{\rm m}};n_{{\rm g}},N_{{\rm g}}) +
1127: \log
1128: \frac{P(p_{{\rm m}}=p_{{\rm g}})}{P(p_{{\rm m}} \neq p_{{\rm g}})}\,.
1129: \end{eqnarray*} 
1130: Hence, we can endow this equation with an information theory meaning: the
1131: prior information about the odds of our hypothesis is updated by the
1132: information $\log b$ provided by the event $E=\{n_{{\rm m}},N_{{\rm
1133:     m}},n_{{\rm g}},N_{{\rm g}}\}$.  The prior information is null if
1134: $P(p_{{\rm m}}=p_{{\rm g}})=P(p_{{\rm m}} \neq p_{{\rm g}})$, but the
1135: information provided by the event is independent of any prior probabilities.
1136: The information provided by $E$ is positive or negative according to whether
1137: the Bayes factor is larger or smaller than one.  The addition of informations
1138: is independent of the (common) base of the logarithms, but it is convenient to
1139: use base two and measure the information in bits. If the Bayes factor is
1140: larger than one half and smaller than two, the information provided by $E$ is
1141: smaller than one bit in absolute value and can hardly be considered significant. For example,
1142: with $N_{{\rm m}} = N_{{\rm g}}=200$, $\log_2 b(100,200;100,200) = 3.00$ bits,
1143: $\log_2 b(100,200;80,200) = 0.11$ bits, and $\log_2 b(100,200;70,200) = -3.61$
1144: bits; only the first and the last cases provide evidence for and against
1145: $p_{{\rm m}}=p_{{\rm g}}$, respectively.
1146: 
1147: Since we actually divide the sample into many cells, we need to generalize the
1148: above method of comparing binomial distributions to the case of multinomial
1149: distributions. This generalization is straightforward, except that we now have
1150: to take care of normalizing the $P(E | \cdot)$ such that $\sum_E P(E | \cdot)
1151: =1$.  The resulting Bayes factor is
1152: \begin{eqnarray*}
1153: \lefteqn{ b(n_{{\rm m}\,1}, \ldots, n_{{\rm m}\,k};n_{{\rm g}\,1}, \ldots, 
1154: n_{{\rm g}\,k}) =}\\
1155: & & \phantom{aaaaaaaa}
1156: \frac{B(n_{{\rm m}\,1}+n_{{\rm g}\,1}+1, \ldots , n_{{\rm m}\,k} +
1157: n_{{\rm g}\,k}+1)}{B(n_{{\rm m}\,1}+1, \ldots, n_{{\rm m}\,k}+1)\,B(n_{{\rm
1158: g}\,1}+1, \ldots, n_{{\rm g}\,k}+1)\,(k-1)!}\,,
1159: \end{eqnarray*}
1160: where $\{n_{{\rm m}\,i}\}_{i=1}^k$ and $\{n_{{\rm g}\,i}\}_{i=1}^k$ are the
1161: vectors denoting the numbers of dark-matter and gas particles, respectively,
1162: in the $k$ cells, and $B(x_{1}, \ldots, x_{k}) = \G(x_{1}) \cdots
1163: \G(x_{k})/\G(x_{1} + \cdots + x_{k})$ is the generalized Euler beta function.
1164: We can write this Bayes factor as follows:
1165: \begin{eqnarray}
1166: \lefteqn{ b(n_{{\rm m}\,1}, \ldots, n_{{\rm m}\,k};n_{{\rm g}\,1}, \ldots, 
1167: n_{{\rm g}\,k}) = }\nonumber\\ 
1168: & & \phantom{aaaaaaaaaaa}
1169: \binom{n_{{\rm m}\,1}+n_{{\rm g}\,1}}{n_{{\rm m}\,1}}
1170: \cdots
1171: \binom{n_{{\rm m}\,k}+n_{{\rm g}\,k}}{n_{{\rm m}\,k}} 
1172: \frac{(N_{{\rm m}}+k-1)!\,(N_{{\rm g}}+k-1)!}{(N_{{\rm m}}+N_{{\rm
1173:       g}}+k-1)!\,(k-1)!}\,, 
1174: \label{entBf}
1175: \end{eqnarray}
1176: where $N_{{\rm m}} = n_{{\rm m}\,1}+ \cdots + n_{{\rm m}\,k}$ and $N_{{\rm g}}
1177: = n_{{\rm g}\,1}+ \cdots + n_{{\rm g}\,k}$ are the total numbers of
1178: dark-matter and gas particles, respectively (which are equal, in our case).
1179: The latter form has the advantage of being the product of $k$ binomial
1180: numbers, one per cell, times an overall factor. Each
1181: binomial number expresses the number of ways of dividing the total number of
1182: particles in the corresponding cell between the respective numbers of 
1183: gas and dark-matter particles.
1184: We can associate the (base-two) logarithm of that binomial number with a
1185: ``cell entropy''. This entropy is maximal when the numbers of dark-matter and
1186: gas particles in the cell are equal and vanishes when there are no particles
1187: of one type in the cell.
1188: 
1189: Let us take $N_{{\rm m}} = N_{{\rm g}} = N$.  To compute the Bayes factor, we
1190: follow an analogous procedure to the one employed to compute the
1191: cross-correlation coefficient ${c}_{\rm gm}$.  Since the above-described
1192: Bayesian analysis is valid for any multinomial distribution or, in other
1193: words, the cells are of logical rather than physical nature, we can group
1194: several physical cells into one.  In particular, we can group the less
1195: significant cells, namely, the ones with small numbers of particles. A
1196: systematic procedure for grouping the cells consists in ordering them by
1197: decreasing total number of particles and separating the most populated ones to
1198: take them first into account. Thus, we take the first rank cell and compare it
1199: against the remainder, using the binomial Bayes factor.  The evidence for or
1200: against $p_{{\rm m}}=p_{{\rm g}}$ cannot be considered definitive yet. Then,
1201: we proceed to calculate the Bayes information of the two most populated cells
1202: plus the ``cell'' with the remainder, and so on. If a definite trend is
1203: soon established, that is to say, if the absolute value of the Bayes
1204: information grows steadily, we consider it as solid evidence for or against
1205: the hypothesis, according to the sign of $\log_2 b$.
1206: 
1207: \subsection{Bayesian analysis of the distributions at several scales}
1208: \label{appl_Bayes}
1209: 
1210: Here, we apply the above-explained procedure of systematic multinomial
1211: Bayesian analysis to some relevant cell distributions. We prefer to rank the
1212: cells again in order of decreasing {\em physical} mass, as in
1213: Sect.~\ref{cross}, rather than in order of decreasing total number of
1214: particles.
1215: 
1216: \begin{figure}
1217: \centering{\includegraphics[width=7.5cm]{Bayesian_10b.eps}
1218: \includegraphics[width=7.5cm]{Bayesian_5b.eps}}
1219: \caption{Bayesian evidence (in bits) for the equality of distributions
1220:   ($p_{{\rm m}}=p_{{\rm g}}$) derived from massive cells at $l=2^{-10}$
1221:   (halos) and at $l=2^{-5}$ (transition to homogeneity).}
1222: \label{Bayes}
1223: \end{figure}
1224: 
1225: We calculate the Bayes information $\log_2 b$ (in bits) for the hypothesis
1226: $p_{{\rm m}}=p_{{\rm g}}$, considering a growing number of the most massive
1227: cells.  The result is plotted in Fig.~\ref{Bayes}, for the two most relevant
1228: scales: $l=N^{-1/3}=2^{-10}$ (corresponding to the master cell distributions)
1229: and $l=2^{-5}$ (the scale of transition to homogeneity).  In the first case,
1230: we see that the 1000 most massive halos already show that the evidence against
1231: the hypothesis is overwhelming: note that $\log_2 b$ reaches $-130$ Kbits and
1232: keeps its downward tendency.  The evidence in the second case is mixed: it is
1233: increasingly negative up to the 500th rank, reaching $-3$ Kbits, but there it
1234: starts growing and becomes positive from the 1550th rank onwards (the 1550th
1235: cell contains $84\hspace{1pt}678$ dark matter particles and
1236: $83\hspace{1pt}885$ gas particles).  Considering that the total number of
1237: cells is $l^{-3}=2^{15}=32\hspace{1pt}768$ and that the corresponding total
1238: Bayes information is 185.4 Kbits, we could say that the evidence for the
1239: hypothesis $p_{{\rm m}}=p_{{\rm g}}$ is sufficient. However, the most massive
1240: cells clearly distinguish both distributions.
1241: 
1242: Proceeding to larger scales, namely, to $l=2^{-4}$, the above pattern
1243: holds. At $l=2^{-4}$, the Bayes information has some small fluctuations about
1244: zero in the first ranks, staying above $-97$ bits, and then it definitely
1245: grows, reaching a total of 28.8 Kbits. In this case, the evidence for $p_{{\rm
1246: m}}=p_{{\rm g}}$ is solid. Of course, the evidence for $p_{{\rm m}}=p_{{\rm
1247: g}}$ is stronger at larger $l$.
1248: 
1249: Regarding the origin of the difference between $p_{{\rm m}}$ and $p_{{\rm g}}$
1250: on small scales, let us focus on the master cell distributions.  An inspection
1251: of the dark-matter and gas particle counts in massive halos reveals that these
1252: consistently have fewer gas particles than dark-matter particles.  The smaller
1253: average number of gas particles is clearly observed in the respective log-log
1254: plots of counts in cells ranked by total physical mass, which are shown in
1255: Fig.~\ref{Z_DM-G}.  We observe in the figure that both distributions
1256: approximately follow linear log-log laws (sort of Zipf's laws), with common
1257: slope, but the line that corresponds to the dark-matter particles is
1258: definitely above. In other words, the massive halos concentrate less gas,
1259: although the number of gas particles decreases according to the same pattern
1260: as the number of dark-matter particles. One can also notice that there are
1261: more fluctuations in the number of gas particles, due to their smaller
1262: physical mass.
1263: 
1264: \begin{figure}
1265: \centering{\includegraphics[width=8cm]{halos.eps}}
1266: \caption{Particle counts of dark-matter (upper line) and gas (lower line) 
1267: in halos ranked in order of decreasing mass.}
1268: \label{Z_DM-G}
1269: \end{figure}
1270: 
1271: It is useful to express the differences between dark-matter and gas particle
1272: counts in terms of the cell entropies introduced in Sect.~\ref{Bayes-sect}
1273: after Eq.~(\ref{entBf}), as we do next.
1274: 
1275: \subsection{Entropic difference between the gas and dark-matter distributions}
1276: \label{G-DM_entropy}
1277: 
1278: In the expression (\ref{entBf}) of the Bayes factor, we can consider that the
1279: $k$ cells consist of a small number of massive cells $h = k-1$ (in order of
1280: decreasing mass) and a $k$-th ``cell'' containing the remaining particles.
1281: Furthermore, we assume that the $h$ massive cells contain together a total
1282: number of particles that is small in comparison with the total number of
1283: particles.  Recalling that $N_{{\rm m}} = N_{{\rm g}} = N= 2^{30} \gg 1$, we
1284: can make a suitable approximation of the Bayes information $\log_2 b$.
1285: Indeed, under the given conditions, the largest contributions to the Bayes
1286: information come from the cell with the remaining particles and from the
1287: overall factor in Eq.~(\ref{entBf}); namely,
1288: \begin{eqnarray*}
1289: \log_2 
1290: \binom{2N-\sum_{i=1}^h (n_{{\rm m}\,i}+n_{{\rm g}\,i})}%
1291: {N - \sum_{i=1}^h n_{{\rm g}\,i}} = 
1292: 2N - \sum_{i=1}^h (n_{{\rm m}\,i}+n_{{\rm g}\,i}) - \frac{\log_2(\pi N)}{2} +
1293: {\Mfunction{O}(N^{-1})}
1294: \end{eqnarray*}
1295: and
1296: \begin{eqnarray*}
1297: \log_2 
1298: \frac{(N+h)!\,^2}{(2N+h)!\, h!} = 
1299: -2N + h\,\log_2\frac{N}{2} + \frac{\log_2(\pi N)}{2} +
1300: {\Mfunction{O}(N^{-1})}- \log_2 h!\,,
1301: \end{eqnarray*}
1302: where we have used Stirling's approximation.  Note that both contributions
1303: have a first term proportional to $N$, but these large terms cancel one
1304: another. Therefore,
1305: \begin{eqnarray}
1306: \log_2 b &=& \sum_{i=1}^h \left[
1307: \log_2\binom{n_{{\rm m}\,i}+n_{{\rm g}\,i}}{n_{{\rm g}\,i}} - 
1308: (n_{{\rm m}\,i}+n_{{\rm g}\,i}) \right] + \nonumber\\
1309: &&
1310: h\,\log_2\frac{N}{2} - \log_2 h! + {\Mfunction{O}(N^{-1})}\,,
1311: \label{Bayes_info}
1312: \end{eqnarray}
1313: which only grows logarithmically with $N$.  This Bayes information is a sum of
1314: individual cell contributions plus a global contribution.  Each cell
1315: contribution is negative, because the cell entropy is bounded above by the
1316: number of particles in the cell, as is easily proved. If each massive cell
1317: contribution is larger in absolute value than $\log_2(N/2)=29$ bits, on
1318: average, the total information due to the $h$ massive cells plus the remainder
1319: is negative.
1320: 
1321: In particular, each massive halo contributes, on average, more than
1322: $\log_2(N/2)=29$ bits (in absolute value). This is the reason why the total
1323: Bayes information of a considerable number of massive halos is negative (as
1324: shown in Fig.~\ref{Bayes}).  For example, the contribution of the most massive
1325: halo, with $n_{{\rm g}} = 19\hspace{1pt}200$ and $n_{{\rm m}} =
1326: 20\hspace{1pt}658$, is $\log_2\binom{39\hspace{1pt}858}{19\hspace{1pt}200} -
1327: 39\hspace{1pt}858 = -38.5$ bits, larger in absolute value than 29 bits.
1328: 
1329: The contribution of a massive cell to the Bayes information can be expressed
1330: in a more familiar form by using again Stirling's approximation. When
1331: $n_{{\rm m}}, n_{{\rm g}} \gg 1$, the cell entropy can be written as
1332: \begin{eqnarray}
1333: \log_2\binom{n_{{\rm m}}+n_{{\rm g}}}{n_{{\rm g}}} 
1334: &\approx&  
1335: (n_{{\rm m}}+n_{{\rm g}}) \log_2(n_{{\rm m}}+n_{{\rm g}})
1336: - {n_{{\rm g}}} \log_2 {n_{{\rm g}}}
1337: - {n_{{\rm m}}} \log_2 {n_{{\rm m}}} \label{mix}\\
1338: &=& -(n_{{\rm m}}+n_{{\rm g}}) \left[{x_{{\rm g}}}
1339: \log_2 {x_{{\rm g}}} + (1-{x_{{\rm g}}}) \log_2 (1-{x_{{\rm g}}})
1340: \right], 
1341: \end{eqnarray}
1342: where we have introduced the fraction of gas particles
1343: $$
1344: {x_{{\rm g}}} = \frac{n_{{\rm g}}}{n_{{\rm m}}+n_{{\rm g}}}\,
1345: $$ 
1346: % 
1347: (the cell entropy has an analogous expression in terms of the fraction of
1348: dark-matter particles).  In those forms, the cell entropy can be identified
1349: with the familiar {\em entropy of mixing} \cite{Reif}.  Given $x_{{\rm g}}$,
1350: the cell entropy of mixing is proportional to the total number of particles in
1351: the cell; and so is the cell's contribution to the Bayes information, the
1352: proportionality constant being the entropy of mixing per particle minus one.
1353: The maximum entropy of mixing per particle is one bit and it corresponds to
1354: the most mixed distribution, with $x_{{\rm g}} = 1/2$.  Naturally, a fully
1355: mixed cell makes a vanishing contribution to the Bayes information.
1356: 
1357: Regarding the master cell distributions, we observe in Fig.~\ref{Z_DM-G} that
1358: the ratio $n_{{\rm g}\,i}/n_{{\rm m}\,i}$ for massive halos is almost constant
1359: on average; in fact, $n_{{\rm g}\,i}/n_{{\rm m}\,i} \simeq 0.81$. Hence,
1360: $x_{{\rm g}\,i} \simeq 0.45$, and the entropy of mixing per particle is almost
1361: constant and equal to
1362: $$-{x_{{\rm g}}} \log_2 {x_{{\rm g}}} - (1-{x_{{\rm g}}}) \log_2
1363: (1-{x_{{\rm g}}}) \simeq 0.992.$$
1364: %  
1365: Therefore, each halo contribution is roughly proportional to the total number
1366: of particles in it, with a common proportionality constant, namely, $0.992 - 1
1367: = -0.008$. This yields about $-250$ bits for the contribution per halo in
1368: Eq.~(\ref{Bayes_info}).  Thus, the absolute value of every massive halo
1369: contribution is larger than 29 bits, making the total Bayes information in
1370: Eq.~(\ref{Bayes_info}) negative and regularly decreasing with the number of
1371: halos, as displayed in Fig.~\ref{Bayes} (left).  However, the value of the
1372: entropy per particle is very close to one, telling us that the distributions
1373: are very mixed, even though not completely mixed.
1374: 
1375: Note that a constant ratio $n_{{\rm g}\,i}/n_{{\rm m}\,i}$ for all the cells
1376: would be in contradiction with $N_{{\rm g}} = N_{{\rm m}}$.  Thus, the ratio
1377: $n_{{\rm g}\,i}/n_{{\rm m}\,i} \simeq 0.81$, for example, must grow
1378: eventually, as $i$ runs over scarcely occupied cells. Even assuming that the
1379: ratio $n_{{\rm g}\,i}/n_{{\rm m}\,i}$ stays almost constant as the cell mass
1380: diminishes, the contribution per cell to the Bayes information is proportional
1381: to the total number of particles in it and, therefore, it must eventually
1382: become smaller than 29 bits (in absolute value). For one reason or another,
1383: the initial downward trend of the Bayes information must cease and turn
1384: upwards.  This turn is observed in the plot for $l = 2^{-5}$ in
1385: Fig.~\ref{Bayes}.
1386: 
1387: % insert \subsubsection{Connection with thermodynamics}
1388: \subsubsection{Connection with thermodynamics}
1389: 
1390: In thermodynamics, the entropy of mixing is, of course, only one part of the
1391: total entropy.  When other thermodynamic parameters are equal, the entropy of
1392: mixing determines the equilibrium configuration to be the most mixed
1393: distribution.  In our context, the gas and the dark matter are not comparable
1394: thermodynamically because, in principle, cold dark matter does not have
1395: temperature or pressure. However, CDM particles have velocity dispersion, such
1396: that one can assign it a temperature and, hence, a thermodynamic entropy
1397: (independent of the properties of the gas).  This is the dark-matter entropy
1398: considered by Faltenbacher et al.~\cite{Falten} in their study of the entropy
1399: of gas and dark-matter clusters from the Mare-Nostrum universe.  Once the dark
1400: matter is assigned thermal states, it is legitimate to compare them with the
1401: thermal states of the gas.
1402: 
1403: In a mixture of ideal gases, the chemical
1404: potential of each gas can be expressed as 
1405: $$
1406: \mu = -T \log_2 \frac{\z(T)}{n}
1407: $$
1408: % 
1409: \cite{Reif}, where $n$ is the number density, $\z(T)$ is an increasing
1410: function of $T$ characteristic of each gas, and we use units consistent with
1411: measuring the entropy in bits.  The function $\z(T)$ is calculated from the
1412: possible states of the gas particles (translational and internal states); for
1413: a monoatomic gas, $\z(T) \propto T^{3/2}$.  The condition of ``chemical''
1414: equilibrium of gas and dark matter is
1415: $$
1416: \frac{\mu_{{\rm g}}}{T_{{\rm g}}} = \frac{\mu_{{\rm m}}}{T_{{\rm m}}}\,,
1417: $$
1418: which allows for ${T_{{\rm g}}} \neq {T_{{\rm m}}}$. In fact, 
1419: chemical equilibrium implies
1420: $$
1421: \frac{\z_{{\rm g}}(T_{{\rm g}})}{n_{{\rm g}}} = \frac{\z_{{\rm m}}(T_{{\rm m}})}{n_{{\rm m}}} 
1422: \,\Ra \,
1423: \frac{\z_{{\rm g}}(T_{{\rm g}})}{\z_{{\rm m}}(T_{{\rm m}})} =
1424: \frac{n_{{\rm g}}}{n_{{\rm m}}},  
1425: $$
1426: %  
1427: and therefore different temperatures for different densities.  We have seen
1428: above that ${n_{{\rm g}}}/{n_{{\rm m}}} \simeq 0.81$ for massive halos.
1429: Hence, assuming that both $\z_{{\rm g}}$ and $\z_{{\rm m}}$ correspond to
1430: monoatomic gases, we deduce that $T_{{\rm g}}/T_{{\rm m}} \simeq 0.87$.
1431: 
1432: The conclusion that the dark matter temperature is higher than the gas
1433: temperature in massive halos may seem counterintuitive. But note that it
1434: relies on the assumption of independent local thermodynamical equilibria of
1435: dark matter and gas at different but well-defined temperatures, with the local
1436: temperature of dark matter given by its local velocity dispersion.  This
1437: assumption should imply that the dark matter also has pressure and, therefore,
1438: its dynamics should be governed by similar equations to the ones that govern
1439: the gas dynamics. However, the effects of dark-matter pressure are not
1440: considered in the Mare-Nostrum or other $N$-body cosmological simulations.
1441: 
1442: \section{Entropic comparison of distributions}
1443: \label{entropies}
1444: 
1445: In the comparison of the gas and dark-matter distributions, we have found it
1446: useful to introduce a cell entropy, recognizable as the entropy of mixing.  In
1447: general, the Boltzmann-Gibbs-Shannon (BGS) entropy of a discrete probability
1448: distribution $\{p_i\}_{i=1}^M$ is defined as
1449: \begin{equation}
1450: S(\{p_i\}) = -\sum_{i=1}^M p_i \log_2 p_i\,,
1451: \label{S}
1452: \end{equation}
1453: and it represents the uncertainty or lack of information of the result of an
1454: experiment with that probability distribution.  Note that we are using now
1455: ``discrete'' in the normal sense of the word in probability theory, namely,
1456: meaning that there is a list of possible events, as opposed to the continuum
1457: of possible events in a {\em continuous} distribution; but the probabilities
1458: $p_i$ are continuous variables.  The entropy has some desirable properties,
1459: such as the bounds $0 \leq S(\{p_i\}) \leq \log_2 M$, and the property of
1460: additivity, in particular, additivity for independent sets of events
1461: \cite{Renyi}.  This property and the bounds are shared by a uni-parametric
1462: class of functions, the R\'enyi entropies
1463: \begin{equation}
1464: S_q(\{p_i\}) = \frac{\log_2 (\sum_{i=1}^M {p_i}^q)}{1-q}, \quad 
1465: q \neq 1\,.
1466: \label{Sq}
1467: \end{equation}
1468: The value of $S_1$ is obtained as the limit $q \to 1$ and it coincides with
1469: the standard BGS entropy defined by Eq.~(\ref{S}).
1470: 
1471: We can apply the definition of entropy to a {\em discrete} distribution of $N$
1472: particles in $M$ cells, with occupation numbers $\{n_i\}_{i=1}^M$ (counts in
1473: cells) and hence expected probability distribution $\{p_i = n_i/N\}_{i=1}^M$.
1474: The entropy measures the uncertainty of the cell in which an arbitrary
1475: particle is located (or a group of $q$ particles, in the case of $S_q$ with $q
1476: \in \mathbb{N}$).  In particular, we can interpret Eq.~(\ref{S}) as follows.
1477: According to Boltzmann, one should weight a macroscopical state, given by a
1478: set of occupation numbers, with the number of microscopical states compatible
1479: with it (the Boltzmann weight). Then, the entropy is the logarithm of this
1480: weight.  Since the number of states compatible with the occupation numbers
1481: $\{n_i\}_{i=1}^M$ is given by the corresponding multinomial number, the
1482: entropy is given by the logarithm of that multinomial number, namely,
1483: $$
1484: \log_2\binom{N}{n_{1} \cdots n_{M}} \approx  - N \sum_{i=1}^M p_i \log_2 p_i 
1485: \,,
1486: $$
1487: % 
1488: where we have assumed that $n_i \gg 1$, equivalent to neglecting the effect of
1489: particle discreteness.  The entropy per particle $S(\{p_i\})$ is positive and
1490: bounded above by $\log_2 M$.  If the distribution is uniform, the bound is
1491: reached; in particular, the bound is $\log_2 M = -\log_2 V$.  Then, the
1492: distribution contains the largest uncertainty or, equivalently, the smallest
1493: information. Moreover, all the R\'enyi entropies reach the same bound.
1494: 
1495: Naturally, it is important to know the behaviour of the entropies in the
1496: continuum limit of the discrete distribution $\{p_i\}_{i=1}^M$, as the cell
1497: size $V \to 0$ and $M \ra \infty$ (for the distribution of $N$ particles in
1498: $M$ cells, one must let $N \ra \infty$ before $M \ra \infty$).  Not
1499: surprisingly, the entropies diverge in the continuum limit: one needs an
1500: infinite amount of information to locate a point in a continuum.  R\'enyi
1501: \cite{Renyi} describes the growth of the $S_q$ as the distribution becomes
1502: continuous in terms of {\em dimensions}; namely, he defines for the continuous
1503: distribution the dimensions
1504: $$
1505: D_q = \lim_{V \to 0} \frac{3\,S_q(\{p_i\})}{-\log_2 V},
1506: $$ 
1507: % 
1508: assuming that the limit exists.  These R\'enyi dimensions are standard in
1509: multifractal analysis; they have been already introduced in Sect.~\ref{anal},
1510: Eq.~(\ref{Dq}), and used in subsequent sections.  The most important R\'enyi
1511: dimension is $D_1$, which is defined by the divergence of the standard BGS
1512: entropy and is the dimension of the set of singularities where the probability
1513: concentrates.  Since the full set of R\'enyi dimensions characterizes the
1514: information content of the distribution in the continuum limit, we deduce that
1515: all the continuous distributions with the same spectrum of R\'enyi dimensions
1516: appear equivalent in regard to their information content.  In particular,
1517: every continuous distribution with $D_q = 3, \; q \in \mathbb{R},$ appears
1518: equivalent to a homogeneous and uniform distribution, in which the R\'enyi
1519: entropies reach their upper bound (note that $D_q = 3$ is the upper bound to
1520: the R\'enyi dimensions).  Indeed, only part of the information contained in a
1521: continuous distribution is preserved in its R\'enyi dimensions.
1522: 
1523: One can further define the information content of a continuous distribution
1524: in terms of its probability density \cite{Renyi}, if this density is well
1525: defined.  However, we are studying distributions with singularities.  In a
1526: singular distribution, the singularities must be confined to a set of zero
1527: volume, but they can be crucial for determining the distribution (for example,
1528: consider a distribution concentrated in just one point, namely, a Dirac delta
1529: distribution).  Therefore, let us focus, for the moment, on regular
1530: distributions with well-defined probability density $p(x)$ everywhere and $D_q
1531: = 3$ for all $q$.%
1532: \footnote{ In rigorous mathematical terms, the needed regularity condition is
1533: {\em absolute continuity} with respect to the Lebesgue measure, namely, the
1534: condition that every set with zero volume (null Lebesgue measure) contains no
1535: mass.  It implies, by the Radon-Nikodym theorem, that the mass distribution is
1536: given by the integral of a density that is unique (almost everywhere)
1537: \cite{measure}.  In fact, absolute continuity allows some singularities, for
1538: example, isolated power-law singularities. These singularities are compatible
1539: with $D_1 = 3$, which is the only condition that we actually need in the
1540: following.  Moreover, there are very mild singularities that are compatible
1541: with $D_q = 3$ for all $q \in \mathbb{R}$.}
1542: % 
1543: The probability in an element of volume $V$ is given, as $V \to 0$, by $p(x)
1544:   V$, where $x$ belongs to that element of volume (this dependence of
1545:   probability on volume derives from the local dimension being $\a=3$
1546:   everywhere).  Therefore,
1547: \begin{eqnarray*}
1548: S(\{p_i\}) &\approx& -\sum_i p(x_i)\,V \log_2[p(x_i)\,V] \\
1549: &=& -\sum_i p(x_i)\,V \log_2[p(x_i)] - \log_2 V,
1550: \end{eqnarray*}
1551: where the sum runs over a partition of the total volume in volume-$V$ elements
1552: (a partition in cells, for example).  In the limit $V \to 0$, we can write the
1553: entropy as the sum of a finite part and a divergent part, namely,
1554: $$
1555: S[p(x)]  \approx - \int p(x)\, d^3x\, \log_2[p(x)] - \log_2 V.
1556: $$ 
1557: Naturally, the divergent part just tells us that $D_1 = 3$, whereas the 
1558: finite part is a non-trivial integral of the density.
1559: 
1560: The finite part of the total entropy is not defined in an absolute way: for
1561: partitions in unequal volume elements, when the continuum limit is taken, the
1562: logarithm in the integrand is replaced with $\log_2[\phi(x)p(x)]$, where
1563: $\phi(x)$ is a positive function.  On the other hand, while the total entropy
1564: is always positive, its finite part can be negative.  For these reasons, it is
1565: necessary to introduce the {\em relative entropy}.  Conventionally, the
1566: entropy of the density $p(x)$ relative to the density $q(x)$ is defined as%
1567: \footnote{Here we incur a slight notational inconsistency, since we have been
1568:   using $q$ for the parameter in the R\'enyi entropies or dimensions. Hence,
1569:   we leave it to the reader to discern from the context whether $q$ means the
1570:   probability distributions $q(x)$ or $q_i$ or the number $q$.}
1571: $$
1572: S(p | q) = \int p(x)\, d^3x\, \log_2\frac{p(x)}{q(x)}\,,
1573: $$
1574: % 
1575: where it is understood that $p(x)=0$ wherever $q(x)=0$.  The relative entropy
1576: is always non-negative.  It is also called the Kullback or Kullback-Leibler
1577: divergence, and it is studied in detail by Kullback \cite{Kullback} (note that
1578: ``divergence'' means discrimination measure in the statistical context).
1579: Therefore, the absolute entropy of a coarse-grained distribution gives rise,
1580: in the continuum limit, to an absolute part, the dimension, and a relative
1581: part, the relative entropy.%
1582: \footnote{It is useful (but optional) to also define the relative entropy of
1583:   discrete distributions \cite{Renyi}.}  
1584: Only the latter differentiates regular distributions.  Notice that the
1585: entropy relative to the uniform distribution is simplest but is only
1586: defined for distributions over a finite volume (in our case, the unit cube).%
1587: \footnote{The relative entropy with respect to the uniform distribution has
1588:   been considered as a measure of the evolution of inhomogeneity in cosmology
1589:   by Hosoya, Buchert \& Morita \cite{Hosoya}.}
1590: 
1591: These results hold for singular multifractal distributions with $D_1 < 3$,
1592: after the necessary adaptations. One singular distribution $\nu$ can be
1593: relatively regular, that is to say, it can be regular with respect to another
1594: singular distribution $\mu$.%
1595: \footnote{Again, the appropriate mathematical definition of regularity
1596: is absolute continuity, now with respect to the measure $\mu$ (every set with
1597: null $\mu$-measure has null $\nu$-measure). By the Radon-Nikodym theorem,
1598: there is a density $d\nu/d\mu$, unique except in a set of null $\mu$-measure.}
1599: % 
1600: This essentially means that the singularities of $\nu$ form a subset of 
1601: the singularities of $\mu$. 
1602: The entropy of $\nu$ relative to $\mu$ is defined as
1603: $$
1604: S(\nu | \mu) = \int d\nu(x)\, \log_2\frac{d\nu(x)}{d\mu(x)} \geq 0, 
1605: $$ where $d\nu(x)/d\mu(x)$ is the density of $\nu$ with respect to $\mu$ at
1606: the point $x$.  This relative entropy differentiates one multifractal
1607: distribution ($\nu$) from another ($\mu$), when the former is regular with
1608: respect to the latter and, in particular, they have the same dimension $D_1$.
1609: In fact, $S(\nu | \mu) = 0$ if and only if $\nu = \mu$.
1610: 
1611: The R\'enyi entropy $S_q$ (\ref{Sq}) also gives rise in the continuum limit to
1612: a divergent part, and hence the dimension $D_q$, and to a finite part.  This
1613: finite part motivates the definition of the relative R\'enyi entropy
1614: \begin{equation*}
1615: S_q(\nu | \mu) = \frac{1}{1-q}\,\log_2 \left[\int d\nu(x)
1616: \left(\frac{d\nu(x)}{d\mu(x)}\right)^{q-1}\right], \quad 
1617: q \neq 1\,.
1618: \end{equation*}
1619: However, this relative entropy is less useful than the 
1620: standard (Kullback-Leibler) relative entropy. 
1621: 
1622: The relative entropy differentiates distributions but has two shortcomings.
1623: First, $S(\nu | \mu)$ is only defined when $\nu$ is $\mu$-regular. Second, the
1624: relative entropy does not have the necessary properties to qualify as a
1625: distance between distributions: it fails to be symmetric or to fulfill the
1626: triangle inequality.  However, it is possible to define a real distance
1627: between any two distributions in terms of their entropies.  For discrete
1628: distributions, Endres \& Schindelin \cite{IEEE} define
1629: \begin{eqnarray*} 
1630: D^2_{PQ} &=& 2 S(R) - S(P) - S(Q) \\
1631: &=& \sum_{i=1}^M \left(p_i \log_2 \frac{2p_i}{p_i+q_i} 
1632:  + q_i \log_2 \frac{2q_i}{p_i+q_i} \right),
1633: \end{eqnarray*}
1634: where $P=\{p_i\}$, $Q=\{q_i\}$ and $R=\{(p_i+q_i)/2\}$. Then, they prove that
1635: $D_{PQ}$ is a distance.  Furthermore, Endres \& Schindelin \cite{IEEE}
1636: note that it can be applied to continuous distributions. This follows from
1637: the alternative expression
1638: $$
1639: D^2_{PQ} = S(P|R) + S(Q|R),
1640: $$ 
1641: % 
1642: that is to say, from $D^2_{PQ}$ being a sum of relative entropies, 
1643: in addition to the fact
1644: that any two continuous distributions are both regular
1645: with respect to their mean.  Therefore, $D_{PQ}$ is well defined in the
1646: continuum limit of $P$ and $Q$.
1647: 
1648: Thus, we can measure the distance between the coarse-grained distributions
1649: $p_i = n_{{\rm g}\,i}/N$ and $q_i = n_{{\rm m}\,i}/N$, where $n_{{\rm
1650:     g}\,i},n_{{\rm m}\,i} \gg 1$, and then we can take the continuum
1651: limit. The distribution $R$ corresponds to the total particle distribution.
1652: The squared distance between the coarse distributions is
1653: \begin{eqnarray}
1654: D^2_{PQ} = 
1655: 2+\frac{1}{N} \sum_{i=1}^M \left({n_{{\rm g}\,i}} \log_2 {n_{{\rm
1656: g}\,i}} +{n_{{\rm m}\,i}} \log_2 {n_{{\rm m}\,i}} - 
1657: (n_{{\rm m}\,i}+n_{{\rm
1658: g}\,i}) \log_2(n_{{\rm m}\,i}+n_{{\rm g}\,i}) \right) 
1659: \label{Dpq1}
1660: \\
1661: = \frac{1}{N} \sum_{i=1}^M \left({n_{{\rm g}\,i}} \log_2 {n_{{\rm
1662: g}\,i}} +{n_{{\rm m}\,i}} \log_2 {n_{{\rm m}\,i}} + 
1663: (n_{{\rm m}\,i}+n_{{\rm g}\,i}) 
1664: [1-\log_2(n_{{\rm m}\,i}+n_{{\rm g}\,i})] \right).
1665: \label{Dpq2}
1666: \end{eqnarray}
1667: Referring to the expression (\ref{mix}) of the cell entropy, we deduce that,
1668: in the sum of terms (one per cell) given by Eq.~(\ref{Dpq2}), each term
1669: represents the gap between the maximum cell entropy of mixing (one bit per
1670: particle) and its actual value, just like in the sum of cell contributions in
1671: the Bayes information (\ref{Bayes_info}).  Naturally, $D^2_{PQ}$ decreases
1672: with mixing and vanishes for the most mixed distribution $P = Q =
1673: R$. Conversely, it takes its maximum, $D^2_{PQ} = 2$, when $\{n_{{\rm
1674: g}\,i}\}$ and $\{n_{{\rm m}\,i}\}$ are disjoint, namely, when they are not
1675: mixed at all [as we deduce from Eq.~(\ref{Dpq1})].  Regarding the continuum
1676: limits of $P$ and $Q$, Endres \& Schindelin's distance is maximal if they are
1677: {\em mutually singular}, namely, if they concentrate in disjoint sets. The
1678: continuum limits of disjoint $\{n_{{\rm g}\,i}\}$ and $\{n_{{\rm m}\,i}\}$
1679: give rise to two mutually singular distributions but the definition
1680: encompasses more general cases.%
1681: \footnote{The definition of mutually singular distributions is given by, e.g.,
1682:   Capinski \& Kopp \cite{measure}.  A particularly clear case of mutually
1683:   singular distributions occurs when they have disjoint supports, but this is
1684:   not necessary: for example, the uniform distributions in the Cantor set and
1685:   in the unit interval, respectively, are mutually singular, although the
1686:   Cantor set is contained in the unit interval.}
1687: 
1688: Let us notice that the above defined statistical distance is consistent with
1689: our Bayesian analysis but cannot replace it.  Firstly, it relies on the
1690: approximation $n_{{\rm g}\,i},n_{{\rm m}\,i} \gg 1$, that is to say, on
1691: neglecting the discreteness effect due to particle counts.  In this
1692: approximation, the entropy of mixing in the form given by Eq.~(\ref{mix}) is
1693: just the asymptotic form of the cell entropies in Eq.~(\ref{Bayes_info}); but
1694: note that the global contribution in Eq.~(\ref{Bayes_info}) diverges as $N \ra
1695: \infty$.  Lastly, it is a general fact that a statistical distance cannot
1696: provide a sharp criterion to decide if two discrete distributions are samples
1697: from the same continuous distribution, and it is on the same footing as the
1698: cross-correlation coefficient in that regard.
1699: 
1700: Endres \& Schindelin's distance can be connected with a standard statistical
1701: measure of discrimination as follows.  Let us note that $D^2_{PQ}$ adopts a
1702: simplified form when $P$ and $Q$ are close \cite{IEEE}, namely,
1703: $$ D^2_{PQ} \approx \frac{1}{\ln 2}\sum_{i=1}^M \frac{(p_i - q_i)^2}{2(p_i
1704: + q_i)} = \frac{1}{2\ln 2}\,\chi^2_{PQ}\,,
1705: $$
1706: %
1707: where the last expression refers to Pearson's chi-square test of
1708: discrimination, which can be considered a particular case of the 
1709: Endres-Schindelin distance.%
1710: \footnote{The connection of Pearson's chi-square test with information theory
1711:   can be obtained directly from the relative entropy \cite{Kullback}. However,
1712:   $\chi_{PQ}$ is much closer to Endres \& Schindelin's distance: it is also a
1713:   distance and, furthermore, $\chi^2_{PQ}/(2\ln 2) \leq D^2_{PQ} \leq
1714:   \chi^2_{PQ}\,,$ for any $P$ and $Q$.}
1715: In our case, 
1716: $$ \chi^2 = \sum_{i=1}^M \frac{(n_{{\rm m}\,i} - n_{{\rm
1717: g}\,i})^2}{n_{{\rm m}\,i}+n_{{\rm g}\,i}}\,.
1718: $$ 
1719: %
1720: The chi-square test has the advantage of highlighting that the expected
1721: fluctuations of $|n_{{\rm m}\,i} - n_{{\rm g}\,i}|$ in a common distribution
1722: are of the order of $(n_{{\rm m}\,i} + n_{{\rm g}\,i})^{1/2}$.  At any rate,
1723: the test is based on an approximation of $D^2_{PQ}$ and it cannot provide
1724: a sharp criterion of discrimination either.
1725: 
1726: \subsection{Bias as entropic distance}
1727: 
1728: In cosmology, the bulk of mass belongs to the dark matter, so the distribution
1729: of gas (or galaxies) is assumed to be ``biased'' with respect to the total
1730: matter distribution, dominated by the dark matter.  Since we normalize to one
1731: both the dark matter and the gas total masses, both components play a
1732: symmetrical r\^ole in our statistical analyses. Therefore, our measure of bias
1733: must be just a measure of discrimination between two probability distributions
1734: (a ``divergence'' or distance).  There are many such measures, but the notions
1735: of relative entropy and Endres-Schindelin distance naturally arise in
1736: connection with our Bayesian analysis.  Regarding the Endres-Schindelin
1737: distance, mutually singular distributions are most distant, namely, at
1738: distance $\sqrt{2}$.  This distance diminishes if the distributions
1739: concentrate in a common set, but vanishes only when they coincide.  The
1740: relative entropy is not a distance but it is useful as well, because it
1741: diverges for mutually singular distributions and, therefore, it separates
1742: distributions better.  In fact, the relative entropy can be symmetrized with
1743: respect to the compared distributions, and then it diverges unless they are
1744: mutually regular.%
1745: \footnote{The symmetric relative entropy $S(P|Q) + S(Q|P)$ is called the
1746: Jeffreys divergence $J(P,Q)$ \cite{Renyi,Kullback}. Despite being symmetrical,
1747: it is not a proper distance, for it still fails to fulfill the triangle
1748: inequality.  It is trivially finite for distributions that are mutually
1749: regular, namely, absolutely continuous with respect to one another.  Kullback
1750: \cite{Kullback} always works within an equivalence class of mutually regular
1751: distributions.}
1752: 
1753: The simplest example of comparison of two distributions occurs when they are
1754: both regular, in particular, when they have everywhere well-defined densities
1755: $p(x)$ and $q(x)$. In spite of their individual regularity, they are mutually
1756: singular if they do not overlap, that is to say, if each density is positive
1757: only where the other density vanishes, then being at Endres-Schindelin
1758: distance $\sqrt{2}$.  As they overlap more and, furthermore, the densities
1759: approach one another, their Endres-Schindelin distance and their symmetric
1760: relative entropy both tend to zero.  On the other hand, the symmetric relative
1761: entropy is finite only if both distributions vanish in the same point set
1762: (disregarding sets of zero volume, of course).
1763: 
1764: Regarding singular distributions, the first condition for two distributions to
1765: be at small Endres-Schindelin distance is that they have the same R\'enyi
1766: dimensions and, therefore, the same multifractal spectrum.  However, this
1767: condition is far from being sufficient.  Indeed, the multifractal spectrum
1768: only gives the ``size'' (the dimension) of every set of singularities with
1769: common strength (local dimension), but tells us nothing about the precise
1770: geometry (location or shape) of those sets.  Like in the case of regular
1771: distributions, two distributions are at small Endres-Schindelin distance
1772: if the strength and location of their mass concentrations, in particular,
1773: their singularities, essentially coincide.  As regards the symmetric relative
1774: entropy, the singularities must actually coincide for it to be finite.
1775: 
1776: It has been remarked above that a statistical distance (or divergence) cannot
1777: provide a sharp distinguishability criterion.  In fact, the distinguishability
1778: criterion provided by the Bayes factor only makes sense for finite point
1779: distributions, namely, for deciding if two finite point distributions can be
1780: samples from the same multinomial distribution.  In this regard, the Bayesian
1781: comparison of the dark-matter and gas cell distributions in Sect.\
1782: \ref{appl_Bayes} has clearly ruled out a common multinomial distribution on
1783: nonlinear scales.  Nevertheless, the entropy of mixing per particle is very
1784: close to the maximum of one bit; for example, it is $0.992$ bits for massive
1785: halos in the master cell distributions.  Therefore, the two distributions are
1786: indeed very mixed (very close).
1787: 
1788: Furthermore, the closeness of the gas and dark matter distributions suggests
1789: that their individual singularities coincide and, therefore, the two
1790: distributions are mutually regular.  In the coarse formalism that we use, the
1791: local dimension of cell $i$ is
1792: $$
1793: \a_i = 3\frac{\log[n_i/(NV_0)]}{\log(V/V_0)}\,.
1794: $$
1795: Therefore, the difference between the strengths of gas and dark matter
1796: singularities is 
1797: $$
1798: \a_{{\rm g}\,i} - \a_{{\rm m}\,i}
1799: = 3\frac{\log(n_{{\rm g}\,i}/n_{{\rm m}\,i})}{\log(V/V_0)}\,.
1800: $$
1801: % 
1802: We can see that this difference vanishes if $n_{{\rm g}\,i}/n_{{\rm m}\,i}$
1803: stays bounded (above and below) while the cell volume $V$ shrinks.  Although
1804: we have found that the ratio $n_{{\rm g}\,i}/n_{{\rm m}\,i}$ is not unity in
1805: populated cells, its logarithm is small (in absolute value) with respect to
1806: $-\log(V/V_0)$ at the lower end of the multifractal scaling range, thus making
1807: $\a_{{\rm g}\,i}$ and $\a_{{\rm m}\,i}$ almost equal.  In general, if we
1808: define a local bias factor as the local relative gas concentration, the
1809: condition for common gas and dark-matter singularities is mild: the local bias
1810: factor must be bounded away from zero and infinity.
1811: 
1812: \section{Discussion and Conclusions}
1813: \label{discuss}
1814: 
1815: We have improved the method of coarse multifractal analysis based on counts in
1816: cells by devising a procedure for extracting from a sample of a distribution
1817: the maximal information about its multifractal properties. The procedure is
1818: based on a clear understanding of the r\^ole of the upper and lower cutoffs to
1819: scaling, which are, respectively, the homogeneity and discreteness scales.
1820: The homogeneity scale is used in the definition of coarse multifractal
1821: exponents [Eq.~(\ref{ctauq})], while the discreteness scale is crucial to
1822: understand and quantify the effects of under-sampling.  We have employed our
1823: procedure to analyse the gas and dark matter distributions in the Mare-Nostrum
1824: universe at redshift $z=0$.
1825: 
1826: The only intrinsic scale present in an $N$-body simulation is actually the
1827: discreteness scale $V= N^{-1}$ (besides the size of the simulation cube, which
1828: we take as the reference scale).  The homogeneity scale is present as well but
1829: it is dynamical and grows with time.  Between these two scales the matter
1830: distribution can be considered continuous and representative of the nonlinear
1831: dynamics.  The discreteness scale $V= N^{-1}$ defines what we call the master
1832: cell distribution, which best resolves the overall mass distribution. The mass
1833: function of objects at this scale (halos) adopts a power-law form with a
1834: large-mass cutoff, similar to the Press-Schechter mass function. However, its
1835: power-law exponent is $-2$, which would correspond to an initial power
1836: spectrum with index $n=-3$ in the Press-Schechter theory, whereas the actual
1837: value in the Mare-Nostrum universe is $n=1$. In conclusion, the Mare-Nostrum
1838: mass function confirms the form of the mass function found in Ref.~\cite{I4}
1839: and its independence of the initial power spectrum.
1840: 
1841: Of course, the Press-Schechter theory and the consequent mass function are not
1842: applicable to equal-size objects.  However, Vergassola et al \cite{V-Frisch},
1843: in their study of the adhesion model (described in Ref.~\cite{Shan-Zel}), also
1844: define coarse-grained objects of equal size and, nevertheless, they find a
1845: power-law mass function with exponent depending on the initial spectral index
1846: and with an exponential large-mass cutoff, like in the Press-Schechter theory.
1847: % But the precise forms of Press-Schechter and Vergassola et al power-law
1848: % exponents only agree in one dimension.
1849: On the other hand, Vergassola et al \cite{V-Frisch} show that the adhesion
1850: model gives rise to a multifractal cosmic-web structure (see also
1851: Ref.~\cite{Bou-M-Parisi}).  In this regard, it is especially interesting to
1852: compare our results with theirs, and to emphasize that the power-law exponent
1853: $-2$ is unrelated to the initial power spectrum, unlike their power-law
1854: exponent.  The dependence of their power-law exponent on the initial power
1855: spectrum is surely due to the nature of the Zel'dovich approximation, in which
1856: the dynamics is trivial before the formation of singularities.  In contrast,
1857: the real gravitational dynamics is {\em chaotic}.  Therefore, the multifractal
1858: attractor of the real dynamics is independent of the initial conditions and
1859: must arise even when the initial conditions do not have a scale invariant
1860: power spectrum.
1861: 
1862: The mass function power-law exponent $-2$ is, in fact, naturally associated
1863: with the multifractal mass concentrate. Furthermore, we find that the precise
1864: form of the exponential large-mass cutoff suggests that the power law is
1865: actually an approximation of a lognormal mass function, as expected in a
1866: multifractal \cite{I4} and found in the Mare-Nostrum universe on larger
1867: scales.
1868: 
1869: Our first direct test of scale invariance consists in calculating the coarse
1870: multifractal spectrum in a range of nonlinear scales, namely, from $l=2^{-12}$
1871: up to $2^{-7}$.  For this, we use the improved definition of coarse
1872: exponents~(\ref{ctauq}), which includes the scale of homogeneity (estimated
1873: through the condition $\mu_2 =1.1$).
1874: %by priorly estimating the scale of homogeneity and then explicitly introducing
1875: %it in our definition [Eq.~(\ref{ctauq})].
1876: This improvement is necessary when the scale of homogeneity is considerable
1877: smaller than the box size.  The resulting multifractal spectra (Fig.\
1878: \ref{MFspec}) agree in their respective ranges (except near $\a_{{\rm
1879: max}}$). Moreover, the spectra corresponding to the dark matter and to the gas
1880: are almost identical.  However, the introduction of the scale of homogeneity
1881: produces an anomalous extension of the multifractal spectrum: it gives rise to
1882: {\em negative} fractal dimensions. They can be understood as representing
1883: improbable matter fluctuations that can be ignored.
1884: 
1885: From the multifractal spectra, we deduce two important dimensions, namely, the
1886: dimension of the mass support $D_0 = 3$ and the dimension of the mass
1887: concentrate $D_1 \simeq 2.4$.  Both dimensions provide information on the type
1888: of multifractal cosmic-web structure.  The former dimension shows that this
1889: multifractal is non-lacunar while the latter shows that it is not very
1890: concentrated.  The overall weak concentration indicated by $D_1 \simeq 2.4$
1891: can be due to the dominance of surface singularities (``pancakes'') but can
1892: also be due to the clustering of lower dimensional singularities, namely,
1893: filaments or nodes.  Cosmic web singularities are difficult to define in
1894: galaxy or $N$-body samples, but can be partially unveiled with appropriate
1895: algorithms \cite{S4,vW-Sch}.  At any rate, one must notice that a non-lacunar
1896: cosmic web structure has a very complex geometry \cite{I5}.  Of course, this
1897: geometry is determined by the dynamics of gravitational collapse and, in
1898: particular, by its type of anisotropy; but further discussion of this question
1899: is beyond the scope of this work (the r\^ole of anisotropic collapse in the
1900: formation of the cosmic web is discussed in Ref.~\cite{Rien}, for example).
1901: 
1902: % Further geometrical (or topological) features can be obtained by 
1903: 
1904: Our study of the multifractal spectra on decreasing scales from $l=2^{-7}$ to
1905: $2^{-12}$, including the discreteness scale $l= N^{-1/3}= 2^{-10}$, allows us
1906: to discern the progressive influence of discreteness. The most obvious change
1907: is, of course, the shrinking range of $\a$, namely, the reduction of $\a_{{\rm
1908: max}}$ caused by lack of mass resolution: depleted small cells must be
1909: empty. Furthermore, the mass distribution is under-sampled in cells with few
1910: particles, altering the ends of the spectra near $\a_{{\rm max}}$.  We can
1911: measure these deviations, for we can compare small scale spectra with 
1912: the complete spectra at $l=2^{-7}$.
1913: Actually, the spectra are almost complete at $l=2^{-8}$. For $l>2^{-7}$, 
1914: there appear early signs of the transition to homogeneity.
1915: 
1916: It is interesting to connect our results about the influence of discreteness,
1917: which only concern the statistical properties of the redshift $z=0$
1918: distributions, with the studies by Kuhlman, Melott \& Shandarin \cite{KMS} and
1919: Splinter et al \cite{KMSS} of the {\em dynamical} effects of
1920: discreteness. Those authors conclude that these effects are the more important
1921: the less converging the particle motion is. Thus, we have, on the one hand,
1922: that expanding volume elements give rise to voids, with local dimension $\a >
1923: 3$, which are only well represented in the multifractal spectra corresponding
1924: to scales considerably larger than $l = N^{-1/3}$. On the other hand,
1925: collapsing volume elements give rise to mass concentrations with the smaller
1926: dimension the larger is the number of independent axes along which they
1927: collapse.  These mass concentrations can be well represented in the spectra
1928: corresponding to $l < N^{-1/3}$.  For example, isotropic collapse gives rise
1929: to the smallest dimension concentrations, which are the most robust against
1930: the effects of undersampling; and, in fact, the low-$\a$ end of the
1931: multifractal spectrum is essentially correct even for scales $l <
1932: 2^{-12}$. However, the strong singularities with low $\a$ do not represent the
1933: full cosmic web structure.
1934: 
1935: Our second and most direct test of scale invariance is made in the standard
1936: way, namely, by studying the dependence of the second order moment $M_2$ on
1937: the scale $l$: we calculate $M_2(l)$ from $l=2^{-12}$ to $2^{-2}$, a broad
1938: range that includes the discreteness and homogeneity scales. On the smaller
1939: scales, we correct for the effect of discreteness by suppressing under-sampled
1940: cells, according to the information provided by the already computed
1941: spectra. We find two well-defined scaling ranges: the fractal range, spanning
1942: from $l=2^{-12}$ to $2^{-6}$, and the homogeneous range, from $l=2^{-4}$
1943: upwards. The transition to homogeneity takes place between $l=2^{-6}$ and
1944: $l=2^{-4}$.  For definiteness, we choose as homogeneity scale $l_0=2^{-5}$,
1945: which in physical units is 16 $h^{-1}$ Mpc.  The fractal correlation
1946: dimensions are $D_2=1.26$, for the dark-matter, and $D_2=1.30$, for the gas,
1947: in accord with conventional values of the galaxy correlation dimension
1948: \cite{Jones-RMP,Sylos-Pietro}.
1949: 
1950: To find out if the equivalence of the gas and dark matter distributions goes
1951: beyond their scaling properties, we have undertaken a detailed statistical
1952: study of the relation between these distributions.  Since we employ the method
1953: of counts in cells, we have specified two kinds of comparison: (i) the two
1954: cell distributions, defined by their respective sets of occupation numbers
1955: $\{n_i\}$, are compared as if they were two discrete probability distributions
1956: with respective probabilities $\{p_i = n_i/N\}$; (ii) the two cell
1957: distributions $\{n_{{\rm m}\,i}\}$ and $\{n_{{\rm g}\,i}\}$ are compared to
1958: decide if it is likely that they are samples from the same multinomial
1959: distribution (given by some coarse distribution $\{p_i\}$).  The first kind of
1960: comparison leads us to measures discriminating between discrete probability
1961: distributions (and between their continuum limits).  We have considered
1962: firstly the cross-correlation coefficient and lastly entropic distances (or
1963: ``divergences''), actually motivated by our method of deciding if two cell
1964: distributions are samples of the same multinomial distribution.  Since there
1965: are many (pseudo)distances to discriminate between discrete probability
1966: distributions, the comparison based on one of them has no absolute value.
1967: However, all the measures that we employ to discriminate between the coarse
1968: gas and dark matter distributions tell us that they are very close.
1969: 
1970: To decide if it is likely that the two cell distributions $\{n_{{\rm m}\,i}\}$
1971: and $\{n_{{\rm g}\,i}\}$ are samples from the same multinomial distribution,
1972: we develop a Bayesian method of analysis.  The two distributions are compared
1973: by means of the Bayes information about the equality $p_{{\rm m}} = p_{{\rm
1974: g}}$, namely, by means of the logarithm of the corresponding Bayes
1975: factor~(\ref{Bf1}).  The Bayes information corresponding to a set of massive
1976: cells can be expressed as a sum of negative cell terms, proportional to the
1977: entropy of mixing per particle minus one, added to a positive global term.
1978: The application of this formula to the master cell distributions, starting
1979: from the most massive halos, demonstrates gas biasing.  In particular, the gas
1980: is less concentrated in massive halos.  The bias is attenuated on larger
1981: scales but only disappears at $l=2^{-4}$, namely, at the scale of full
1982: homogeneity.  Naturally, it is to be expected that there is no bias at
1983: homogeneity, for it essentially preserves the initial conditions.  However, we
1984: do not have any argument that forbids the bias from vanishing at a smaller
1985: scale, so the fact that it vanishes only at homogeneity could be coincidental.
1986: 
1987: Since the Bayesian analysis can be formulated in terms of the entropy of
1988: mixing, we have studied in detail the entropic comparison of continuous
1989: distributions.  We must assume that the R\'enyi entropies of the compared
1990: distributions have well defined continuum limits, which amounts to assuming
1991: that the distributions are multifractal (including regular distributions with
1992: $D_q=3$).  Thus, the first element of comparison is the spectrum of R\'enyi
1993: dimensions or, equivalently, the multifractal spectrum.  As regards their
1994: multifractal spectra, the dark matter and gas distributions in the
1995: Mare-Nostrum universe are indistinguishable.  However, the multifractal
1996: spectrum gives the sizes of the sets of dark-matter or gas concentrations (or
1997: depletions) with equal strength but is insensitive to the location of those
1998: sets.  In fact, the R\'enyi dimensions only contain partial information about
1999: a continuous distribution.  In particular, $D_1$ represents only one part of
2000: its entropy.  Another part of the entropy is of relational nature and can be
2001: expressed as a relative entropy or as a statistical entropic distance equal to
2002: (the square root of) the {\em neg-entropy} of mixing, proportional to one
2003: minus the entropy of mixing per particle.  The high entropy of mixing or small
2004: entropic distance between the gas and dark-matter distributions is due to the
2005: fact that their respective singularities actually coincide, namely, the
2006: respective singularities at the same positions have equal local dimensions.
2007: 
2008: The appearance of common singularities in the gas and in the dark matter
2009: surely has a physical origin, despite the differences between the dynamics of
2010: each component.  It is natural to conjecture that the common multifractal
2011: structure is due to the fact that the gas and the dark matter are both
2012: dominated, on a long range of scales, by the gravitational interaction, which
2013: produces common power-law singularities.  The differences in the dynamics are
2014: the cause of gas biasing but do not interfere with the essential multifractal
2015: features of the distributions (except on very small scales).  In fact, the
2016: Mare-Nostrum universe is not based on a very realistic model of gas dynamics,
2017: insofar as it does not consider thermal radiation or conduction.
2018: Nevertheless, if the cosmic web singularity structure is due to gravity only,
2019: the analysis of future simulations will corroborate that the gas biasing does
2020: not alter that structure.  Then, we can speak of a kind of {\em universality}:
2021: the cosmic dynamics has a unique type of cosmic web multifractal attractor,
2022: independent of the initial conditions.  In particular, the multifractal
2023: spectrum obtained here from the Mare-Nostrum universe or before from the GIF2
2024: simulation \cite{I4} must be characteristic of the cosmic web.
2025: 
2026: 
2027: \begin{acknowledgments}
2028: I thank Gustavo Yepes for making the Mare-Nostrum data available to me. 
2029: \end{acknowledgments}
2030: 
2031: \begin{thebibliography}{99}
2032: 
2033: \bibitem{Shan-Zel} Shandarin S.F. and Zel'dovich Ya.B. \rmp{61}{1989}{185}
2034: \bibitem{Rien} van de Weygaert R., \textit{Froth Across the Universe, Dynamics and Stochastic Geometry of the Cosmic Foam}, in: Modern Theoretical and Observational Cosmology, M.~Plionis and S.~Cotsakis (eds.) Vol.\ 276, 119, Kluwer 2002
2035: \bibitem{Gott1} Gottl\"ober S., Yepes G., Wagner Ch. and Sevilla, R., The Marenostrum Universe, Proceedings of XXVI Astrophysics Moriond Meeting, 2006, \astroph{0608289}
2036: \bibitem{Falten} Faltenbacher, A., Hoffman, Y., Gottl\"ober, S. and Yepes, G., \newjournal{Mon.\ Not.\ R.\ Astron.\ Soc.}{}{376}{2007}{1327--1334}
2037: \bibitem{Gott2} Gottl\"ober S., Yepes G., \apj {664}{2007}{117--122}
2038: \bibitem{Mandel} 	Mandelbrot B.B.,	The fractal geometry of nature (rev.\ ed.\ of: Fractals, 1977),	W.H. Freeman and Company	(1983)
2039: \bibitem{Valda} 	Valdarnini R., Borgani S. and Provenzale A.,	\apj {394}{1992}{422}
2040: \bibitem{Colom} 	Colombi, S., Bouchet, F.R. and Schaeffer, R.,	\newjournal{Astron.\ \& Astrophys.}{}{263}{1992}{1}
2041: \bibitem{Yepes} 	Yepes G., Dom{\'\i}nguez-Tenreiro R. and Couchman, H.P.M.,	\apj {401}{1992}{40}
2042: % \bibitem{I} Gaite J., 2005, Europhysics Letters 71, 332--338
2043: \bibitem{I4} 	Gaite J.,	\apj {658}{2007}{11}
2044: \bibitem{I5} Gaite J., \newjournal{JCAP}{}{11}{2009}{004} [\arXivid{0911.1871}]
2045: \bibitem{Borgani} Borgani S., \prep{251}{1995}{1--152}
2046: \bibitem{Jones-RMP} Jones B.J., Mart\'{\i}nez V.J., Saar E. and Trimble V., \rmp {76}{2004}{1211}
2047: \bibitem{Sylos-Pietro} Sylos Labini F. and Pietronero L., \newjournal{Eur.\ Phys.\ Jour.\ }{}{B 64}{2008}{615--623}
2048: % Statistical Physics for Cosmic Structures, {\tt arXiv:0712.0293}
2049: \bibitem{Pee} Peebles P.J.E., Lectures at the Nonlinear Cosmology Workshops, Nice 2004 and 2006, available at the \href{http://www.oca.eu/etc7/ncp06/program.html}{workshop Web site}
2050: \bibitem{KMS} Kuhlman B., Melott A.L. and Shandarin S.F., \apj{470}{1996}{L41}
2051: \bibitem{KMSS} Splinter R.J., Melott A.L., Shandarin S.F. and Suto Y., \apj{497}{1998}{38--61}
2052: \bibitem{Springel} Springel V., \newjournal{Mon.\ Not.\ R.\ Astron.\ Soc. }{}{364}{2005}{1105}
2053: \bibitem{Harte} Harte D., Multifractals: theory and applications, Chapman \& Hall, Boca Raton (2001)
2054: \bibitem{Mandel2} Mandelbrot B.B., \newjournal{Proc.\ R.\ Soc.\ Lond.}{}{A 434}{1991}{79--88}
2055: \bibitem{S4} Sheth J.V., Sahni V., Shandarin S.F. and Sathyaprakash B.S., \newjournal{Mon.\ Not.\ R.\ Astron.\ Soc.}{}{343}{2003}{22}
2056: \bibitem{vW-Sch} van de Weygaert R. and Schaap W., \textit{The Cosmic Web: Geometric Analysis}, in: Data Analysis in Cosmology, eds.\ V. Mart\'{\i}nez et al., Lecture Notes in Physics, 665, 289--419, Springer-Verlag (2008)
2057: %eds. V. Mart\'{\i}nez, E. Saar, E. Mart\'{\i}nez-Gonzalez, M.J. Pons-Border\'{\i}a
2058: \bibitem{Reif} Reif F., Fundamentals of Statistical and Thermal Physics, McGraw-Hill, New York (1965)
2059: % \bibitem{Wherl} Wherl A., \rmp{50}{1978}{221--260}
2060: \bibitem{Renyi} R\'enyi A., Calcul des probabilit\'es, Dunod, Paris (1966)
2061: % Probability Theory, Elsevier (1970)
2062: \bibitem{measure} Capinski M. and Kopp P.E., Measure, Integral and Probability, second edition, Springer Undergraduate Mathematics Series (2004) 
2063: \bibitem{Kullback} Kullback S., Information Theory and Statistics, Dover, NY (1968)
2064: \bibitem{Hosoya} Hosoya A., Buchert T.\ and Morita M., \prl{92}{2004}{141302}
2065: \bibitem{IEEE} Endres D.M. and Schindelin J.E., \newjournal{IEEE Trans.\ Info.\ Theory}{}{49}{2003}{1858}
2066: \bibitem{V-Frisch} Vergassola M., Dubrulle B., Frisch U. and Noullez A., \newjournal{Astron.\ \& Astrophys.}{}{289}{1994}{325--356}
2067: \bibitem{Bou-M-Parisi} 	Bouchaud J.P., M\'ezard M. and Parisi G., \pre{52}{1995}{3656}
2068: 
2069: \end{thebibliography}
2070: 
2071: \end{document}
2072: 
2073: