gr-qc0505082/eqs.tex
1: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2: %%%  The optimization paper
3: %$Id: eqs.tex,v 1.120 2005/05/17 13:34:16 badkri Exp $
4: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
5: \documentclass[twocolumn,showpacs,preprintnumbers,amsmath,amssymb]{revtex4}
6: %\documentclass[preprint,showpacs,preprintnumbers,amsmath,amssymb]{revtex4}
7: 
8: \usepackage{dcolumn}% Align table columns on decimal point
9: \usepackage{bm}% bold math
10: \usepackage{graphicx}
11: 
12: \def\ba{\begin{eqnarray}}
13: \def\ea{\end{eqnarray}}
14: \def\F{\mathcal{F}}
15: \def\n{\mathbf{n}}
16: \def\th{\textrm{\mbox{\tiny{th}}}}
17: \def\Tobs{T_{\textrm{\mbox{\tiny{used}}}}}
18: \def\Tcoh{\Delta T}
19: \def\Tsft{T_{\textrm{\mbox{\tiny{sft}}}}}
20: \def\coh{{\textrm{\mbox{\tiny{coh}}}}}
21: \def\nd{{\mathbf{n}}_d}
22: \def\SSB{\textrm{\mbox{\tiny{ssb}}}}
23: \def\ns{\textrm{\mbox{\tiny{NS}}}}
24: \def\min{\textrm{\mbox{\tiny{min}}}}
25: \def\max{\textrm{\mbox{\tiny{max}}}}
26: \def\sft{\textrm{\mbox{\tiny{sft}}}}
27: \def\res{\textrm{\mbox{\tiny{res}}}}
28: \def\RMS{\textrm{\mbox{\tiny{RMS}}}}
29: 
30: \def\ib{{(i)}}
31: \def\bmath#1{\mbox{\boldmath$#1$\unboldmath}}
32: \def\bmaths#1{\mbox{\scriptsize\boldmath$#1$\unboldmath}}
33: 
34: 
35: 
36: 
37: \def\etal{{\it et al.}}
38: \def\ie{{\it i.e.}}
39: \def\eg{{\it e.g.}}
40: \def\lap{\hbox{${_{\displaystyle<}\atop^{\displaystyle\sim}}$}}
41: \def\gap{\hbox{${_{\displaystyle>}\atop^{\displaystyle\sim}}$}}
42: 
43: \def\lesssim{\mathrel{\hbox{\rlap{\hbox{\lower4pt\hbox{$\sim$}}}\hbox{$<$}}}}
44: \def\gtrsim{\mathrel{\hbox{\rlap{\hbox{\lower4pt\hbox{$\sim$}}}\hbox{$>$}}}}
45: \def\alt{\mathrel{\hbox{\rlap{\hbox{\lower4pt\hbox{$\sim$}}}\hbox{$<$}}}}
46: \def\agt{\mathrel{\hbox{\rlap{\hbox{\lower4pt\hbox{$\sim$}}}\hbox{$>$}}}}
47: 
48: \newcommand {\be}{\begin{equation}}
49: \newcommand {\ee}{\end{equation}}
50: 
51: \begin{document}
52: 
53: \title{Improved Stack-Slide Searches for Gravitational-Wave Pulsars}
54: 
55: \newcommand*{\AEI}{Max-Planck-Institut f\"ur
56:     Gravitationsphysik, Albert-Einstein-Institut, Am M\"uhlenberg 1,
57:     D-14476 Golm, Germany}\affiliation{\AEI}
58: 
59: \author{Curt Cutler}\email{curt.cutler@aei.mpg.de}\affiliation{\AEI}
60: 
61: \author{Iraj Gholami}\email{iraj.gholami@aei.mpg.de}\affiliation{\AEI}
62: 
63: \author{Badri Krishnan}\email{badri.krishnan@aei.mpg.de}\affiliation{\AEI}
64: 
65: 
66: \date{\today}
67: 
68: \begin{abstract}
69: 
70: We formulate and optimize a computational search strategy for detecting
71: gravitational waves from isolated, previously-unknown neutron stars
72: (that is, neutron stars with unknown sky positions, spin frequencies,
73: and spin-down parameters).
74: It is well known that fully coherent searches over the relevant
75: parameter-space volumes are not computationally feasible, and so more
76: computationally efficient methods are called for.  The
77: first step in this direction was taken by Brady \& Creighton (2000), who
78: proposed and optimized a two-stage, stack-slide search algorithm.  We
79: generalize and otherwise improve upon the Brady-Creighton scheme in
80: several ways.  Like Brady \& Creighton, we consider a stack-slide scheme,
81: but here with an
82: arbitrary number of semi-coherent stages and with a coherent
83: follow-up stage at the end.
84: We find that searches with three semi-coherent stages are significantly
85: more efficient than two-stage searches (requiring about $2$--$5$ times less
86: computational power for the same sensitivity) and are only slightly
87: less efficient than searches with four or more stages.
88: We calculate the signal-to-noise ratio required for detection, as
89: a function of computing power and neutron star spin-down-age, using
90: our optimized searches.
91: \end{abstract}
92: 
93: 
94: \pacs{04.80.Nn, 95.75.Pq, 97.60.Gb}
95: \preprint{AEI-2005-104}
96: \maketitle
97: 
98: 
99: \section{Introduction}
100: \label{sec:intro}
101: 
102: 
103: In analyzing data from Earth-based and space-based gravitational-wave (GW)
104: detectors, we will be computationally limited in performing certain
105: types of searches---especially searches for long-lived signals described
106: by several unknown parameters.
107: For such signals, the number of template signals required to
108: discretely cover the parameter space (at useful resolution)
109: typically increases rapidly as a function of the observation time.
110: For ground-based detectors, such as LIGO, a well-known example is the
111: search for nearly periodic GWs from unknown, isolated, rapidly
112: rotating neutron stars (NSs). We will refer to NSs that are continuously
113: emitting GWs as ``GW pulsars''.
114: By ``unknown'', we mean that the GW pulsar's
115: position on the sky, frequency, and frequency derivatives are all
116: unknown, and so must be searched over. (The NS could be unknown either
117: because it is electromagnetically inactive, or because its
118: electromagnetic emission
119: does not reach us---e.g., because we do not intersect its radio pulsar beam.)
120: Brady et al.~\cite{bccs}
121: showed that straightforward matched-filter searches for unknown
122: GW pulsars would
123: be severely computationally limited; for example, searches for young, fast
124: NSs (NSs with GW frequencies as high as $1\,$kHz and spin-down ages
125: as short as $40$ yr) would be limited to observation times of order one day.
126: To address this problem, Brady \& Creighton~\cite{bc}
127: (henceforth referred to as BC) were the first to
128: consider hierarchical, multistage, semi-coherent searches for GW pulsars.
129: Briefly, a semi-coherent search is one where a sequence of
130: short data stretches are all coherently searched, using
131: some technique akin to matched filtering,
132: and then the resulting powers from the different stretches are summed.
133: The method is only
134: ``semi-coherent'' because powers are added instead of complex amplitudes; i.e.,
135: information regarding the overall phase of the signal in different
136: stretches is  discarded. This allows one to use a much coarser
137: grid on parameter space than would be required in a fully coherent search of
138: the same data.  BC developed a ``stack-slide''
139: method for summing the powers along different tracks in the time-frequency
140: plane, in close analogy to the ``power stacking'' method
141: (sometimes called the Radon transform) used in radio pulsar searches.
142: The basic idea of their two-stage search is to
143: identify a list of ``candidates'' (basically, promising-looking
144: regions in parameter space) in the first stage, using some
145: fraction of the
146: available data, and then to ``follow up'' those candidates using more
147: data in the second stage. In their scheme, both the first and second
148: stages are semi-coherent.
149: 
150: In this paper we revisit the problem of constructing efficient, hierarchical
151: searches for GW pulsars. We build on the BC treatment, but
152: we also significantly generalize and otherwise improve upon their
153: work. The most important improvements are that we consider searches
154: with $n$ semi-coherent stages (not just $2$), with surviving
155: candidates being winnowed at each stage, and we add on a fully coherent
156: final stage to verify or debunk any remaining candidates.
157: We also explicitly account for the unknown polarization of the source,
158: while this complication was omitted in BC, for simplicity.
159: Other important differences between our work and theirs will be highlighted
160: below.
161: 
162: This paper is organized as follows.  Section \ref{sec:basics} sets up
163: notation, describes the expected signal from an isolated GW pulsar,
164: and reviews the stack-slide algorithm.  Our general multistage strategy for
165: searching through large parameter spaces for GW pulsars,
166: using a combination of semi-coherent methods and coherent methods,
167: is explained in Section~\ref{sec:hierarchical}.
168: Our general search scheme contains a fair number of
169: free parameters (the number
170: and duration of the coherently analyzed stretches in each semi-coherent
171: stage, as well as the coarseness of the discrete grid used to
172: cover the parameter space of sought-for signals), which we
173: can adjust to make the search as efficient
174: as possible. Our general scheme for performing this optimization is
175: described in  Section~\ref{subsec:optimize}.
176: Section~\ref{sec:formulae} develops all the formulae
177: we need to evaluate the computational cost of any of our strategies, for
178: any desired sensitivity.
179: More specifically, section~\ref{subsec:metric} reviews the
180: template-counting formulae developed in BC;
181: section~\ref{subsec:fd} develops the equations
182: relating the thresholds that candidates must pass at different stages
183: (to advance to the following stage) to the false dismissal (FD) rates
184: at those stages, and hence to the overall sensitivity of the
185: search;
186: and section~\ref{subsec:cost}
187: derives estimates for the dominant computational cost of each
188: part of the search.
189: Section~\ref{sec:results} describes our results: the optimal
190: strategy (within our general scheme) and its sensitivity.
191: Section~\ref{sec:conclusions} concludes with a summary of our main results
192: and a discussion of open issues and future work.
193: 
194: 
195: \section{Notation and basics}
196: \label{sec:basics}
197: 
198: \subsection{The signal from a GW pulsar}
199: \label{subsec:pulsar}
200: 
201: Here we briefly review the expected GW
202: signal from a spinning neutron star.  Let $x(t)$ be the output of some
203: detector.  In the absence of any signal, $x(t)$ is
204: just noise $n(t)$, which we shall assume to be a stationary, Gaussian
205: stochastic process with zero mean.  In the presence of a signal, we
206: have
207: %
208: \be
209: x(t) = n(t) + h(t)
210: \ee
211: %
212: where the signal $h(t)$ is a deterministic function of time.
213: We assume that the GW pulsar is isolated and at rest with
214: respect to us, so that effects due to its motion can be neglected.
215: (More precisely, we assume these effects can be absorbed into an overall
216: Doppler shift, and so are unobservable.)
217: Let $t_\SSB$ be
218: time measured in the Solar System Barycenter (SSB) frame. The form of
219: $h(t)$ in this frame is a constant-amplitude sinusoid with phase given by
220: %
221: \be \label{eq:phase}
222: \Phi(t_\SSB) = \Phi_0 + 2\pi f_0 \Delta t_\SSB +
223: 2\pi \sum_{k=1}^{s}\frac{f_k}{(k+1)!}\left( \Delta t_\SSB
224: \right)^{k+1}
225: \ee
226: %
227: where $\Delta t_\SSB \equiv t_\SSB - t_\SSB^{(0)}$, with $t_\SSB^{(0)}$
228: being a fiducial start time; $\Phi_0$, $f_0$ and $f_k$ are respectively the
229: phase, frequency, and spin-down parameters at the start time, and $s$ is the
230: number of spin-down parameters that we search over.  Assuming that the
231: pulsar is isolated and emitting GWs due to a small
232: deviation from axisymmetry, the waveforms for the two polarizations are
233: %
234: \ba
235: h_+ &=& \frac{1}{2}h_0(1+\cos^2\iota)\cos\Phi(t)\, ,
236: \label{eq:waveformplus} \\  h_\times  &=&
237: h_0\cos\iota \sin\Phi(t) \label{eq:waveformcross}
238: \ea
239: %
240: where $h_0$ represents the angle-independent amplitude of the wave,
241: $\iota$ is the angle
242: between the spin-axis of the pulsar and the direction of the waves'
243: propagation, and
244: the frequency $f = \dot\Phi/2\pi $ of the emitted GWs is equal
245: to twice the rotational frequency of the star.
246: 
247: Let $\mathbf{n}$ be the unit vector pointing from the Solar System
248: toward the pulsar,  $\mathbf{r}(t)$ be the position of the detector in
249: the SSB frame, and $\mathbf{v}(t)$ be its velocity with $t$ being the
250: time in the detector frame.  Ignoring relativistic
251: corrections~\footnote{Of course an actual search must take into
252:   account the so-called 
253: Einstein and Shapiro delays, but these are unimportant for the question
254: of how the search is most efficiently {\it organized}, which is the
255: focus of this paper.}, a wave
256: reaching the Sun at time $t_\SSB$ will reach the detector at time
257: %
258: \be \label{eq:tssb} t = t_\SSB -
259: \frac{\mathbf{r}(t)\cdot\mathbf{n}}{c} \,.\ee
260: %
261: As seen from Eqs.~(\ref{eq:phase}) and (\ref{eq:tssb}), to a good
262: approximation, the instantaneous frequency of the
263: signal as seen by the detector is given by the familiar Doppler shift
264: expression
265: %
266: \be \label{eq:doppler}
267: f(t) = \hat{f}(t) \left( 1 +
268: \frac{\mathbf{v}(t)\cdot\mathbf{n}}{c}\right)
269: \ee
270: %
271: where $\hat{f}(t)$ is the instantaneous frequency of the signal in the
272: SSB frame, and is given by
273: %
274: \be \label{eq:fhat}
275: \hat{f}(t) = f_0 + \sum_{k=1}^{s}\frac{f_k}{k!}\left( \Delta t_\SSB \right)^{k}\,.
276: \ee
277: %
278: Eqs.~(\ref{eq:doppler}) and (\ref{eq:fhat}) describe the frequency
279: modulation of the received signal.
280: The received signal is also amplitude modulated by the
281: time-changing antenna pattern of the detector as it is carried along
282: by the Earth's rotation.
283: The received signal $h(t)$ is a linear
284: combination of $h_+$ and $h_\times$:
285: %
286: \be\label{eq:detoutput}
287: h(t) = F_+(\n,\psi)h_+(t) + F_\times(\n,\psi)h_\times(t)
288: \ee
289: %
290: where $\psi$ is the polarization angle of the signal, and $F_{+,\times}$ are
291: the antenna pattern functions.  Due to the motion of the Earth, the $F_{+,\times}$
292: depend implicitly on time:
293: %
294: \ba F_+(t) &=& a(t)\cos 2\psi + b(t)\sin 2\psi  \\
295: F_\times (t) &=& b(t)\cos 2\psi - a(t)\sin 2\psi \ea
296: %
297: where the functions $a(t)$ and $b(t)$ are independent of $\psi$.
298: (In these equations, the angle between the arms of the detector is
299: taken to be $\pi/2$.)
300: We refer the reader to \cite{jks} for
301: explicit expressions for $a(t)$ and $b(t)$.
302: 
303: 
304: The modulated frequency is described by the $s+3$ parameters
305: consisting of
306: $f_0$ and $\vec{\lambda}:= (\mathbf{n}, \{f_k\}_{k=1\ldots s})$; we
307: shall often denote the pair $(f_0,\vec{\lambda})$ by the boldface symbol
308: $\bmath{\lambda}$.  Apart from the parameters $\bmath{\lambda}$, the
309: waveform (\ref{eq:detoutput})
310: depends on other parameters: the pulsar's orientation $\iota$,
311: polarization angle $\psi$, the initial phase $\Phi_0$, and the
312: amplitude $h_0$.  The optimal matched filter statistic
313: \cite{jks} for detecting the waveform must, in principle, search over
314: the entire parameter space $(\bmath{\lambda},\iota,
315: \psi,\Phi_0,h_0)$.  However, it turns out that the computationally
316: challenging part of the search involves just the $\bmath{\lambda}$; the
317: optimization over $(\iota,
318: \psi, \Phi_0, h_0)$ can be done analytically,
319: by means of the
320: $\F$-statistic defined in \cite{jks}.  The $\F$-statistic {\it is}
321: the optimal matched filter statistic maximized over $(\iota,
322: \psi, \Phi_0, h_0)$.  It is therefore only a function of
323: $(f_0,\vec{\lambda})$ and it is given by
324: %
325: \be\label{eq:fstatdef}
326: \F(f_0,\vec{\lambda}) = 4\left[\frac{B|F_a|^2 +
327: A|F_b|^2 - 2C\mathcal{R}(F_aF_b^\star)}{\Tcoh S_n(f_0) D} \right]
328: \ee
329: %
330: where $S_n(f)$ is the single-sided
331: power spectral density of the detector noise $n(t)$, and
332: %
333: \begin{eqnarray}
334: F_a &=& \int_{-\Tcoh/2}^{\Tcoh/2}
335: x(t)a(t)e^{-i\Phi(t;\bmaths{\lambda})}\,dt \, ,\label{eq:Fa}\\
336: F_b &=& \int_{-\Tcoh/2}^{\Tcoh/2}
337: x(t)b(t)e^{-i\Phi(t;\bmaths{\lambda})}\,dt \, , \label{eq:Fb} \\
338: A &=& (a||a)\,, \qquad B = (b||b) \,,\\
339: C &=& (a||b)\,,\qquad D = AB-C^2\,.
340: \end{eqnarray}
341: %
342: Here we have used the notation
343: %
344: \begin{equation}
345: (x||y) = \frac{2}{\Tcoh}\int_{-\Tcoh/2}^{\Tcoh/2} x(t)y(t)\,dt\,.
346: \end{equation}
347: %
348: In cases where the amplitude modulation can be ignored (e.g., for short
349: data segments, $\ll 1\,$day long, where the
350: $a(t)$ and $b(t)$ can be approximated as constant), we see that $\F$ is
351: proportional to the demodulated Fourier transform which matches just
352: the phase evolution:
353: %
354: \be \label{eq:approxF}
355: \F \propto |\tilde{X}(f,\vec{\lambda})|^2
356: \ee
357: %
358: where
359: %
360: \be \label{eq:demodulated}
361: \tilde{X}(f,\vec{\lambda}) = \int_{-\Tcoh/2}^{\Tcoh/2}x(t)e^{-i\Phi(t;\bmaths{\lambda})}\,dt\,.
362: \ee
363: %
364: The $\F$-statistic is the optimal (frequentist) detection
365: statistic for GW pulsars, and it is at the core of
366: some algorithms currently used to search for GW pulsars
367: in  LIGO and GEO data~\cite{S1:pulsar}.
368: Some important properties of the
369: $\F$-statistic are reviewed in section~\ref{subsec:fd}, and
370: a more detailed description can be found in \cite{jks}.
371: 
372: 
373: \subsection{The Stack-Slide algorithm}
374: \label{subsec:stackslide}
375: 
376: The stack-slide algorithm is best described with reference to the Doppler
377: shift formula of Eq.~(\ref{eq:doppler}). Imagine we have a data
378: stream $x(t)$ covering an observation time $\Tobs$, and we wish to
379: search for a GW pulsar with some parameters $\bmath{\lambda}$.  We break up the
380: data into $N$ smaller segments of length $\Tcoh=\Tobs/N$, and
381: calculate the Fourier spectrum of each segment. For now we assume each
382: segment is sufficiently short that the signal frequency remains
383: confined to a single discrete frequency bin.  If there is a signal present, it
384: will most likely be too weak to show up in a single segment with any
385: significant signal-to-noise-ratio (SNR).  However, we can increase the
386: SNR by adding the power from the different segments.  We must {\it not}
387: use the same frequency bin from each segment,
388: but rather must follow
389: the frequency evolution given by Eq.~(\ref{eq:doppler}).  Thus,
390: we `stack' the power after `sliding' each segment in frequency space.
391: Note that the sliding depends on $\vec{\lambda}$.  Thus, in practice, we
392: choose a grid in the space of $\vec{\lambda}$'s and the sliding is done
393: differently at each grid point.
394: 
395: As described above, the sensitivity of the stack-slide algorithm is
396: restricted due
397: to the length of $\Tcoh$; we should not take $\Tcoh$ to be too large, since then
398: we would lose SNR due to the signal power being spread over several frequency bins.
399: However, we can gather all the signal power back into a
400: single bin by taking account of the Doppler modulation and spin-down while
401: calculating the spectrum of a segment; i.e., we de-modulate each data
402: segment before summing.
403: 
404: With these concepts at hand, we can now describe the stack-slide
405: search for the $\F$-statistic.   The strategy is very similar to the
406: power summing method described earlier in this section.  Again we
407: break up the data of length $\Tobs$ into $N$ segments, each of length
408: $\Tcoh = \Tobs/N$.  We choose a point $\vec{\lambda}_d$ in
409: parameter space, and demodulate the signal accordingly.  We calculate
410: $\F(f,\vec{\lambda}_d)$ as a function of the frequency for each
411: segment and add the $\F$-statistic values after sliding the different
412: segments in frequency space appropriately.
413: 
414: As explained in BC, the resolution of sky- and
415: spin-down-space that suffices for the demodulation is not
416: fine enough for the stack-slide step. Thus at each stage
417: we use two grids on parameter space: a coarse one for
418: performing the short-segment demodulations and a fine one for
419: sliding and stacking the short-segment results.
420: We refer the reader to \cite{ip}
421: and the appendix of \cite{hough} for a detailed
422: derivation of the formula relating the required amount of sliding
423: to the parameters $\vec{\lambda}_d$.
424: 
425: 
426: \section{A multistage hierarchical search}
427: \label{sec:hierarchical}
428: 
429: 
430: \subsection{The general algorithm}
431: \label{subsec:algorithm}
432: 
433: The stack-slide search algorithm described in the previous section has
434: two components: 1) calculation of the $\F$-statistic for data stretches of length
435: $\Tcoh$, 2) summation of the resulting $\F$ values along the appropriate tracks in the time-frequency plane.
436: (If there are $N$ coherently analyzed segments, then the sums have  $N$ terms.)
437: If we had unlimited computational resources, we would
438: simply do a fully coherent search on all the data; i.e., set $N=1$ and take $\Tcoh$
439: to be the entire observation time.
440: However, the number of templates required for a fully
441: coherent search increases as a high power of $\Tcoh$, making this
442: impractical for all-sky searches.
443: 
444: To illustrate this point, consider an all-sky search for young, fast
445: pulsars, i.e., GW pulsars that have a spin-down age as short as $\tau_\min
446: = 40\,$yr and that emit GWs with frequency up to
447: $f_\max = 1000\,$Hz.
448: Let us assume that we have $30\,$days of data available to
449: us.  Imagine two different ways of looking for this pulsar: a full
450: $30$-day coherent integration versus a semi-coherent method where the
451: available data is broken up into 30 equal segments.  The formula for
452: the number of templates required for these searches is given below in
453: Eq.~(\ref{eq:Np}).  It turns out that the full coherent search
454: requires $\sim 4.2\times 10^{15}$ templates if we are to not lose more
455: than $30\%$ of the signal power.  On the other hand, the semi-coherent
456: search requires only $\sim 1.5\times 10^{11}$ templates for the same
457: allowed fractional loss in signal power.  The ratio of the
458: number of templates required for the two types of searches
459: increases rapidly with the observation time; for instance, for an
460: observation time of $40\,$ days, the corresponding numbers are $\sim 5.5 \times
461: 10^{16}$ and $\sim 8.3 \times 10^{11}$ for the full coherent and
462: semi-coherent searches respectively.
463: 
464: As illustrated by the above example, semi-coherent searches for unknown
465: GW pulsars are a compromise
466: forced upon us by limited computing power. Such searches will remain
467: computationally limited for the foreseeable future, so it behooves us to
468: organize them as efficiently as possible. In this paper we consider
469: a class of multistage, hierarchical  search algorithms.
470: Since our main ``problem'' is the
471: large volume of parameter space we need to search over,
472: the basic idea behind these algorithms is to identify and  discard
473: unpromising regions of parameter space
474: as fast as possible---without discarding real signals.
475: The type of scheme we consider is illustrated schematically in
476: Fig.~\ref{fig:hierarchical}.
477: The first stage is a semi-coherent search through some fraction of the
478: available data. A threshold is set, and candidates exceeding
479: this threshold are passed to the next stage.
480: The second stage is similar to the
481: first, but includes additional data and generally entails a finer
482: resolution of parameter space.
483: (The latter means that any candidate that survives the first semi-coherent
484: stage gives rise to a little crowd of nearby candidates that are
485: examined in the second semi-coherent stage.)
486:  Any candidate that exceeds the
487: second-stage threshold is passed on to the third stage, and so on.
488: In an $(n+1)$-stage search, any candidate surviving all $n$ semi-coherent
489: selections is subjected to a final, coherent search (which we consider
490: the $(n+1)^{\rm th}$ stage); if the final,
491: coherent threshold is exceeded, then a detection is announced.
492: We impose as a constraint that the false alarm (FA) rate for the entire search
493: must be $< 1\%$; i.e., if the data is actually just noise, then
494: the probability that a detection is announced must be $< 1\%$.
495: For reasons explained below, in realistic examples
496: this inequality is all too easy to satisfy;
497: the actual FA rate for our optimized searches
498: is typically smaller than $1\%$ by many orders of magnitude.
499: 
500: In the end, our search will be able to detect
501: a GW pulsar signal whose rms strength (at the detector) $h_\RMS$
502: exceeds some threshold value $h_{\th}$
503: (with a false dismissal rate of $10$--$15\%$).
504: We can think of $1/h_{\th}$
505: as the search's sensitivity.
506: We will optimize our search to get the maximum
507: sensitivity for any given computing power or, equivalently, to
508: find the minimum computer power necessary to attain any
509: given sensitivity.
510: 
511: %
512: \begin{figure}
513:   \begin{center}
514:   \includegraphics[width=\columnwidth]{hierarchical.eps}
515:   \caption{A hierarchical scheme for the analysis of large parameter space
516:   volumes for continuous wave searches.  Each step analyzes only those
517:   regions in parameter space that have not been discarded by any of
518:   the previous steps.}\label{fig:hierarchical}
519:   \end{center}
520: \end{figure}
521: %
522: 
523: The problem of optimizing a semi-coherent, hierarchical search
524: scheme for GW pulsars was first studied by BC.  The
525: present study builds upon the BC formalism, but there are also
526: some important differences. We call attention to the following ones:
527: %
528: \begin{description}
529: \item[1)] BC consider a hierarchical search
530: consisting of exactly two semi-coherent stages.
531: In the present work, we consider a
532: search consisting of an arbitrary number of semi-coherent steps, plus a
533: fully coherent, ``follow-up'' stage (utilizing all the available data)
534: to assess the significance of any surviving candidates.
535: The effect of the final, follow-up stage is to ensure
536: that the overall false alarm rate (fixed at exactly $1\%$ by BC) is
537: greatly reduced and, for all practical purposes, ceases to be a
538: constraint.
539: \item[2)] In BC's second semi-coherent
540: stage, all the data used in the first-stage is reanalyzed, along with some
541: ``fresh'' (as yet unanalyzed) data. {\it A priori}, it is not clear whether
542: this strategy is more efficient than one in which each semi-coherent stage
543: analyzes only fresh data. (E.g., the first stage analyzes $20$
544: days of data and generates candidates, the second semi-coherent stage
545: searches for  those candidates
546: in the {\it next} $50$ days of data and generates a list of candidates that
547: have still ``survived'', these survivors are searched for again in the
548: {\it next} $150$ days of data, etc.)
549: In this paper, we investigate both kinds of schemes: schemes where
550: previously analyzed data is always {\it recycled} into subsequent
551: stages, and schemes where each semi-coherent stage analyzes only fresh data.
552: 
553: \item[3)] For simplicity, BC ignored the fact that the GWs have
554: two possible polarizations (in effect pretending that the detectors measure a
555: scalar wave). This is a reasonable approximation when estimating
556: the number of grid-points needed to cover the parameter space, but
557: not, say, when trying to estimate the FA and FD rates
558: as a function of the threshold at some intermediate stage in the
559: search. (Roughly speaking, scalar waves with the same matched-filter SNR
560: would be easier to detect than actual GWs, since with GWs the full
561: SNR is ``split'' between the two polarizations, in a way that is
562: unknown {\it a priori}.) In this paper we aim to
563: make realistic estimates of a GW pulsar's detectability for a
564: given matched-filter SNR (and given region of parameter space to be
565: searched over), so we take polarization into account wherever it makes
566: a significant difference. In practice, this just means that we use
567: the $\F$-statistic, Eq.~(\ref{eq:fstatdef}), as our detection
568: statistic.
569: 
570: \item[4)] When estimating computational costs, BC assume that the
571: demodulations will be done using \emph{stroboscopic
572: re-sampling}, a method modeled closely on the FFT algorithm.
573: A different demodulation method, which we shall refer to as the
574: SFT method, is currently being used by the GW pulsar search codes in
575: the  LIGO Scientific Collaboration (LSC) software library~\cite{lscsoft}.
576: The SFT method takes as its input a  short FFT database
577: (FFT'ed sets of short-time data stretches), and can be more efficient
578: than \emph{stroboscopic re-sampling} in cases where only a narrow
579: frequency range of the demodulated time series is of interest.
580: In this paper we explore the possibility of using different demodulation methods at
581: different stages of the search, and attempt to find the most efficient
582: combination.
583: \end{description}
584: %
585: All the above points will be elaborated on in later sections of the
586: paper.
587: 
588: 
589: \subsection{The general optimization scheme}
590: \label{subsec:optimize}
591: 
592: In this section we further discuss our search algorithm and its
593: optimization.  First we establish some notation.  Let $n$ be the total
594: number of semi-coherent
595: stages.  Let $N^{\ib}$ be the number of stacks used in the $i^{\th}$
596: stage and $\Tcoh^{\ib}$ be the length of each stack; the superscript $i$
597: will always refer to the $i^{\th}$ semi-coherent stage.  The
598: resolution of the template grid used to cover parameter space is given
599: in terms of the maximum fractional mismatch in signal power
600: $\mu^\ib_\max$ \cite{owen}.  Our detection statistic is $\rho^{\ib}$,
601: the sum of the $\F$ values from the different stacks (obtained after
602: sliding appropriately):
603: %
604: \be
605: \rho^{\ib} = \sum_{k=1}^{N^{\ib}}\F_k^{\ib} \,.
606: \ee
607: %
608: Denote the distribution of $\rho^{\ib}$ in the absence of any signal
609: by $p(\rho^\ib)$. In the presence of a GW signal of amplitude $h_\RMS$,
610: let the distribution of $\rho^{\ib}$ at the gridpoint nearest the
611: actual signal be $p(\rho^\ib | h_\RMS,\mu_\max^{\ib})$.
612: Let $\rho_\th^\ib$ be the $i^{\th}$-stage threshold
613: that we use to reject candidates or advance them to the next stage;
614: a candidate must exceed this threshold to advance.
615: The $i^{\th}$-stage FA rate  (per candidate) $\alpha^\ib$ and FD rate
616: (per candidate)
617: $\beta^\ib$ are given by
618: %
619: \ba
620: \alpha^\ib(\rho_\th^\ib) &=& \int_{\rho_\th^\ib}^{\infty}p(\rho)d\rho
621: \,, \label{eq:falsealarm}\\
622: \beta^\ib (\rho_\th^\ib; h_\RMS ,\mu_\max^{\ib}) &=& \int_0^{\rho_\th^\ib}
623: p(\rho|h_\RMS, \mu_\max^{\ib} )d\rho \,  .
624: \label{eq:falsediss}\ea
625: Practically identical formulae apply to the final, coherent stage as well.
626: 
627: Again, we require of any search algorithm
628: that, at the very
629: end of the search, it results in a false detection less
630: than $1\%$ of the time. Given this constraint, we parametrize the
631: search's sensitivity by the signal amplitude $h_{\th}$ such that
632: an embedded signal with $h_\RMS > h_{\th}$ would be detected
633: $\agt 85$--$90\%$ of the time. We enforce the latter condition as follows.
634: We set the first-stage threshold $\rho_\th^{(1)}$
635: such that a signal of amplitude
636: $h_\RMS = h_{\th}$ will pass to the second stage
637: $90\%$ of the time. At all subsequent stages we set the threshold
638: such that the same signal with strength $h_\RMS = h_{\th}$ has a
639: $99\%$ chance of passing to the next higher stage.
640: That is, we adjust the
641: $i^{\th}$-stage threshold $\rho_\th^\ib$
642: so that $\beta^{(1)} = 0.10$, while $\beta^\ib = 0.01$ for $i > 1$,
643: and $\beta^{({\coh})} = 0.01$ as well.
644: The motivation behind making $\beta^{(1)}$ larger than
645: $\beta^\ib$ for $i > 1$ is the following: We believe that a computationally efficient
646: algorithm will have the property that a true signal that is strong
647: enough to pass the first-stage threshold {\it should} generally pass over all the
648: others. Any source that is not sufficiently strong to make it through to
649: the end of the detection pipeline should be discarded as soon as possible,
650: so as not to waste computing power.  This reflects the basic idea
651: behind our hierarchical searches: to eliminate unpromising regions of parameter
652: space as quickly as possible, so that computational resources can be
653: focused on the more promising regions.
654: Basically, the first-stage threshold determines the
655: sensitivity of the whole search, and
656: subsequent steps whittle down the number of candidates
657: (i.e., the number of small patches in parameter space that perhaps contain a
658: true signal) until any remaining patches can be fully, coherently analyzed.
659: 
660: To fully specify our search algorithm, we have to choose the
661: parameters $\Gamma \equiv (n,
662: \{N^\ib\}, \{\Tcoh^\ib\}, \{\mu_\max^\ib\}, \mu_\max^{coh})$,
663: where $n$ is the number of semi-coherent stages and
664: $i = 1,...,n$.
665: In doing so, we are subject to certain
666: requirements or constraints:
667: %
668: \begin{itemize}
669: \item The total amount of data
670: available is no more than some $T_\max$ (say, 1 year).
671: 
672: \item We wish to detect (with $\sim 10 \%$ FD rate and $< 1 \%$ overall
673: FA rate) any unknown signal of amplitude $h_\RMS$ greater than
674: $h_{th}$ (say, $10^{-26}$).
675: 
676: \end{itemize}
677: %
678: Our task is to choose the parameters $\Gamma$ that minimize the
679: total required computational power $P$, subject to the above constraints.
680: We arrive at a cost function $P(h_{th})$, the computational cost
681: of reaching any given sensitivity level. (Really, $P$ is a function
682: of the product $h^2_{th} T_{\rm max}/S_n(f)$, but we are regarding
683: $T_{\rm max}$ and $S_n(f)$ as fixed.)
684: We can immediately invert this function to
685: determine $h_{th}(P)$, the sensitivity achievable for any given
686: computing power.
687: 
688: Let us first deal with the constraint on the total amount of data.
689: We are going to consider simultaneously two different modes of all-sky
690: searches.
691: In ``data recycling mode,'' at each stage we start back at the beginning of the
692: data, but take progressively larger values of $N^{(i)}\Tcoh^{(i)}$.  Thus the
693: first stage looks at data in the interval $[T_0, T_0 + N^{(1)}\Tcoh^{(1)}]$, the
694: second stage looks at $[T_0, T_0 + N^{(2)}\Tcoh^{(2)}]$ and so on.  The total
695: observation time is thus
696: %
697: \begin{equation} \label{eq:tobsrecyc}
698: \Tobs = N^{(n)}\Tcoh^{(n)}\,.
699: \end{equation}
700: %
701: In ``fresh-data'' mode,
702: rather than always starting over from the beginning, we analyze fresh
703: data at each stage. The first stage looks at data in the range $[T_0, T_0 +
704: N^{(1)}\Tcoh^{(1)}]$, the second stage looks at  $[T_0 + N^{(1)}\Tcoh^{(1)}, T_0 +
705: N^{(1)}\Tcoh^{(1)} + N^{(2)}\Tcoh^{(2)}]$, etc. The total observation time is thus
706: %
707: \begin{equation}\label{eq:tobsfresh}
708: \Tobs = \sum_{i=1}^{n}N^{(i)}\Tcoh^{(i)}\,.
709: \end{equation}
710: %
711: In either data-recycling or fresh-data mode,
712: one constraint is that $\Tobs \leq T_\max$ where
713: $T_\max$ is the total amount of data available. Also, in either mode,
714: at each stage we look only at portions of parameter
715: space that exceeded the threshold set at the previous stage.
716: 
717: Next we consider our constraints on the overall FA and FD rates
718: for the pipeline.
719: The final, coherent follow-up stage is expected to be much more
720: sensitive than any of the preceding steps; therefore the overall FA rate
721: is essentially set by the final stage threshold alone.
722: (The earlier stages serve only
723: to whittle down the number of candidates, $N_{coh}$, that are
724: analyzed in the final coherent stage.)
725: If the threshold in the final follow-up stage is
726: $\rho_\th^{(coh)}$, then the overall FA rate is no larger than
727: ${\alpha}^{(coh)}(\rho_\th^{(coh)})$ times the number of effectively
728: independent candidates in parameter space. We approximate the latter, crudely,
729: by $\sim N_p(T_{\rm max}, 0.2, 1)$;
730: in practice ${\alpha}^{(coh)}(\rho_\th^{(coh)})$
731: turns out to be so minuscule that the crudeness of this approximation is
732: irrelevant.
733: 
734: The overall false dismissal requirement is also easily handled.  Let
735: $\tilde{\beta}$ be the total false dismissal rate of the multistage
736: search.  Each stage has its own threshold $\rho_\th^\ib$ and
737: corresponding false dismissal rate $\beta^\ib$.  If each stage,
738: including the follow-up stage, were to analyze completely independent
739: data, we would have
740: %
741: \be
742: \tilde{\beta} = 1 - \prod_{i=1}^{n+1} (1-\beta^\ib) \approx
743: \beta^{(1)} + \cdots + \beta^{(n+1)} \,.
744: \ee
745: %
746: (where we use ``$\beta^{(n+1)}$'' interchangeably with $\beta^{(coh)}$).
747: In our fresh-data search mode, the data at different stages are
748: independent, {\it except} for the final, follow-up stage.
749: And in our recycled-data scheme, the
750: data examined at higher stages includes all the data examined in earlier
751: stages. Then when $\beta^{(1)}=0.1$ and
752: \be\label{eq:beta}
753: \beta^{(2)}= \beta^{(3)} = ... = \beta^{(n)} = \beta^{(coh)} = 0.01 \, ,
754: \ee
755: it is clear that $\tilde \beta$ is roughly in the range
756: $(10+n-1)\%$ to $(10 + n)\%$ for fresh-data mode, and $10\%$ to
757: $(10 + n)\%$
758: for recycled-data mode. Since $n \approx 3$ turns out to be optimal
759: (see below), we crudely summarize this by saying that our
760: strategies have an overall FD rate of $10-15\%$ at the
761: threshold value of $h_\RMS$.
762: 
763: 
764: 
765: Finally, we turn to the search's computational cost, which we wish to
766: minimize. Let us denote the total number of floating point operations for the
767: $i^{th}$ semi-coherent stage by $C^\ib$ and for the final coherent stage by
768: $C^{(\coh)}$.  Expressions for $C^\ib$ and $C^{(\coh)}$ are given in the
769: next section. For now, it is sufficient to say the total computational
770: cost is
771: %
772: \be C_{total} = \big(\sum_{i=1}^n C^\ib\big) +
773: C^{(coh)}\, ,
774: \ee
775: %
776: and that if we wish to analyze the data in roughly real time,
777: the required computational power (operations per unit time) is
778: %
779: \begin{equation} \label{eq:comppower}
780: P = \frac{C_{total}}{\Tobs} \, .
781: \end{equation}
782: %
783: Depending on which mode we are working in, $\Tobs$ is given by
784: Eq.~(\ref{eq:tobsrecyc}) or Eq.~(\ref{eq:tobsfresh}).
785: 
786: Again, our strategy for optimizing the search is to minimize $P$,
787: subject to the constraints listed above.
788: 
789: \section{Template counting, confidence levels, and computational cost}
790: \label{sec:formulae}
791: 
792: \subsection{Template counting formulae}
793: \label{subsec:metric}
794: 
795: This section gives the template counting formulae originally derived
796: by BC using the metric formulation developed in \cite{owen}.
797: 
798: For simplicity, the parameter space is covered by spheres of proper radius
799: $\sqrt{\mu_\max}$ ($\mu_\max$ is the maximum allowed fractional
800: mismatch in the detection statistic \cite{owen}) using a cubic grid.
801: However it is worth keeping in mind that a cubic grid over-estimates the
802: number of required templates even in two
803: dimensions, and the difference increases rapidly with the
804: dimensionality \cite{jhc}.
805: 
806: As mentioned earlier, for each semi-coherent stage,
807: we have a coarse grid for the demodulation and a fine grid for the
808: stack-slide analysis.
809: Following BC, for simplicity we shall require that at any given
810: semi-coherent stage, the maximal mismatch $\mu_\max$ for the fine
811: grid is the same as $\mu_\max$ for the coarse one.
812: However (unlike BC), we allow $\mu_\max$ to vary from one stage to
813: the next.
814: 
815: The number of templates (or gridpoints) $N_p$ is a function of
816: the mismatch $\mu_\max$, the coherent time baseline $\Tcoh$, and the
817: number of stacks $N$ (which is unity for the coarse grid).
818: BC have derived the following expressions for the number of
819: gridpoints, $N_{pc}$ and $N_{pf}$, in the coarse and fine grids,
820: respectively:
821: \ba
822: N_{pc} &=& N_p(\Tcoh, \mu_\max, 1)\,, \label{eq:npc}\\
823: N_{pf} &=& N_p(\Tcoh, \mu_\max, N)\,. \label{eq:npf}
824: \ea
825: where $N_p$ is given in Eq.~(2.22) of BC:
826: %
827: \be \label{eq:Np}
828: N_p = \mathop{\textrm{max}}_{s\in
829:   \{0,1,2,3\}}\left[\mathcal{M}_s\mathcal{N}_sG_s\prod_{k=0}^s\left(1+
830:   \frac{0.3r\Omega^{k+1}\tau_\min^k}{c\,k!\sqrt{\mathcal{M}_s}}\right)\right]  \, .
831: \ee
832: %
833: Here $r=1\,$AU is Earth's orbital radius, $\Omega = 2\pi/(1 {\rm yr})$,
834: %
835: \be
836: \mathcal{N}_s = \frac{s^{s/2}}{(s+2)^{s/2}}\frac{f_\max^s
837:   \Tcoh^{s(s+3)/2}}{(\mu_\max/s)^{s/2}\tau_\min^{s(s+1)/2}}\, ,
838: \ee
839: \be
840: \mathcal{M}_s = \big(\frac{f_\max}{1 {\rm Hz}}\big)^2\frac{(s+2)}{4\mu_\max}\left( \frac{1}{A^2} +
841: \frac{1}{B^2} + \frac{1}{C^2}\right)^{-1/2}\, ,
842: \ee
843: where
844: \be
845: A = 0.014\,, \quad B = 0.046\left(\frac{\Tcoh}{1\textrm{ day}}\right)^2 \,, \quad
846: C = 0.18\left(\frac{\Tcoh}{1\textrm{ day}}\right)^5 \,,
847: \ee
848: and the functions $G_s$ are given in Appendix A of BC.
849: Roughly speaking, the factor $\mathcal{M}_s$ counts distinct patches
850: on the sky as set by the Earth's one-day spin period, $\mathcal{N}_s$
851: counts distinct ``patches'' in the space of spin-down parameters,
852: the $G_s$ give the dependence of $N_p$ on the number of stacks, $N$, and the
853: factors of the form $\left(1+
854:   \frac{0.3r\Omega^{k+1}\tau_\min^k}{c\,k!\sqrt{\mathcal{M}_s}}\right)$
855: effectively account for the increase of search volume required when
856: the frequency derivative $d^kf/dt^k$ is dominated by the Doppler shift
857: from the Earth's motion around the Sun rather than by the pulsar's
858: intrinsic spin-down. In our numerical work we use the full expressions for
859: the $G_s$ given in the Appendix A of BC, but for completeness we
860: note that BC also give the following approximate fits to the
861: $G_s$, which are valid when  $N\gg 4$:
862: %
863: \ba
864: G_0(N) &= & 1 \,,\\
865: G_1(N) &\approx & 0.524N \,,\\
866: G_2(N) &\approx & 0.0708N^3 \,,\\
867: G_3(N) &\approx & 0.00243N^6 \,.
868: \ea
869: %
870: The $N_p$ results in BC were derived under the assumption that the
871: observation time is significantly less than one year.  As
872: we shall see below, in the cases where the
873: total available data covers an observation time of a year or more,
874: it turns out
875: that for the optimal search, the initial semi-coherent stages
876: typically analyze a few days' to a few months'
877: worth of data.
878: Also, most of the search's computational cost
879: is spent on these early stages. (This is especially true
880: for the young-pulsar search, which is the most computationally challenging.)
881: Therefore, it seems reasonable for our purposes
882: to simply use the $N_p$ formulae from BC for {\it all} observation times.
883: Since the cost-errors we make by using the BC formulae
884: will be confined to the later
885: stages, and since the overall sensitivity of the search is effectively set
886: at the first stage, we believe these errors will not significantly
887: affect the total computational cost, for fixed threshold (though
888: they may affect the relative allocation of resources between
889: the different stages).
890: Of course, the validity of this assumption can only really
891: be checked by re-doing the calculation
892: using more accurate expressions for the $N_p$'s,
893: appropriate for year-long observation times, but unfortunately such
894: expressions are not currently available.
895: 
896: Even for short observation times, the
897: $N_p$ calculation in BC used the approximation
898: (\ref{eq:approxF}), which neglects the amplitude
899: modulation of the signal; however this approximation is not expected to cause
900: significant errors in estimating template numbers.
901: 
902: 
903: 
904: \subsection{False dismissal rates and the thresholds}
905: \label{subsec:fd}
906: 
907: 
908: In this subsection, we discuss the statistical properties of the
909: stack-slide search and solve the false dismissal constraint to obtain
910: expressions for the thresholds.
911: 
912: It is shown in \cite{jks} that the distribution of the $\F$-statistic
913: (or to be more precise, $2\F$), for each coherent search, is given by
914: a non-central $\chi^2$
915: distribution.  The non-centrality parameter $\eta$ is given in terms
916: of the signal $h(t)$ by:
917: %
918: \ba
919: \eta &=& 4 \left(1-\frac{\mu_\max}{3}\right)
920: \int_0^\infty \frac{|\tilde{h}(f)|^2}{S_n(f)} df \nonumber \\
921: &=& \left(1-\frac{\mu_\max}{3}\right)\frac{2 h_\RMS^2\Tcoh}{S_n(f)}\,,
922: \label{eta}
923: \ea
924: %
925: where $\tilde{h}(f)$ is the Fourier transform of $h(t)$.
926: We have included a fitting factor of $1-\mu_\max/3$ to account for
927: the average loss in power due to the mismatch between the signal and
928: template.
929: $h_\RMS$ is the root-mean-square value of the signal $h(t)$.
930: We can relate $h_\RMS$
931: to the amplitude $h_0$ defined in Eqs.~(\ref{eq:waveformplus}) and
932: (\ref{eq:waveformcross}), as follows. If one averages
933: $h^2_\RMS$ over all sky-positions as well as over the polarization
934: parameters $\iota$ and $\psi$, one obtains
935: $\langle h^2_\RMS \rangle = (2/25)h^2_0$ (see Eq.~(93) of \cite{jks}).
936: 
937: More explicitly, the distribution is
938: %
939: \ba
940: p(\F|\eta) &=& 2\chi^2(2\F|\eta,4) \nonumber \\ &=&
941: \left(\frac{2\F}{\eta}\right)^{1/2} I_1(\sqrt{2\F\eta})e^{-\F-\eta/2}
942: \ea
943: %
944: where $\chi^2(\cdot|\eta, \nu)$ is the $\chi^2$ distribution with
945: $\nu$ degrees of freedom and non-centrality parameter $\eta$, and
946: $I_1$ is the modified Bessel function of first order.
947: The statistic $\rho$ of interest for the stack-slide
948: search is the sum of the $\F$-statistic over $N$ stacks.
949: Assuming the $\F$-statistic for the $N$ stacks to be statistically
950: independent, $2\rho$
951: must follow a $\chi^2$ distribution with $4N$ degrees of freedom and
952: non-centrality parameter $N\eta$
953: %
954: \be
955: p(\rho|\eta,N) = 2\chi^2(2\rho|N\eta,4N)\,.
956: \ee
957: %
958: The mean and variance of $\rho$ are given respectively by
959: %
960: \be \label{eq:meanvar}
961: \bar{\rho} = 2N + \frac{N\eta}{2} \,, \qquad \sigma_\rho^2 = 2N + N\eta\,.
962: \ee
963: %
964: Using the distribution $p(\rho^\ib)$, the false alarm rate
965: for the $i^{th}$ semi-coherent stage (defined in
966: Eq.~(\ref{eq:falsealarm})) can be evaluated analytically:
967: %
968: \begin{equation}
969: \alpha^\ib(\rho_\th^\ib) =
970: e^{-\rho_\th^\ib}\sum_{k=0}^{2N^\ib-1}\frac{(\rho_\th^\ib)^k}{k!}  \,. \label{eq:fa-analytical}
971: \end{equation}
972: %
973: As discussed earlier, the overall false alarm probability $\tilde{\alpha}$
974: for the search is set by the final coherent follow-up stage.  For this
975: stage, $N=1$ so that if the threshold on $\rho$ is $\rho_\th^{(coh)}$,
976: then it is easy to see from the previous equation that:
977: %
978: \be \label{eq:alphacoh}
979: \tilde{\alpha} = (1+\rho_\th^{(coh)})e^{-\rho_\th^{(coh)}}\,.
980: \ee
981: %
982: In the presence of a signal, the non-central $\chi^2$ distribution for
983: $\rho$ is a little cumbersome to work with, and it is useful to
984: replace it by a Gaussian with the appropriate mean and variance.  So
985: we say that the distribution of $\rho$ must be approximately Gaussian
986: with mean and variance as in eq. (\ref{eq:meanvar}):
987: %
988: \be
989: p(\rho|\eta,N) = \frac{1}{\sqrt{2\pi\sigma_\rho^2}}
990: \,\,e^{-(\rho-\bar{\rho})^2/2\sigma_\rho^2} \, .
991: \ee
992: %
993: This approximation is not valid when $N$ is of order unity.
994: Then for any given $h_{\th}$, we should set the threshold of the
995: $i^{th}$ stage, $\rho^{\ib}$ by the false dismissal requirement:
996: %
997: \be\label{rhoi}
998: \int_{0}^{\rho^{\ib}} p(\rho|\eta^{\ib}_\th,N) d\rho = \beta^\ib \,,
999: \ee
1000: %
1001: where
1002: %
1003: \be
1004: \eta^{\ib}_\th := \left(1-\frac{\mu_\max^\ib}{3}\right)\frac{2h_\th^2\Tcoh^{\ib}}{S_n(f)}\,.
1005: \ee
1006: %
1007: Here the factor of $1-\mu_\max^\ib/3$ accounts for the average
1008: loss in power due to the mismatch between the signal parameters and
1009: nearest gridpoint parameters.
1010: Eq.~(\ref{rhoi}) can be solved to find $\rho^\ib_\th$ as a function
1011: of $h_{\th}$, $\Tcoh^{\ib}$, and $\mu^{\ib}$.
1012: Or equivalently, it gives
1013: $h_{\th} = h_{\th}(\rho^{\ib}, \Tcoh^{\ib}, \mu^{\ib})$.
1014: This equation can easily be solved by using the properties of the
1015: complementary error function.  By changing variables in the integral,
1016: we can rewrite the false dismissal rate as
1017: %
1018: \be
1019: \beta^\ib =
1020: \frac{1}{2}\textrm{erfc}\left(\frac{\bar{\rho}^\ib-\rho^\ib}{\sqrt{2}\sigma_\rho^\ib}\right)\,.
1021: \ee
1022: %
1023: If $h_\th$ is the smallest value of $h_\RMS$ for which the false
1024: dismissal rate is no bigger than $\beta^\ib$, then we have
1025: %
1026: \ba\label{eq:thresholdsolve}
1027: \rho^\ib(h_\th) &=& \bar{\rho}^\ib -
1028: \sqrt{2}\sigma_\rho^\ib\,\textrm{erfc}^{-1}(2\beta^\ib) \nonumber \\
1029: &\approx& 2N^\ib\left[1 + \frac{\eta_\th^\ib}{4}\right] \nonumber \\ &-&
1030: 2\textrm{erfc}^{-1}(2\beta^\ib) \sqrt{{N}^\ib}\sqrt{1 +\frac{\eta_\th^\ib}{2}}\,.
1031: \ea
1032: %
1033: 
1034: In practice, we fix one value of $h_\th$ (our sensitivity goal)
1035: for an entire search,
1036: and we then set the threshold $\rho^{\ib}$ at each stage by solving
1037: Eq.~(\ref{eq:thresholdsolve}), with the false dismissal rates set by
1038: $\beta^{(1)} = 0.1$ and $\beta^\ib = \beta^{\rm coh} = 0.01$ for
1039: $i\geq 2$.
1040: Our rationale for this choice is as follows.
1041: At each stage, one can estimate the signal strength of any
1042: successful candidate. If after the first stage, one can already predict
1043: that a candidate is not strong enough to pass over the threshold at the
1044: second or a higher stage, then one might as well discard it immediately
1045: and so not waste computer power on a likely failure.
1046: Put the other way, an efficient algorithm should ensure that
1047: a true signal that is strong enough to pass over the first stage is also
1048: strong enough to pass over all subsequent stages. Then the
1049: false dismissal rate for the whole search will be only a little larger
1050: than the FD rate of the first stage alone, or a little more than $10\%$.
1051: (An {\it overestimate} of the total FD rate is the sum of the rates for
1052: each of the stages, or $13\%$ for a 3-stage search.)
1053: 
1054: 
1055: \subsection{Computational Cost}
1056: \label{subsec:cost}
1057: 
1058: Let us begin with the first semi-coherent stage.  Here, the number of
1059: points in the
1060: coarse and fine grids are respectively
1061: %
1062: \ba
1063: N^{(1)}_{pc} &=& N_p(\Tcoh^{(1)},\mu_\max^{(1)}, 1) \,, \\
1064: N^{(1)}_{pf} &=& N_p(\Tcoh^{(1)},\mu_\max^{(1)},N^{(1)}) \,.
1065: \ea
1066: %
1067: If we are searching in a frequency range from small frequencies up to $f_\max$,
1068: the data must be sampled in the time domain (at least) at the Nyquist
1069: frequency $2f_\max$.  The minimum number of data points that we must
1070: start out with in the time domain is then $2f_\max\Tcoh$. To calculate
1071: the $\F$-statistic for each stack, we need to first calculate the
1072: quantities $F_a$ and $F_b$ which appear in equation
1073: (\ref{eq:fstatdef}).  We describe two methods below which
1074: may be called the \emph{stroboscopic resampling method} and the
1075: \emph{SFT method}. Given $F_a$ and $F_b$, the cost of combining
1076: them to get $\F$ is negligible.\\
1077: 
1078: 
1079: \noindent \textit{The stroboscopic resampling method:} The method
1080: suggested in \cite{jks} (and also in \cite{bc}) is based on the
1081: observation that the integrals in Eqs.~(\ref{eq:Fa}) and (\ref{eq:Fb})
1082: look \emph{almost} like a Fourier transform; the difference being the
1083: form of $\Phi(t)$ in the exponential.  However, by suitably resampling
1084: the time series, effectively redefining the time variable so that the
1085: spectrum of a real signal would look like a spike in a single frequency
1086: bin, the integral can be written as a Fourier transform and we can
1087: then use the FFT algorithm.  Since the cost of calculating an FFT for
1088: a time series containing $m$ data points is $3m\log_2 m$, the operations
1089: cost of
1090: calculating the $\F$-statistic for each stack should be approximately
1091: $12f_\max \Tcoh\log_2(2f_\max\Tcoh)$.  Repeating this for $N^{(1)}$
1092: stacks and for each point in the coarse grid, we see that the total
1093: cost of calculating $F_a$ and $F_b$, and therefore the $\F$-statistic,
1094: is approximately
1095: %
1096: \be
1097: 12N^{(1)}N^{(1)}_{pc}f_\max\Tcoh^{(1)}\log_2(2f_\max\Tcoh^{(1)})\,.
1098: \ee
1099: %
1100: We now need to appropriately slide each segment in frequency space and
1101: stack them up, i.e. add the $\F$-statistic values from each
1102: stack to get our final statistic $\rho$.  This has to be done for each
1103: point in the fine grid.  The cost of sliding is negligible and we need
1104: only consider the cost of adding the $\F$-statistic values.
1105: Since adding $N^{(1)}$ real
1106: numbers requires $N^{(1)}-1$ floating point operations, we see that
1107: the cost of stacking and sliding for all frequency bins and for all
1108: points in the fine grid is approximately
1109: %
1110: \be
1111: f_\max\Tcoh^{(1)}{N_{pf}^{(1)}}
1112: (N^{(1)}-1)  \,.
1113: \ee
1114: %
1115: Thus, the computational cost for the first semi-coherent stage is
1116: %
1117: \ba\label{eq:hierarchical-cost-1}
1118: C^{(1)}_\res &=& f_\max\Tcoh^{(1)}N_{pc}^{(1)}\left[
1119: 12N^{(1)}\frac{\log(2f_\max\Tcoh^{(1)})}{\log 2} \right. \nonumber \\
1120: &+& \left.
1121: \frac{N_{pf}^{(1)}}{N_{pc}^{(1)}}(N^{(1)}-1)  \right] \,.
1122: \ea
1123: %
1124: The subscript $\res$ indicates that this result is for the
1125: stroboscopic resampling method.\\
1126: 
1127: \noindent \textit{The SFT method:} An alternative method is to
1128: use as input not the time series, but rather a bank of short time
1129: baseline Fourier Transforms (SFTs).  This is in
1130: fact the method currently being used in the search codes of the LIGO
1131: Scientific Collaboration \cite{lscsoft}.
1132: Here one first breaks up the data into short segments of length $\Tsft$,
1133: and calculates the Fourier transform of each
1134: segment. (These segments, which are to be combined \emph{coherently}, are
1135: not to be confused with the segments used in the stack-slide algorithm
1136: which are combined incoherently).  $\Tsft$ should be
1137: short enough so that the signal does not drift by more than half a
1138: frequency bin over this time.
1139: Typical values of $\Tsft$ are $1800$s.
1140: The exact method of calculating the
1141: $\F$-statistic from an SFT database is sketched in Appendix A, and
1142: the operations count is also derived there. The result is
1143: (see Eq.~(\ref{eq:sftcostapp})):
1144: %
1145: \be
1146: \approx 640 N^{(1)}N_{pc}^{(1)}f_\max \frac{(\Tcoh^{(1)})^2}{\Tsft}
1147: \,\,\textrm{Flops}\,.
1148: \ee
1149: %
1150: Note that the SFT method of calculating the $\F$-statistic is
1151: $\mathcal{O}((\Tcoh^{(1)})^2)$ while for the stroboscopic resampling
1152: method it is
1153: $\mathcal{O}(\Tcoh^{(1)}\log \Tcoh^{(1)})$.
1154: 
1155: 
1156: The total cost of stacking and sliding in the first hierarchical stage
1157: using the SFT method is thus:
1158: %
1159: \ba\label{eq:c1sft}
1160: C^{(1)}_\sft &=& f_\max\Tcoh^{(1)}N_{pc}^{(1)}\left[ \frac{640 N^{(1)}
1161: \Tcoh^{(1)}}{\Tsft} \right. \nonumber \\  &+& \left.
1162: \frac{N_{pf}^{(1)}}{N_{pc}^{(1)}}(N^{(1)}-1) \right]\,.
1163: \ea
1164: %
1165: When all frequencies are to be searched over,
1166: stroboscopic resampling produces the $\F$-statistic
1167: about an order of magnitude more cheaply than the SFT method, for typical
1168: values of $\Tcoh^{(1)}$.  However when previous stages have narrowed
1169: the search to a small fraction of the whole frequency band (for any given
1170: $\vec\lambda$), the SFT method can be the more efficient one.
1171: We should also mention here that it is possible to start with SFTs and
1172: combine them in such a way as to get a $\mathcal{O}(\Tcoh^{(1)}\log
1173: \Tcoh^{(1)})$ operations count; this is in fact the method used in
1174: \cite{abjk}. However, in this paper, by the ``SFT method'' we always
1175: mean the method described here in Appendix B, with the operation count
1176: given above in Eq.~(\ref{eq:c1sft}).
1177: 
1178: It also seems likely that the resampling method could be
1179: modified so as to be the most efficient one, even when one only
1180: wants to demodulate a small
1181: frequency band
1182: \be
1183: \Delta f = \textrm{max}\left\{1,\frac{\Tcoh^\ib}{\Tcoh^{(i-1)}}\right\}
1184: \ee
1185: around every selected candidate.  Presumably the first step would be
1186: to heterodyne the data to shift
1187: the relevant frequency range to a neighborhood of zero-frequency.
1188: Then one would filter out frequencies higher than $\Delta f$,
1189: followed by the usual demodulation.
1190: Eq.~(\ref{eq:hierarchical-cost-i}) would then be modified, so that the
1191: new cost of demodulating would be $12N^\ib\Tcoh^\ib\Delta
1192: f\log_2(2\Tcoh^\ib\Delta f)$.  However since the details of this
1193: modified demodulation method have not yet been worked out, we will
1194: not consider it further in this paper.
1195: 
1196: This completes our analysis of the first stage computational costs for
1197: both methods.  The analysis for the subsequent stages proceeds
1198: similarly; the only difference is
1199: that subsequent stages analyze only those regions of parameter space
1200: that have not been discarded by any of the previous stages.  Assuming
1201: that almost all the candidates are due to noise, the false alarm rate
1202: is a good estimate of the number of candidates produced by any stage.
1203: Let us denote by
1204: $F^{(i)}$ the number of candidates which survive the $i^{th}$
1205: stage.  Since the false alarm rate for the first stage is
1206: $\alpha^{(1)}$, the number of candidates produced by the first
1207: stage is given by
1208: %
1209: \be
1210: F^{(1)} =
1211: \textrm{max}\left\{1,f_\max\Tcoh^{(1)}N_{pf}^{(1)}\alpha^{(1)}\right\}
1212: \,.
1213: \ee
1214: %
1215: Note that we will always have at least one candidate which makes it
1216: through to the next stage.
1217: To calculate the cost of a search, we of course must make some
1218: assumptions about the data to be processed. Basically, we are assuming
1219: that the data consists of Gaussian noise plus one detectable source.
1220: (Though we call $F^{(i)}$ the ``$i^{th}$-stage false alarm rate'', it
1221: is really the ``false alarm rate or the true-source survival rate, whichever
1222: dominates''. In practice, until the last semi-coherent stage,
1223: the FA rate always dominates.)
1224: 
1225: 
1226: To estimate the computational cost for the $i^{th}$
1227: stage, for $i>1$, recall that each of the $F^{(i-1)}$
1228: candidates produced by the $(i-1)^{th}$ stage is in fact a region in
1229: parameter space.  If we assume that the $i^{th}$ stage
1230: further refines
1231: this region, then we see that the number of $i^{th}$-stage coarse grid
1232: points
1233: in this region must be, on average,
1234: $N_{pc}^{\ib}/N_{pf}^{(i-1)}$ (again,
1235: assuming this ratio to be bigger than 1).
1236: Thus, using the stroboscopic resampling method, the number of floating
1237: point operations to calculate the $\F$-statistic in the $i^{th}$ stage
1238: is
1239: %
1240: \be
1241: F^{(i-1)} \textrm{max}\left\{
1242: 1,\frac{N_{pc}^{\ib}}{N_{pf}^{(i-1)}}\right\}
1243: 12f_\max \Tcoh^{\ib}N^{\ib}\log_2(2f_\max\Tcoh^{\ib})\,.
1244: \ee
1245: %
1246: Each candidate produced by the $(i-1)^{th}$ stage occupies a frequency
1247: band $1/\Tcoh^{(i-1)}$, and thus corresponds to $\Tcoh^\ib/\Tcoh^{(i-1)}$
1248: $i^{th}$-stage  frequency bins.  Thus the operations count
1249: for the stacking and sliding is
1250: %
1251: \ba
1252: &&F^{(i-1)}
1253: \textrm{max}\left\{1,\frac{\Tcoh^\ib}{\Tcoh^{(i-1)}} \right\}
1254: \textrm{max}\left\{1,\frac{N_{pc}^{\ib}}{N_{pf}^{(i-1)}}\right\}
1255: \times \nonumber \\
1256: && \times \frac{N_{pf}^{\ib}}{N_{pc}^{\ib}} (N^{\ib}-1)
1257: \ea
1258: %
1259: floating point operations.
1260: Combining these results, we get the computational cost for the
1261: $i^{th}$ stage ($i\geq 2$):
1262: \ba
1263: C^\ib_\res &=&  F^{(i-1)}
1264: \textrm{max}\left\{1,\frac{N_{pc}^{\ib}}{N_{pf}^{(i-1)}}\right\}
1265: \times \nonumber \\ &\times&
1266: \left[ 12N^\ib f_\max\Tcoh^\ib\frac{\log(2f_\max\Tcoh^\ib)}{\log 2}
1267: \right. \label{eq:hierarchical-cost-i}
1268: \\ &+& \left.  \textrm{max}\left\{1,\frac{\Tcoh^\ib}{\Tcoh^{(i-1)}} \right\}
1269: \frac{N_{pf}^{\ib}}{N_{pc}^{\ib}}(N^{\ib}-1)  \right] \,. \nonumber
1270: \ea
1271: %
1272: 
1273: If instead one uses the SFT method for calculating the $\F$-statistic,
1274: it is easy to see the operations count is
1275: %
1276: \ba
1277: C^\ib_\sft &=&  F^{(i-1)}
1278: \textrm{max}\left\{1,\frac{N_{pc}^\ib}{N_{pf}^{(i-1)}} \right\}
1279: \textrm{max}\left\{1,\frac{\Tcoh^\ib}{\Tcoh^{(i-1)}}\right\} \times \nonumber \\
1280: &\times&\left[\frac{640 N^\ib\Tcoh^\ib}{\Tsft} +
1281: \frac{N_{pf}^\ib}{N_{pc}^\ib}(N^\ib -1) \right] \,. \label{eq:hierarchical-cost-i-sft}
1282: \ea
1283: %
1284: 
1285: After the $n$ semi-coherent steps, we have the final coherent follow-up stage
1286: where the entire stretch of data of duration $\Tobs$ is used. For this stage,
1287: we analyze $F^{(n)}$ candidates and simply compute the $\F$-statistic without
1288: breaking up the data into any smaller stacks. The cost $C^{(coh)}$ for this
1289: using the resampling method is
1290: %
1291: \be\label{eq:hierarchical-cost-coh}
1292: C^{(\mathrm{coh})}_\res =
1293: F^{(n)}\textrm{max}\left\{1,\frac{N_{p}^{(coh)}}{N_{pf}^{(n)}}
1294: \right\} 12 f_\max\Tobs\frac{\log(2f_\max\Tobs)}{\log 2}
1295: \ee
1296: %
1297: where $N_{p}^{(coh)} \equiv N_p(\Tobs,\mu_{coh},1)$, and
1298: $\mu_{coh}$ is the $\mu_{max}$ of the final, coherent stage.
1299: Using the SFT method, we would have
1300: %
1301: \be
1302: C^{(\mathrm{coh})}_\sft = F^{(n)}\textrm{max}\left\{1,\frac{N_{p}^{(coh)}}{N_{pf}^{(n)}}
1303: \right\} \textrm{max}\left\{1,\frac{\Tobs}{\Tcoh^{(n)}}
1304: \right\}\frac{640\Tobs}{\Tsft}
1305: \ee
1306: 
1307: So far, all results in this section are valid whether we are working in
1308: fresh-data mode or data-recycling mode.  The following formulae,
1309: for the number of candidates which survive a given
1310: stage, do however depend on which mode we are working in.  If we operate
1311: in fresh-data mode (analyzing fresh data at every
1312: stage--except the last stage, which is a coherent follow-up of all
1313: the searched data), we clearly have (for $i\geq 2$)
1314: %
1315: \ba\label{Fifresh}
1316: F^{\ib} &=& \alpha^{(i)} \textrm{max}\left\{F^{(i-1)}, 1 \right\}
1317: \textrm{max}\left(1,\frac{N_{pf}^{\ib}}{N_{pf}^{(i-1)}}\right) \nonumber
1318: \\&\times& \textrm{max}\left(1,\frac{\Tcoh^{\ib}}{\Tcoh^{(i-1)}}\right) \, .
1319: \ea
1320: %
1321: Again, our count assumes that at least
1322: one candidate gets ``promoted'' to the succeeding stage.
1323: We note that Eq.~(\ref{Fifresh}) assumes that
1324: the parameter space resolution improves at every stage of
1325: fresh-data mode (which seems always to be true for our
1326: optimized searches).
1327: We also note that Eq.~(\ref{Fifresh}) is basically
1328: identical to Eq.~(5.2) of BC, but there it is
1329: claimed to be the FA rate for data-recycling mode. That is
1330: not correct, in general, as we discuss further below.
1331: 
1332: If we are in data-recycling mode (at each step, re-analyzing old data,
1333: while also adding on new data), then the probabilities of
1334: a candidate's randomly surviving the $(i-1)^{th}$ and
1335: $i^{th}$ stages are {\it not} independent, and so Eq.~(\ref{Fifresh})
1336: is no longer valid.
1337: (To see this, consider the limit where only a very tiny bit of data is
1338: added on, and the resolution is kept fixed. Then any candidate that
1339: survives the $(i-1)^{th}$ stage has almost a $100\%$ chance of surviving
1340: the $i^{th}$ stage, even if $\alpha^{(i)}$ is extremely small.)
1341: Indeed, the rhs of Eq.~(\ref{Fifresh}) is clearly a {\it lower bound}
1342: on the $i^{th}$-stage false alarm rate, in data-recycling mode.
1343: 
1344: We can also place the following {\it upper bound} on
1345: $F^{\ib}$ for data-recycling mode:
1346: %
1347: \be
1348: \label{FiBC}
1349: F^{\ib} \le f_{\mathrm{max}}\Tcoh ^{\ib}\, N_{pf}^{\ib} \alpha^\ib  \, .
1350: \ee
1351: %
1352: The rhs of (\ref{FiBC}) is the number of false alarms that would
1353: result if one performed a semi-coherent search of the {\it entire}
1354: parameter space with the given $(N^{(i)}, \Delta T^{(i)}, \mu^{(i)},
1355: \rho^{(i)})$, while the lhs is the number of false alarms that result from
1356: searching only neighborhoods of the points that survived the
1357: $(i-1)^{th}$ stage. 
1358: Thus for data-recycling mode, we can say that
1359: $F^{\ib}$ is somewhere in the range
1360: \ba
1361: \label{FiBC2}
1362: F^{(i-1)} \biggl(\frac{N_{pf}^{\ib}}{N_{pf}^{(i-1)}}
1363:     \frac{\Tcoh^{\ib}}{\Tcoh^{(i-1)}} &\biggr)& \, \alpha^{(i)} \,
1364: \le  \,  F^{\ib} \nonumber \\
1365: &\le & \, f_{\mathrm{max}}\Tcoh ^{\ib}\, N_{pf}^{\ib}
1366: \alpha^\ib  \; .
1367: \ea
1368: Fortunately, when we calculate the total computational cost of some
1369: optimized search in data-recycling mode, needed to achieve some
1370: given sensitivity $h_{th}$, if we try
1371: plugging in {\it either} the upper or lower bound for
1372: $F^{\ib}$, we find the two final results differ from each other
1373: by $\lesssim 18\%$  for a young pulsar ($\tau_{min} =40$ year) and
1374:     $\lesssim 5\%$ for an old one ($\tau_{min} =10^6$ year), which for our purposes is
1375: practically insignificant.  Moreover, the optimized search parameters obtained
1376: when we plug in the upper-limit estimate for $F^{\ib}$ are quite similar
1377: to those we find by plugging in the lower limit instead.
1378: Therefore it is safe for us to choose {\it either} the upper or lower limit
1379: as an estimate  of $F^{\ib}$.  For concreteness, in the rest of this
1380: paper we always estimate $F^{\ib}$ by its upper limit, which slightly
1381: overestimates the computational cost of the search.
1382: 
1383: With these results in hand, we are now ready to calculate the total
1384: computational cost of the entire search pipeline.  We have a number of
1385: choices to make.  At each stage, we can use either the stroboscopic
1386: resampling method or the SFT method, and we can work in
1387: either the data-recycling
1388: mode or fresh-data mode from the second stage onwards.
1389: For convenience, we somewhat arbitrarily limit the choices by
1390: considering only strategies that use either
1391: data-recycling mode in every stage or
1392: fresh-data mode in every stage. As we shall see below,
1393: the efficiencies of these two sorts of searches turn out to be
1394: extremely close anyway. Therefore we strongly suspect that more
1395: general searches (using fresh-data mode in some stages and
1396: data-recycling mode in others) would not give significant improvements.
1397: 
1398: 
1399: \section{Results}
1400: \label{sec:results}
1401: 
1402: \subsection{The optimization method}
1403: \label{subsec:optimizationmethod}
1404: 
1405: We next describe our numerical
1406: optimization method.   The function we want to minimize, the
1407: computational power of Eq.~(\ref{eq:comppower}), is a
1408: complicated function on a large-dimensional space.  Our chosen method
1409: is a simulated annealing algorithm~\cite{mrrtt, kgv} based on
1410: the downhill simplex method of Nelder and Mead~\cite{nm}.  The
1411: downhill simplex method consists of evaluating the function on the
1412: vertices of a simplex and moving the simplex downhill and shrinking it
1413: until the desired accuracy is reached.  The motion of the simplex
1414: consists of a prescribed set of ``moves'' which could be either an
1415: expansion of the simplex, a reflection around a face, or a
1416: contraction. This method is turned into a simulated
1417: annealing method by adding a random fluctuation to the values of the
1418: function to be minimized, at the points of the
1419: simplex.  The temperature of the random fluctuations is reduced
1420: appropriately, or in other words ``annealed'', until the minimum is
1421: found.
1422: 
1423: There are no universal choices for the rate of annealing or the
1424: starting point of the simplex; these
1425: depend on the particular problem at hand.  For the results presented
1426: below, we have used a variety of different starting points and
1427: annealing schedules to convince ourselves that the optimization algorithm has
1428: converged and that we have indeed
1429: found the best minimum.
1430: Let us first discuss the starting temperature, whose meaning is as
1431: follows.   If $f$ is the
1432: function to be minimized, then
1433: the temperature
1434: $\Theta$ parametrizes the amplitude of random
1435: fluctuations $f \rightarrow f + \delta f$ added to
1436: $f$ at the points of the simplex:
1437: %
1438: \be
1439: \delta f  = -\Theta \log r
1440: \ee
1441: %
1442: where $0 < r < 1$ is a uniformly distributed random number.
1443: A simplex move is always accepted if it takes the simplex downhill, but an
1444: uphill step may also be accepted due to these random fluctuations.
1445: In our case, we found that a starting temperature of $\Theta \sim
1446: 10^6$-$10^9$ gives good convergence; this value is to be
1447: compared to the typical value $\sim 10^{13}$ of the
1448: computational cost near its minimum for most of the results presented
1449: below.  We allow a maximum of $500$ iterations of the
1450: simplex.  If the simplex does not converge within $500$ iterations, we
1451: reduce the temperature by $2$--$5\%$ and restart the iterations from the
1452: best minimum found up to that point.  These steps are repeated until
1453: the simplex converges.   The starting point of the simplex cannot be
1454: chosen arbitrarily, and for this purpose, it is useful to have a rough
1455: idea of the location of the minimum. This requires some experimenting
1456: with a sufficiently broad range of starting points; this is especially
1457: important when the number of variables is large, as is the case for,
1458: say, a search with $n>3$.  Having found a
1459: suitable starting point for one set of pulsar parameters ($f_\max$ and
1460: $\tau_\min$), it can be reused for nearby pulsar parameter values.
1461: Occasionally, to obtain convergence to a minimum, we have taken
1462: as our starting point the minimum we found for nearby pulsar-parameter
1463: values.
1464: 
1465: We next describe how we impose the constraint that the total amount of
1466: analyzed data is less than $T_\max$.  One could imagine trying to
1467: do this using the method of Lagrange multipliers.
1468: However this seemed difficult to implement numerically  (for our
1469: highly non-linear function $P$), and we found a simpler approach that
1470: suffices. The function our algorithm minimizes is not the total
1471: computational power $P$ (defined in Eq.~(\ref{eq:comppower})) itself,
1472: but rather 
1473: %
1474: \be \label{eq:compconstr}
1475: f = P \times \left[1 + S\left(\frac{\Tobs}{T_\max}\right)\right]
1476: \ee
1477: %
1478: where $S(x)$ is a smooth function such that $S(x) = 0$ for $0 < x < 1$ but
1479: $S(x)$ is a rapidly increasing exponential function for $x > 1$.
1480: That is, we impose a very steep penalty for leaving the
1481: constraint surface.
1482: This works well, and indeed we found it useful
1483: to impose some additional (intuitively obvious)
1484: constraints in this way, such as
1485: requiring the $N^\ib$ and $\Delta T^\ib$ to all be positive; we again multiply
1486: $P$ by a factor that is unity when the constraint is
1487: satisfied but is very large when the constraint is violated.
1488: This ``trick'' is used to find the {\it location} of the minimum, but
1489: of course the results we report are the values of the function $P$ there,
1490: not $f$.
1491: 
1492: There is one additional technical detail, namely, that our optimization
1493: method is meant for the case of continuous, real variables, while
1494: our variables $N^\ib$ are strictly integers.
1495: We handle this by rounding off $N^\ib$ to the nearest integer while calculating
1496: the cost function $f$, every time it is called.  The downhill simplex
1497: algorithm still treats $N^\ib$ as a
1498: continuous variable, i.e. we allow arbitrarily small changes to
1499: $N^\ib$ when the simplex is moving downhill, but such changes have no
1500: effect on $f$.  We have also tried an alternative approach where
1501: $N^\ib$ is kept as a continuous variable throughout, and rounded off
1502: only at the very end. We have found that the two approaches yield
1503: consistent results.
1504: 
1505: Finally,
1506: we cross-checked our results using two different implementations of
1507: the simulated annealing algorithm--those of
1508: \cite{nr} and \cite{gsl}--and found that they gave basically equivalent
1509: results in our case.
1510: 
1511: 
1512: \subsection{The number of semi-coherent stages}
1513: \label{subsec:nstages}
1514: 
1515: The first question we want to answer is: what is the optimum number $n$ of
1516: semi-coherent stages to use in the search?
1517: Relatedly, we want to know the most efficient method to use for
1518: the $\F$-statistic calculation (stroboscopic resampling or SFT method) and
1519: the best mode to work in (fresh-data mode or data-recycling mode).
1520: To answer this, we
1521: consider an all-sky search for fast/young GW pulsars, by which we mean
1522: a search that goes up to frequency
1523: $f_\max=1000\,$Hz and that can detect pulsars with spindown ages $\tau_{\min}$
1524: as short as $40\,$yr. We assume the amount of data available is
1525: $T_\max=1\,$yr, and ask what is the computational power required to
1526: detect pulsar signals whose $h_\RMS$ is at
1527: or above $h_{th}$, given by:
1528: \be\label{eq:h5e}
1529: \frac{h_{th}^2}{S_n(f)} = 2.5\times 10^{-5}\textrm{sec}^{-1}\,.
1530: \ee
1531: This signal strength corresponds to $\sqrt{\eta}\approx 39.72$ for a
1532: full 1-yr observation time with a perfectly matched template.
1533: (Here and below we are implicitly assuming that $S_n(f)$ hardly varies
1534: over the frequency range of the signal.)
1535: We choose the $i^{th}$-stage FD rates $\beta^{(i)}$ as given
1536: in (and just above) Eq.~(\ref{eq:beta}), which, along with the
1537: detection threshold given by Eq.~(\ref{eq:h5e}), determines the
1538: $i^{th}$-stage thresholds $\rho^{(i)}$.
1539: For simplicity, we set $\mu^{(coh)} = 0.2$.
1540: While this is a restriction that we simply put in by hand
1541: (to slightly reduce the space of search parameters to be optimized), we
1542: believe this choice has very little effect on the overall optimized
1543: strategy because, as we shall see shortly, the follow-up stage
1544: usually accounts for only a tiny fraction of the total computational
1545: cost.\footnote{An exception is a search for old pulsars presented in tables
1546: \ref{tab:Fresh-params-million} and \ref{tab:Fresh-cc-million}, where
1547: the cost for the follow-up stage turns out to be non-negligible. This
1548: example in particular will have to be revisited when better formulas
1549: for $N_p$ are available.}  
1550: Thus we are left with $3n$ parameters to be optimized:
1551: $(\Tcoh^\ib,\mu_\max^\ib, N^\ib)$ for $i = 1,\ldots ,n$, subject
1552: to the constraint that the total amount of data analyzed, $T_{used}$
1553: [given by Eq.~(\ref{eq:tobsrecyc}) or (\ref{eq:tobsfresh})] is less than
1554: $1\,$yr.
1555: 
1556: Plots of the minimum computational cost for different $n$ and for
1557: both the data-recycling and fresh-data modes are
1558: shown in Fig.~\ref{fig:SFT-RES}.
1559: For each mode, we consider the following three
1560: strategies: (i) Use the SFT method in each stage, (ii) Use the
1561: resampling method in each stage, and (iii) Use the resampling method
1562: in the first and final follow-up stages, and use the SFT method in all
1563: intermediate stages.  Therefore there are $6$ curves in
1564: Fig.~\ref{fig:SFT-RES}.
1565: 
1566: %
1567: \begin{figure}
1568:   \begin{center}
1569:   \includegraphics[width=\columnwidth]{BC-Fresh-SFT-RES.eps}
1570:   \caption{Computational power versus number of semi-coherent
1571:   stages for different methods of calculating the $\F$-statistic. RES
1572:   indicates the stroboscopic resampling method (strategy (ii)) and SFT is
1573:   the SFT method (strategy (i)). SFT+RES corresponds to the mixture of these two
1574:   methods (strategy (iii)). For each strategy, solid lines indicate the
1575:   result for the fresh-data mode, while the dashed lines are for the
1576:   data-recycling mode.}
1577:   \label{fig:SFT-RES}
1578:   \end{center}
1579: \end{figure}
1580: %
1581: 
1582: 
1583: The most important lessons from Fig.~\ref{fig:SFT-RES} are
1584: the following:  Strategy (iii) turns out to be better than (i) or
1585: (ii).  Furthermore, for strategy (iii), there is a significant
1586: advantage in a three-stage search as compared to a two-stage or single-stage
1587: search, but there is hardly any improvement in computational cost
1588: in going to four or more
1589: semi-coherent stages.
1590: Furthermore, these results are the same
1591: whether we use the fresh-data mode or data-recycling mode, and these
1592: two modes give very similar total costs.
1593: While Fig.~\ref{fig:SFT-RES} presents results just for young/fast pulsar
1594: searches, we find the same basic
1595: pattern for old pulsars, with $\tau_{min} \sim 10^6\,$yr:
1596: strategy (iii) is the most efficient for calculating the $\F$-statistic,
1597: data-recycling mode and fresh-data mode are almost equally efficient, and
1598: having three semi-coherent stages is near-optimal (significantly better than
1599: two stages, and practically as good as four).  The main difference
1600: from the young/fast pulsar case is that the gain in going from 2 to 3
1601: stages is now only a factor $\sim 2$ in computational power, i.e., 
1602: smaller but still significant.    
1603: 
1604: In the light of these
1605: results, in the rest of this section, we consider only three-stage
1606: searches, with the first and final follow-up stages
1607: employing the resampling method and with the second and third stages
1608: employing  the SFT method. We continue to report results for both
1609: data-recycling mode and fresh-data mode.
1610: 
1611: 
1612: \subsection{The optimal three-stage search parameters}
1613: \label{subsec:3stage}
1614: 
1615: For the example search described above, (i.e. $f_\max=1000\,$Hz,
1616: $\tau_{\min}=40\,$yr, $T_\max=1\,$yr, and ${h_{th}^2}/{S_n} = 2.5\times
1617: 10^{-5}\textrm{sec}^{-1}$) we list the optimal search
1618: parameters for the three-stage search in data-recycling mode in Table
1619: \ref{tab:bc-params}.  The first two stages analyze about
1620: 26 days (divided in 10 segments) and 42 days (divided in 12 segments) of data, respectively,
1621: while the third stage
1622: analyzes the entire year-long data stretch (divided in 8 segments).  The total
1623: computational cost is
1624: $40.2\,$TFlops. The cost breakdown among the individual stages, and
1625: further cost breakdown into the
1626: demodulation piece $C^{(i)}_{coh}$ and the
1627: stack-slide $C^{(i)}_{ss}$ piece in each stage,
1628: are given in Table \ref{tab:bc-cc}.
1629: There we give the total count of floating point operations required, not
1630: the number of operations per second.
1631: %
1632: \begin{table}[h]
1633: \caption{The optimal search parameters in data recycling
1634:   mode. $f_\max=1000$Hz, $\tau_{\min}=40$yr, $T_\max=1$yr,
1635:   ${h_{th}^2}/{S_n} = 2.5\times 10^{-5}\textrm{sec}^{-1}$, and $\eta$ is
1636:   defined according to Eq.~(\ref{eta}).}
1637: \vspace{0.2cm}
1638: \begin{tabular}{|c|c|c|c|c|c|}
1639:   \hline
1640:   Stage & $\Tcoh^{(i)}$ (days) & $\mu^{(i)}$ & $N^{(i)}$ & $\Tobs^{(i)}$ (days) & $\sqrt{\eta}$ \\
1641:   \hline
1642:   &&&&&\\
1643:   1 &  2.58 & 0.7805 & 10 &  25.79 &  9.08 \\&&&&&\\
1644:   2 &  3.51 & 0.1139 & 12 &  42.13 &  13.23 \\&&&&&\\
1645:   3 & 45.66 & 0.8196 &  8 & 365.25 &  33.86 \\
1646:   \hline
1647: \end{tabular}
1648: \label{tab:bc-params}
1649: \end{table}
1650: %
1651: 
1652: %
1653: \begin{table}
1654: \caption{The computational cost to analyze one year of  data in
1655:   data-recycling mode. The search parameters are the same as given in
1656:   Table~\ref{tab:bc-params}.  $C^{(i)}_\coh$ is the cost for the coherent
1657:   demodulation step and $C^{(i)}_{ss}$ for the stack-slide step, while
1658:   $C^{(i)}$ is the sum of these two. Follow-up indicates the computational
1659:   cost required for the final follow-up stage.}
1660: \vspace{0.2cm}
1661: \begin{tabular}{|c|c|c|c|}
1662:   \hline
1663:   Stage & $C^{(i)}$ (Flop) &  $C^{(i)}_\coh$ (Flop) & $C^{(i)}_{ss}$ (Flop) \\
1664:   \hline
1665:   &&&\\
1666:   1 & $ 9.37\times 10^{20}$ & $ 6.21\times 10^{19}$ & $8.75\times 10^{20}$  \\
1667:   &&&\\
1668:   2 & $ 3.16\times 10^{20}$ & $ 2.46\times 10^{20}$ & $6.98\times 10^{19} $ \\
1669:   &&&\\
1670:   3 & $ 1.65\times 10^{19}$ & $ 2.73\times 10^{18}$ & $1.37\times 10^{19}$ \\
1671:   &&&\\
1672:   Follow-up &$ 6.30\times 10^{15}$ & & \\
1673: \hline
1674: \end{tabular}
1675: \label{tab:bc-cc}
1676: \end{table}
1677: %
1678: 
1679: Our results for fresh-data mode are qualitatively similar, and are
1680: given in Table \ref{tab:fresh-params}.
1681: In this case, the optimal search analyzes about 24 days of data
1682: in the first stage (broken up into 9 segments) and 24 more days in
1683: the second stage (broken into 6 segments).
1684: The third stage analyzes the rest of the year's worth of data, divided into
1685: 7 segments. The total computational
1686: requirement is $34.6\,$TFlops and its breakdown is given in Table
1687: \ref{tab:fresh-cc}.
1688: 
1689: %
1690: \begin{table}
1691: \caption{Same as Table~\ref{tab:bc-params}, but for fresh-data mode.}
1692: \vspace{0.2cm}
1693: \begin{tabular}{|c|c|c|c|c|c|}
1694:     \hline
1695: Stage & $\Tcoh^{(i)}$ (days) & $\mu^{(i)}$ & $N^{(i)}$ & $\Tobs^{(i)}$ (days) & $\sqrt{\eta}$ \\
1696:     \hline
1697: &&&&&\\
1698: 1 &  2.71 & 0.7829 & 9 &  24.35 &  8.82 \\&&&&&\\
1699: 2 &  4.08 & 0.0654 & 6 &  24.49 & 10.17 \\&&&&&\\
1700: 3 & 45.20 & 0.8229 & 7 & 316.42 & 31.50 \\
1701: \hline
1702: \end{tabular}
1703: \label{tab:fresh-params}
1704: \end{table}
1705: %
1706: %
1707: \begin{table}
1708: \caption{Same as Table~\ref{tab:bc-cc}, but for fresh-data mode.}
1709: \vspace{0.2cm}
1710: \begin{tabular}{|c|c|c|c|}
1711:     \hline
1712: Stage & $C^{(i)}$ (Flop) & $C^{(i)}_\coh$ (Flop) & $C^{(i)}_{ss}$ (Flop) \\
1713:     \hline
1714: &&&\\
1715: 1 & $8.11\times 10^{20}$ & $6.42\times 10^{19}$ & $7.46\times 10^{20}$  \\
1716: &&&\\
1717: 2 & $2.64\times 10^{20}$ & $2.62\times 10^{20}$ & $2.67\times 10^{18}$  \\
1718: &&&\\
1719: 3 & $1.74\times 10^{19}$ & $5.54\times 10^{18}$ & $1.19\times 10^{19}$ \\
1720: &&&\\
1721: Follow-up & $1.62\times 10^{16}$ & & \\
1722: \hline
1723: \end{tabular}
1724: \label{tab:fresh-cc}
1725: \end{table}
1726: %
1727: 
1728: We note the following features of these results.  First, in both modes,
1729: basically all the data has been analyzed by the end of the third
1730: semi-coherent stage.
1731: This is not a requirement that we put in by hand, but rather it arises from the
1732: optimization: the optimal scheme ``gets through'' the entire year's worth of
1733: data before the final follow-up stage.
1734: Secondly, in data-recycling mode, $73.8\%$ of the computing time
1735: is spent in the first stage, $24.9\%$ in
1736: the second, $1.3\%$ in the third and a negligible fraction in the follow-up.  The
1737: results are similar for the fresh-data mode: approximately $74.2\%$ of the
1738: computational resources are spent in the first stage,  $24.2\%$ in the second
1739: stage, $1.6\%$ in the third stage and a negligible amount in the follow up stage.
1740: Finally, fresh-data mode entails
1741: a slightly lower computational cost than data-recycling mode.
1742: However this last fact could be an artifact either of having slightly
1743: different overall FD rates in the two cases, or of our using an overestimate
1744: of $F^{(i)}$ in the latter case. The bottom line is that, after optimization,
1745: the two modes are almost equally efficient.
1746: 
1747: If instead we consider a search for older pulsars, with $\tau_\min
1748: = 10^6\,$yr instead of $40\,$yr, then the optimal solutions for both modes
1749: are summarized in Tables
1750: \ref{tab:BC-params-million}--\ref{tab:Fresh-cc-million}.   A larger
1751: value of $\tau_\min$ means a smaller number of templates, and therefore
1752: a more sensitive search for fixed computational cost.
1753: 
1754: For data-recycling mode, we have lowered the threshold $h_{th}$ by a
1755: factor $2.35$, to the point where the required computational power is
1756: again $40.2\,$ Tflops, as in the example of Tables \ref{tab:bc-params}
1757: and \ref{tab:bc-cc}. The results are shown in tables
1758: \ref{tab:BC-params-million} and \ref{tab:BC-cc-million}.  
1759: Compared to the young-pulsar search, the computational power is now
1760: spread more evenly over the first two stages: the first stage
1761: consumes about $58.27\%$ of the power, the second stage $38.73\%$,
1762: third stage $3.0\%$ and negligible for the follow-up stage.
1763: %
1764: \begin{table}
1765: \caption{Search parameters for data-recycling mode with $f_\max=1000$Hz,
1766:   $\tau_{\min}=10^6$yr, $T_\max=1$yr, ${h_{th}^2}/{S_n} = 4.53\times
1767:   10^{-6}\textrm{sec}^{-1}$, and computational power $40.2$Tflops;
1768: $\eta$ is defined according to Eq.~(\ref{eta}).}
1769: \vspace{0.2cm}
1770: \begin{tabular}{|c|c|c|c|c|c|}
1771:     \hline
1772: Stage & $\Tcoh^{(i)}$ (days) & $\mu^{(i)}$ & $N^{(i)}$ & $\Tobs^{(i)}$ (days) & $\sqrt{\eta}$ \\
1773:     \hline
1774: &&&&&\\
1775: 1 & 14.84 & 0.3514 & 8 & 118.72 &  9.06 \\&&&&&\\
1776: 2 & 30.06 & 0.0917 & 6 & 180.34 & 11.70 \\&&&&&\\
1777: 3 & 52.18 & 0.0986 & 7 & 365.25 & 16.63 \\
1778: \hline
1779: \end{tabular}
1780: \label{tab:BC-params-million}
1781: \end{table}
1782: %
1783: %
1784: \begin{table}
1785: \caption{The computational cost to analyze one year of  data in
1786:   data-recycling mode. The search parameters are the same as given in
1787:   Table~\ref{tab:BC-params-million}.  $C^{(i)}_\coh$ is the cost for the coherent
1788:   demodulation step and $C^{(i)}_{ss}$ for the stack-slide step, while
1789:   $C^{(i)}$ is the sum of these two. Follow-up indicates the computational
1790:   cost required for the final follow-up stage.}
1791: \vspace{0.2cm}
1792: \begin{tabular}{|c|c|c|c|}
1793:     \hline
1794: Stage & $C^{(i)}$ (Flop) & $C^{(i)}_\coh$ (Flop) & $C^{(i)}_{ss}$ (Flop) \\
1795:     \hline
1796: &&&\\
1797: 1 & $7.41\times 10^{20}$ & $2.85\times 10^{18}$ & $7.39\times 10^{20}$  \\
1798: &&&\\
1799: 2 & $4.93\times 10^{20}$ & $3.77\times 10^{20}$ & $1.16\times 10^{20}$  \\
1800: &&&\\
1801: 3 & $3.82\times 10^{19}$ & $1.34\times 10^{19}$ & $2.48\times 10^{19}$ \\
1802: &&&\\
1803: Follow-up & $6.18\times 10^{13}$ & & \\
1804: \hline
1805: \end{tabular}
1806: \label{tab:BC-cc-million}
1807: \end{table}
1808: %
1809: 
1810: For the case of fresh-data mode, we have lowered the threshold $h_{th}$ by a factor
1811: $2.36$, to the point where the required computational power is
1812: again $34.6\,$ Tflops, as in the example of Tables
1813: \ref{tab:fresh-params} and \ref{tab:fresh-cc}.
1814: Once again, compared to the young-pulsar search, the computational
1815: costs are spread more evenly over the first two stages: the first stage
1816: consumes about $31.3\%$ of the power, the second stage $30.4\%$,
1817: third stage $14.0\%$.  In this case, the cost for the
1818: follow-up stage is $24.0\%$, which is not negligible.  
1819: This indicates that, for this case, the earlier stages have not succeeded in 
1820: reducing the number of candidates to a low level.  
1821: The overall sensitivity, though, is still almost identical to the
1822: data-recycling case.
1823: %
1824: \begin{table}
1825: \caption{Search parameters for fresh-data mode with $f_\max=1000$Hz,
1826:   $\tau_{\min}=10^6$yr, $T_\max=1$yr, ${h_{th}^2}/{S_n} = 4.47\times
1827:   10^{-6}\textrm{sec}^{-1}$, and computational power $34.6$Tflops;
1828: $\eta$ is defined according to Eq.~(\ref{eta}).}
1829: \vspace{0.2cm}
1830: \begin{tabular}{|c|c|c|c|c|c|}
1831:     \hline
1832: Stage & $\Tcoh^{(i)}$ (days) & $\mu^{(i)}$ & $N^{(i)}$ & $\Tobs^{(i)}$ (days) & $\sqrt{\eta}$ \\
1833:     \hline
1834: &&&&&\\
1835: 1 & 11.77 & 0.2074 & 9 & 105.96 &  9.04 \\&&&&&\\
1836: 2 & 10.97 & 0.0199 & 6 &  65.82 &  7.13 \\&&&&&\\
1837: 3 & 27.64 & 0.0206 & 7 & 193.47 & 12.22 \\
1838: \hline
1839: \end{tabular}
1840: \label{tab:Fresh-params-million}
1841: \end{table}
1842: %
1843: %
1844: \begin{table}
1845: \caption{Same as Table~\ref{tab:BC-cc-million}, except for fresh-data mode. The
1846: search parameters are those of Table~\ref{tab:Fresh-params-million}.}
1847: \vspace{0.2cm}
1848: \begin{tabular}{|c|c|c|c|}
1849:     \hline
1850: Stage & $C^{(i)}$ (Flop) & $C^{(i)}_\coh$ (Flop) & $C^{(i)}_{ss}$ (Flop) \\
1851:     \hline
1852: &&&\\
1853: 1 & $3.46\times 10^{20}$ & $3.07\times 10^{18}$ & $3.43\times 10^{20}$  \\
1854: &&&\\
1855: 2 & $3.35\times 10^{20}$ & $3.33\times 10^{20}$ & $1.91\times 10^{18}$  \\
1856: &&&\\
1857: 3 & $1.54\times 10^{20}$ & $9.23\times 10^{19}$ & $6.19\times 10^{19}$ \\
1858: &&&\\
1859: Follow-up & $2.67\times 10^{20}$ & & \\
1860: \hline
1861: \end{tabular}
1862: \label{tab:Fresh-cc-million}
1863: \end{table}
1864: %
1865: 
1866: Let us now discuss the false alarm rate.  We require
1867: that the overall FA rate be less than $1\%$, and we
1868: claimed in section~\ref{subsec:optimize} that this is automatically
1869: satisfied in typical, realistic cases.  We can now verify this claim. 
1870: For the $\tau_\min=40\,$yr search summarized in tables
1871: \ref{tab:bc-params}--\ref{tab:fresh-cc}, 
1872: using (\ref{eta}), with $\mu_\max = \mu_\max^{(coh)} = 0.2$, the
1873: threshold corresponds to
1874: $\rho^{(coh)}_\th \approx \bar{\rho} = 2 + \eta/2 \approx 738 $.
1875: By Eq.~(\ref{eq:alphacoh}), this corresponds to
1876: $\tilde{\alpha} \approx 10^{-318}$ (for either mode).  Using
1877: Eq.~(\ref{eq:Np}), the number of independent templates
1878: required for a full coherent search
1879: of the entire parameter space, using $1\,$yr of data, is
1880: $f_\max T_\max N_p(1\textrm{yr},0.2,1) \approx  10^{34}$.
1881: The overall false alarm
1882: rate is thus $FA = f_\max T_\max N_p\tilde{\alpha} \approx 10^{-284} \ll 1\%$
1883: \footnote{Strictly speaking, Eq.~(\ref{eq:Np}) for the number
1884:   of templates is valid only for observation times which are
1885:   significantly less than a year.  However, unless the discrepancy is
1886:   many orders of magnitude, the numbers obtained show that it is
1887:   obviously sufficient for the purposes of this argument. }.
1888: 
1889: Similarly, for the case $\tau_\min = 10^6\,$yr, $h^2_{th}/S_n = 4.53
1890: \times 10^{-6}$ (data recycling mode, tables
1891: \ref{tab:BC-params-million} and \ref{tab:BC-cc-million}), we get
1892: $\eta \approx 286$ so that $\tilde\alpha \approx 10^{-61}$.  In this
1893: case, we have $N_p
1894: \approx 10^{17}$ so that $FA = f_\max T_\max N_p \tilde{\alpha}
1895: \approx 10^{-34}$. If we look at fresh data mode (tables
1896: \ref{tab:Fresh-params-million} and \ref{tab:Fresh-cc-million}) with
1897: $\tau_\min =  10^6\,$yr, $h^2_{th}/S_n = 4.47 \times 10^{-6}$, we get 
1898: $FA \approx 10^{-33}$. These values are greater than for the case of
1899: young pulsars, but still vastly smaller than $1\%$.   
1900: 
1901: The basic point is simply this: For an all-sky search, sensitivity is
1902: limited by computing power, so the detection threshold $h_{th}$ in
1903: practice is substantially higher than what it would be for infinite
1904: computing power.  This means that for a signal to be detectable, it
1905: must have quite a high SNR (in the matched filter sense)--which means
1906: that the FA rate is exponentially small.
1907: Being computationally limited means that when we {\it do} detect something,
1908: we can be very confident that it is not simply random noise masquerading as
1909: a signal.\footnote{Of course, no sensible person would ever claim that
1910: he or she had detected a GW pulsar with FA probability of less than
1911: $10^{-284}$. In such a case, the ``statistical error'' is so ridiculously
1912: small that the true FA rate is dominated by the other,
1913: hard-to-quantify factors,
1914: such as the probability of having some bug in the instrumentation or
1915: in the data analysis code.}
1916: 
1917: How accurate are our numerical results?  The total computational cost is a
1918: complicated function on a 9-dimensional space and thus is not easy to
1919: visualize.  We can, however, take appropriate sections of this function
1920: to examine its behavior near the minimum.  Thus, we can ask
1921: whether variations in, say, $\Tcoh^{(1)}$ or $\Tcoh^{(2)}$ away from
1922: their optimal values, increase the computational cost (as they should,
1923: if we have truly found a minimum).
1924: To answer this, in Fig.~\ref{fig:DT1} we plot the total
1925: computational power as a function of
1926: $\Tcoh^{(1)}$ and $\Tcoh^{(2)}$, respectively,
1927: for the young-pulsar searches summarized in Tables~\ref{tab:bc-params}--\ref{tab:fresh-cc}.
1928: All the other parameters are fixed at their optimal values.
1929: The minima of these curves agree precisely with our simulated
1930: annealing results.
1931: Similarly, Figs.~\ref{fig:N1} and
1932: \ref{fig:mu123} carry the same message, as well as showing
1933: the strong dependence of the computational cost
1934: on $N^\ib$ and $\mu_\max^\ib$.  (It is not so clear from the
1935: plot of $P$ vs.  $\mu_\max^{(3)}$ that this curve has
1936: a minimum in the range shown, but it does in fact have a very shallow
1937: one.)
1938: 
1939: For the plot of computational cost
1940: versus $N^{(3)}$, we are not allowed to keep $\Tcoh^{(3)}$ fixed, since
1941: that could violate the constraint $\Tcoh^{(3)}N^{(3)} \leq T_\max$.
1942: (Recall that $\Tcoh^{(3)}N^{(3)} = T_\max =1\,$yr for the optimal 3-stage
1943: solution, which is therefore just at the boundary of the constraint
1944: region.)  Therefore, we choose to plot the computational cost as a
1945: function of $N^{(3)}$ while simultaneously varying $\Tcoh^{(3)}$
1946: according to $\Tcoh^{(3)} = T_\max/N^{(3)}$.
1947: 
1948: A noteworthy feature of these plots is that the
1949: computational power $P$ depends more sensitively on the
1950: early-stage parameters than the late-stage ones; e.g., more
1951: sensitively on $N^{(1)}$ and $N^{(2)}$ than on $N^{(3)}$.
1952: This result should not be surprising since,
1953: as mentioned earlier, for the young-pulsar search the
1954: computational cost of the higher stages is relatively small.
1955: %
1956: \begin{figure}
1957:   \begin{center}
1958:   \includegraphics[width=\columnwidth]{BC-Fresh-DeltaT.eps}
1959:   \caption{Computational power $P$ as a function of
1960:   $\Delta T^{(1)}$ and $\Delta T^{(2)}$, with all other parameters fixed to their optimal
1961:   values. }\label{fig:DT1}
1962:   \end{center}
1963: \end{figure}
1964: %
1965: \begin{figure}
1966:   \begin{center}
1967:   \includegraphics[width=\columnwidth]{BC-Fresh-N.eps}
1968:   \caption{Computational power $P$ as a function of
1969:   $N^{(1)}$, $N^{(2)}$ and $N^{(3)}$.
1970:   For the $N^{(1)}$ and $N^{(2)}$ plots, all other parameters have been fixed
1971:   to their optimal values. For the $N^{(3)}$ plot, we have also
1972:   varied $\Tcoh^{(3)}$
1973:   according to $\Tcoh^{(3)} = T_\max/N^{(3)}$ in order to satisfy the
1974:   constraint that the amount of data available is
1975:   $T_\max$.}\label{fig:N1}
1976:   \end{center}
1977: \end{figure}
1978: %
1979: \begin{figure}
1980:   \begin{center}
1981:   \includegraphics[width=\columnwidth]{BC-Fresh-Mu.eps}
1982:   \caption{Computational power $P$ as a function of
1983:   $\mu_\max^{(1)}$, $\mu_\max^{(2)}$, and $\mu_\max^{(3)}$.  For each
1984:   plot, all other parameters are fixed at their optimal
1985:   values.}\label{fig:mu123}
1986:   \end{center}
1987: \end{figure}
1988: %
1989: 
1990: \subsection{The spindown-age and the SNR}
1991: \label{subsec:taumin}
1992: 
1993: How does the (minimum) computational cost depend on
1994: the shortest spindown timescale that we search over, $\tau_\min$?
1995: Consider again
1996: the case where we have one year of data and we perform an
1997: all-sky search up to a
1998: frequency of $f_\max = 1000\,$Hz.  Fig.~\ref{fig:bcfresh-taumin} shows the
1999: results for both the data-recycling and fresh-data modes,
2000: for two different values
2001: of the 1-year SNR.  Note that these results do pass simple
2002: sanity checks: the computational cost decreases as the SNR
2003: increases (since it is easier to look for stronger signals), and
2004: the computational cost decreases as $\tau_\min$ increases (since it
2005: is easier to search through a smaller parameter space).
2006: 
2007: %
2008: \begin{figure}
2009:   \begin{center}
2010:   \includegraphics[width=\columnwidth]{BC-Fresh-CCvsTau-1year-NS=3-h2sn.eps}
2011:   \caption{The minimum computational power $P$ required for
2012:   analyzing 1 year's worth of data as a function of the pulsar's
2013: spindown age $\tau_\min$. We
2014:   consider a three-stage search in both the data-recycling and
2015:   fresh-data modes, for two
2016:   different signal strengths. The data-recycling mode results are shown
2017:   with dashed lines, while the fresh-data results are in dotted lines.
2018:   In parts of the curves, the results for the two modes are
2019:   so close together that it is hard to distinguish them.}
2020: \label{fig:bcfresh-taumin}
2021:   \end{center}
2022: \end{figure}
2023: %
2024: %
2025: \begin{figure}
2026:   \begin{center}
2027:   \includegraphics[width=\columnwidth]{BC-Fresh-SNRvsTau-1year-CC=1e13.eps}
2028:   \caption{The 1-year SNR (with zero mismatch) as a function
2029:   of  $\tau_\min$, for fixed computational power
2030:   $P= 10^{13}\,$Flops. The dashed line indicates the result for
2031:   data-recycling mode and the dotted line for fresh-data. Since these two
2032:   results are very close to each other, it may be difficult to
2033:   distinguish them.}\label{fig:bcfresh-snr}
2034:   \end{center}
2035: \end{figure}
2036: %
2037: One can also ask: for a given available computational power, how does
2038: the threshold SNR scale with $\tau_\min$?  This is shown in
2039: Fig.~\ref{fig:bcfresh-snr}. The plot is based on the assumption that
2040: we have one year's worth of data and that we have 10 TFlops of
2041: computing power at our disposal. By ``SNR'', here we mean the
2042: matched-filter SNR, for a perfectly matched filter.
2043: Fig.~\ref{fig:bcfresh-snr} tells us
2044: that a search for unknown GW pulsars with spindown ages
2045: $> 10^6\,$yr  can detect $\sim 85$--$90\%$ of pulsars whose SNR is $>17$
2046: (again, with FA rate $\ll 1\%$).
2047: In an all-sky search for very young pulsars, with $\tau_\min =
2048: 40\,$yr, the SNR required for detection (with the same FD and FA
2049: rates) increases to $\sim 43$.  In comparison, for a source where the
2050: sky position and frequency are known in advance (from radio
2051: observations), an SNR of only $4.56$ is required for detection, with a
2052: $10\%$ FD rate and $1\%$ FA rate~\cite{S1:pulsar}.
2053: 
2054: 
2055: Fig.~\ref{fig:bcfresh-snr} strongly suggests that one would
2056: like to simultaneously perform at least two different
2057: all-sky searches: one for old GW pulsars and another for young ones,
2058: with comparable (within a factor ten) computer power devoted to each, but
2059: with quite different thresholds.
2060: (If one set the same threshold for both old and young pulsars, then
2061: almost all computing resources would end up being spent on the young
2062: ones.)
2063: Clearly, to determine the ``best'' apportionment of resources between the
2064: two types of searches would require some additional inputs/assumptions, but
2065: at least Fig.~\ref{fig:bcfresh-snr} seems a good first step towards
2066: making an intelligent allocation.
2067: 
2068: 
2069: \section{Conclusions}
2070: \label{sec:conclusions}
2071: 
2072: Let us first summarize the main results of this paper.  We have
2073: studied general hierarchical strategies for searching for
2074: gravitational waves from unknown, isolated GW pulsars.  In particular, we
2075: have considered multistage hierarchical algorithms where each stage
2076: (except the last)
2077: consists of a coherent demodulation of short stacks of data followed
2078: by appropriate sliding and stacking of the $\F$-statistics results from
2079: the different stacks.
2080: The successive stages serve to quickly reduce the
2081: number of candidates; they are followed by a final coherent follow-up stage to
2082: fully analyze the remaining candidates.
2083: 
2084: We have optimized this strategy by minimizing the computational cost $P$
2085: subject to the constraints which specify the total amount of data
2086: available and the desired confidence levels.
2087: Of course, $P$ depends on the size of the parameter space---in
2088: particular, on the range of frequencies and spindown ages that
2089: are searched over.
2090: Carrying out the optimization, and varying over the number $n$ of
2091: semi-coherent stages, we found that the advantages of the multistage
2092: approach saturate at $n=3$ (i.e., $n=4$ and $5$ are scarcely better).
2093: 
2094: The optimized search parameters $(N^\ib,\Delta T^\ib,\mu^\ib)$
2095: we report should only be considered a rough guide for carrying
2096: out a search in practice because i) in many places we have
2097: used theoretical estimates
2098: of the operations counts instead of those obtained by profiling
2099: existing codes, ii) we have not considered
2100: issues of memory storage or the cost of performing any Monte-Carlo simulations,
2101: and iii) the detector noise has throughout been assumed to be Gaussian and
2102: stationary.  Furthermore, the template counting formulae (\ref{eq:Np})
2103: used in this paper are, strictly speaking, valid only for observation times
2104: significantly less than a year.  The numbers presented in this work
2105: must be recalculated when better approximations become available.
2106: In spite of these limitations, we believe that our results do
2107: provide a useful qualitative guide to what an optimized all-sky search
2108: ``looks like.''  In order to optimize actual search codes, applied to
2109: actual data, one must 1) profile the codes to determine the actual
2110: computational cost of the different operations, and 2) do Monte Carlo
2111: studies to determine the {\it actual} $\alpha^\ib$ for different thresholds.
2112: (Recall that the formulae given  here are
2113: based on the assumption of stationary, Gaussian noise.) The latter could
2114: require considerable work, especially if the results are strongly
2115: frequency-dependent, with some bands being much ``better behaved''
2116: than others.
2117: 
2118: 
2119: Finally, we mention some other possibilities for future work:
2120: %
2121: \begin{itemize}
2122: \item It would be trivial to extend our work to consider searches that are
2123:   less computationally challenging than all-sky ones, but that are still computationally limited.
2124:   E.g., one could consider searches for unknown NSs in supernova remnants
2125:   (such as SN 1987A), in which case the sky-position is well known but
2126:   the frequency and spindown parameters must be searched over. Similarly, one
2127:   could consider a search over a small fraction of sky, e.g., a portion containing
2128:   the Galactic center or the disk.
2129: \item The formulae for operations counts, confidence levels etc. can
2130:   also be derived for the case when the Hough transform~\cite{hough}
2131:   is used in the semi-coherent stages instead of the stack-slide method;
2132:    the optimization of multistage, hierarchical Hough-type searches would
2133: then proceed in the same way as developed here.
2134: \item We expect that the lessons learned in this paper will carry over to
2135:   searches for GW pulsars in low-mass X-ray binaries, which
2136:   are also computationally limited~\cite{vecchio}.
2137:   However, the details are yet to be worked out.
2138: \item The problem of searching, in LISA data, for the inspiral signals
2139:   of stellar-mass compact objects captured by $\sim 10^6 M_{\odot}$ BHs
2140:   in galactic nuclei, is similar to the GW pulsar search problem, but
2141:   even more computationally challenging~\cite{emri}.
2142:   We expect that the lessons learned in this
2143:   paper will also be very useful in formulating and optimizing a
2144:   search algorithm for LISA capture sources.
2145: \item
2146: 
2147: In this paper we have tacitly assumed that the search is performed
2148: by a single computer or computing cluster.
2149: However, at least in the next few years,
2150: the most computationally intensive GW searches will be directed by
2151: \texttt{Einstein@Home}~\cite{eah}, which relies on tens of thousands
2152: of individual participants donating their idle computing power.
2153: In this case, there might be additional constraints that we
2154: have not yet considered, relating to the rate at which data
2155: and intermediate results can be sent back and forth between the
2156: \texttt{Einstein@Home}~\cite{eah} servers and participants' computers, how
2157: much storage is available for use on participants' computers, etc.
2158: We intend to study hierarchical searches in this context also,
2159: to see which if any of the lessons learned here must be modified for the
2160: \texttt{Einstein@Home} context.
2161: 
2162: 
2163: \end{itemize}
2164: %
2165: 
2166: \section*{Acknowledgements}
2167: 
2168: The authors thank Bruce Allen, Teviet Creighton, Maria Alessandra
2169: Papa, Reinhard Prix, Bernard Schutz, Xavier Siemens and Alicia Sintes
2170: for valuable discussions. BK acknowledges the hospitality of the
2171: University of the Balearic Islands in Spain.
2172: 
2173: \begin{appendix}
2174: 
2175: \section{Computational cost of the SFT method}
2176: \label{app:sftmethod}
2177: Here we estimate the computational cost (in floating point operations)
2178: of calculating the $\F$-statistic via the SFT method.
2179: This result is used in Sec.~IV.C.
2180: 
2181: We begin by reviewing the details of the SFT method; our description
2182: closely follows that given in the documentation of the software
2183: package LIGO Algorithms
2184: Library~\cite{lscsoft}. Imagine
2185: that we wish to compute the $\F$-statistic for a data stretch of length
2186: $\Tcoh$.  Divide this data into
2187: $M$ shorter segments of length $\Tsft = \Delta T/M$,
2188: each containing $N$ data points (so there are
2189: $MN$ data points within $\Tcoh$). The sampled values of
2190: $x(t)$ can then be written as $x_{\alpha j}$ where $0\leq \alpha < M$
2191: labels the segment and $0\leq j < N$ labels points within a
2192: segment. Eq.~(\ref{eq:Fa}) can then be discretized as follows:
2193: %
2194: \be
2195: F_a(\bmath{\lambda}) = \sum_{\alpha = 0}^{M-1}\sum_{j=0}^{N-1} a_{\alpha j}x_{\alpha
2196: j} e^{-i\Phi_{\alpha j}(\bmaths{\lambda})}\,
2197: \ee
2198: %
2199: and similarly for Eq.~(\ref{eq:Fb}).
2200: Let $\tilde{x}_{\alpha k}$ be the discrete Fourier transform of
2201: $x_{\alpha j}$ along the index $j$, so that
2202: %
2203: \be
2204: x_{\alpha j} = \frac{1}{N}\sum_{k=0}^{N-1}\tilde{x}_{\alpha k}e^{2\pi ijk/N}\,.
2205: \ee
2206: %
2207: Then if we approximate the amplitude
2208: modulation function $a(t)$ as constant over the short-time baseline $\Tsft$,
2209: the expression for $F_a$ becomes
2210: %
2211: \be
2212: F_a(\bmath{\lambda}) = \sum_{\alpha = 0}^{M-1}a_\alpha
2213: \sum_{k=0}^{N-1}\tilde{x}_{\alpha k} \left[
2214: \frac{1}{N}\sum_{j=0}^{N-1}\exp\left(\frac{2\pi i jk}{N} -
2215: i\Phi_{\alpha j}(\bmath{\lambda})\right) \right] \,.
2216: \ee
2217: %
2218: The short-time baseline $\Tsft$ is generally chosen so that neither
2219: pulsar spindown nor the Doppler effect causes the signal power to
2220: shift by more than, say, half a frequency bin.  Then we can find
2221: functions $A_{\alpha}(\vec{\lambda})$ and $B_{\alpha
2222:   k}(\bmath{\lambda})$ such that to a good approximation
2223: %
2224: \be
2225: \Phi_{\alpha j}(\vec{\lambda}) - \frac{2\pi jk}{N}  = A_{\alpha}(\vec{\lambda}) +
2226: \frac{B_{\alpha k}(\bmath{\lambda}) j}{N} \,.
2227: \ee
2228: %
2229: Thus we have
2230: %
2231: \ba
2232: &&\frac{1}{N}\sum_{j=0}^{N-1}\exp\left(\frac{2\pi i jk}{N} -
2233: i\Phi_{\alpha j}(\bmath{\lambda})\right) \nonumber \\&& =
2234: \frac{1}{N}\, e^{-iA_{\alpha}(\vec{\lambda})} \left( \frac{1-e^{-iB_{\alpha
2235: k}(\bmaths{\lambda})}}{1-e^{-iB_{\alpha k}(\bmaths{\lambda})/N}} \right) \,.
2236: \ea
2237: %
2238: Next we assume $N$ is large enough that $1-e^{-iB_{\alpha
2239: k}(\bmaths{\lambda})/N} \approx iB_{\alpha k}(\bmath{\lambda})/N$; then
2240: %
2241: \be
2242: F_a = \sum_{\alpha =
2243: 0}^{M-1}a_\alpha e^{-iA_{\alpha}(\vec{\lambda})}\sum_{k=0}^{N-1} \tilde{x}_{\alpha
2244: k}P[B_{\alpha k}(\bmath{\lambda})]
2245: \ee
2246: %
2247: where
2248: %
2249: \ba
2250: P[x] &=& \frac{1}{N}\frac{1-e^{-ix}}{1-e^{-ix/N}} \nonumber
2251: \\ &\approx& \frac{\sin x}{x} -i\frac{1-\cos x}{x} \,.
2252: \ea
2253: %
2254: Now arises the great advantage of the SFT method: the function $P[x]$ is sharply peaked about $x=0$,
2255: so the sum over $k$
2256: can be approximated by retaining only a few terms:
2257: %
2258: \be \label{eq:Fa-LALDemod}
2259: F_a = \sum_{\alpha =
2260: 0}^{M-1}a_\alpha e^{-iA_{\alpha}(\vec{\lambda})}\sum_{k^\prime= -D}^{ D}
2261: \tilde{x}_{\alpha k^\prime}P[B_{\alpha k^\prime}(\bmath{\lambda})]
2262: \ee
2263: %
2264: where $k^\prime = k - k^\star$ and $k^\star$ is the value of $k$ such
2265: that $B_{\alpha k^\star}(\bmath{\lambda}) = 0$, and $D$ is the number of
2266: terms that we retain in the sum on either side of $k^\star$.
2267: It turns out that $D=16$ suffices to calculate the
2268: $\F$-statistic to within a few percent.
2269: 
2270: Eq.~(\ref{eq:Fa-LALDemod}) is our final approximation for $F_a$.
2271: Analogous expressions hold for $F_b$, and the final $\F$-statistic
2272: is calculated from $F_a$ and $F_b$ using Eq.~(\ref{eq:fstatdef}).
2273: Thus, with the SFT method, for each point $\bmath{\lambda}$ in parameter
2274: space we need to calculate $A_\alpha(\vec{\lambda})$,
2275: $B_{\alpha k}(\bmath{\lambda})$, and the amplitude modulation functions,
2276: $a_\alpha$ and $b_\alpha$, and then to perform the sums in
2277: Eq.~(\ref{eq:Fa-LALDemod}). It is then easy to see that to calculate the
2278: $\F$-statistic for $n$ frequency bins, for a fixed value of
2279: $\vec{\lambda}$, the number of floating point operations required is
2280: roughly some $\textrm{constant}$ times $nMD$.   To estimate the constant,
2281: let $C_1$ be the cost of calculating $P[B_{\alpha k}]$ for each
2282: $\alpha$ and $k$ value.  Since multiplying two complex
2283: numbers requires $6$ operations, and adding two complex numbers requires $2$
2284: operations, we see that calculating the sum over $k^\prime$ in equation
2285: (\ref{eq:Fa-LALDemod}) requires
2286: %
2287: \be (C_1 + 6)(2D + 1) + 4D
2288: \ee
2289: %
2290: operations. Similarly, if $C_2$ is the cost of calculating $a_\alpha e^{-iA_{\alpha}}$ for
2291: every $\alpha$, then the cost of calculating $F_a$ is
2292: %
2293: \be M[(C_1 + 6)(2D + 1) + 4D] + MC_2 + 6M + 2(M-1) \,.\ee
2294: %
2295: Thus, to find
2296: $F_a$ and $F_b$ for each frequency bin requires $\approx 2\times
2297: (2C_1 + 16)DM$ operations.  Since the cost of combining $F_a$ and $F_b$ to
2298: get $\F$ is negligible compared to this, and assuming $C_1$ to be of
2299: order unity, we see that the operation count for
2300: calculating the $\F$-statistic
2301: for $n$ frequency bins is approximately
2302: %
2303: \be
2304: \sim 40 nMD\,.
2305: \ee
2306: %
2307: For the first stage in the hierarchical search, the
2308: $\F$-statistic is evaluated for $N^{(1)}N_{pc}^{(1)}f_\max\Tcoh^{(1)}$ bins.
2309: So, taking $D=16$ and using $M = \Tcoh^{(1)}/\Tsft$, the cost is
2310: %
2311: \be \label{eq:sftcostapp}
2312: \approx 640 N^{(1)}N_{pc}^{(1)}f_\max \frac{(\Tcoh^{(1)})^2}{\Tsft} \, .
2313: \ee
2314: %
2315: \noindent
2316: At higher stages, we evaluate the $\F$-statistic
2317: \be
2318: F^{(i-1)}
2319: \textrm{max}\left\{1,\frac{N_{pc}^\ib}{N_{pf}^{(i-1)}} \right\}
2320: \textrm{max}\left\{1,\frac{\Tcoh^\ib}{\Tcoh^{(i-1)}}\right\}\, N^{(i)}
2321: \ee
2322: times, so the operations count is
2323: \ba
2324: &&F^{(i-1)}
2325: \textrm{max}\left\{1,\frac{N_{pc}^\ib}{N_{pf}^{(i-1)}} \right\}
2326: \textrm{max}\left\{1,\frac{\Tcoh^\ib}{\Tcoh^{(i-1)}}\right\}  \nonumber \\
2327: && \ \ \times \left[\frac{640 N^\ib\Tcoh^\ib}{\Tsft} \right] \,.
2328: \ea
2329: 
2330: \end{appendix}
2331: 
2332: 
2333: \begin{thebibliography}{}
2334: 
2335: \bibitem{bccs}   P.R.~Brady, T.~Creighton, C.~Cutler, and B.F.~Schutz,
2336:  \textit{Phys. Rev.} {\bf D57} 2101 (1998).
2337: 
2338: \bibitem{bc} P.R.~Brady and T.~Creighton,
2339: \textit{Phys. Rev.} {\bf D61}, 082001 (2000).
2340: 
2341: \bibitem{jks}   P.~Jaranowski, A.~Kr\'olak, and B.F.~Schutz,
2342:  \textit{Phys. Rev.} {\bf D58} 063001  (1998).
2343: 
2344: \bibitem{S1:pulsar} B.~Abbott et al. (The LIGO Scientfic
2345: Collaboration),
2346: \textit{Phys. Rev.} \textbf{D69}, 082004 (2004).
2347: 
2348: \bibitem{ip} R.~Prix and Y.~Itoh, \texttt{gr-qc/0504006}.
2349: 
2350: \bibitem{hough} B.~Krishnan, A.M.~Sintes, M.A.~Papa, B.F.~Schutz,
2351: S.~Frasca, and C.~Palomba,
2352: \textit{Phys. Rev.} \textbf{D70}, 082001 (2004).
2353: 
2354: \bibitem{lscsoft} The software can be found on the following websites:
2355:   \texttt{http://www.lsc-group.phys.uwm.edu/daswg/projects/\\lal.html}
2356:   and \texttt{http://www.lsc-group.phys.uwm.edu/\\daswg/projects/lalapps.html}
2357: 
2358: \bibitem{owen} B.~Owen, \textit{Phys. Rev.} \textbf{D53}, 6749 (1996).
2359: 
2360: \bibitem{jhc} J.H.~Conway and N.J.A.~Sloane, \textit{Sphere Packings,
2361: Lattices and Groups}, Springer (1991).
2362: 
2363: \bibitem{abjk} P.~Astone, K.M.~Borkowski, P.~Jaranowski, and
2364: A.~Kr\'olak,
2365: \textit{Phys. Rev.} {\bf D65} 042003 (2003).
2366: 
2367: \bibitem{mrrtt} N.~Metropolis, A.~Rosenbluth, M.~Rosenbluth,
2368:   A.~Teller, E.~Teller, \textit{J. Chem. Phys.} \textbf{21}, 1087
2369:   (1953).
2370: 
2371: \bibitem{kgv} S.~Kirkpatrick, C.D.~Gelatt Jr., and M.P.~Vecchi,
2372:   \textit{Science} \textbf{220}, No. 4598, 671 (1983).
2373: 
2374: \bibitem{nm} J.A.~Nelder and R.~Mead, \textit{Comput. J.} \textbf{7},
2375:   308-313, 1965.
2376: 
2377: \bibitem{nr} W.H.~Press, \textit{Numerical Recipes in C}, Cambridge
2378:   University Press (2002).
2379: 
2380: \bibitem{gsl} \texttt{http://www.gnu.org/software/gsl/}.
2381: 
2382: \bibitem{vecchio} S.V.~Dhurandhar and A.~Vecchio,
2383:  \textit{Phys. Rev.} {\bf D63} 122001 (2001).
2384: 
2385: \bibitem{eah}
2386:   \texttt{http://www.physics2005.org/events/einsteinathome/\\\#einsteinathome}
2387: 
2388: \bibitem{emri}  J.\ R.\ Gair, L.\ Barack, T.\ Creighton,
2389: C.\ Cutler, S.\ L.\ Larson, E.\ S.\ Phinney, and M.\ Vallisneri,
2390: Proceedings of the Eighth GWDAW Meeting (Milwaukee, 2003); gr-qc/0405137.
2391: 
2392: \end{thebibliography}
2393: 
2394: 
2395: \end{document}
2396: 
2397: 
2398: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%5
2399: