q-bio0611029/PS.tex
1: \documentclass[11pt]{article}
2: \usepackage{latexsym, a4wide}
3: \usepackage{amsmath, rotating, color}
4: \usepackage{amsfonts,amssymb, amsthm}
5: %\usepackage{etex}
6: \usepackage{pictexwd}
7: %\usepackage[notcite,notref]{showkeys} % shows labels
8: %\textheight229mm
9: %\textwidth140mm
10: % max. 256 mm
11: 
12: \newcommand{\dickm}[1]{\text{\boldmath ${#1}$}}
13: \newcommand{\sO}{\mathcal{O}}
14: \newcommand{\so}{\text{\scriptsize$\mathcal{O}$}}
15: 
16: \newtheorem{theorem}{Theorem}
17: \newtheorem{proposition}{Proposition}[section]
18: \newtheorem{lemma}[proposition]{Lemma}
19: \newtheorem{corollary}[proposition]{Corollary}
20: \newtheorem{definition}[proposition]{Definition}
21: \theoremstyle{definition}
22: \newtheorem{remark}[proposition]{Remark}
23: \newtheorem{example}[proposition]{Example}
24: \newtheoremstyle{step}{3pt}{0pt}{}{}{\bf}{}{.5em}{}
25: \theoremstyle{step} \newtheorem{step}{Step}
26: \newcommand{\bs}[2]{\setcounter{step}{#1}{\addtocounter{step}{-1}}\begin{step}(#2)\end{step}}
27: \newtheorem{case}{Case}
28: \pagestyle{headings}
29: \numberwithin{equation}{section}
30: 
31: \setcounter{secnumdepth}{2}
32: \newcommand{\ml}[1]{\mbox{}\marginpar{\raggedleft\hspace{0cm}#1}}
33: \setcounter{tocdepth}{2}
34: %\renewcommand{\thefootnote}{\alph{footnote}}
35: \newcommand\unnumberedfootnote[1]{ %
36:         \let\temp=\thefootnote %
37:         \renewcommand{\thefootnote}{}%
38:         \footnote{#1}%
39:         \let\thefootnote=\temp%
40:         \addtocounter{footnote}{-1}}
41: 
42: \newcommand{\ij}[2]{#1\text{{\bf ---\!\!---}}#2}
43: 
44: 
45: %\newcommand{\ij}[2]{\text{\parbox{1cm}{\beginpicture
46: %\setcoordinatesystem units <.5cm,.5cm>
47: %\setplotarea x from 0 to 1, y from 0.2 to 1.1
48: %\plot 0.5 0.5 1.5 0.5 /
49: %\put{$#1$} [rC] at 0.2 0.5
50: %\put{$#2$} [lC] at 1.8 0.5 \endpicture}}}
51: 
52: \newcommand{\fjk}{\text{\parbox{2cm}{\beginpicture
53: \setcoordinatesystem units <.5cm,0.5cm>
54: \setplotarea x from 0 to 2, y from 0 to 2
55: \plot 1 0.5 1 1 0.5 1.5 1 1 1.5 1.5 /
56: \put{$\bullet$} [cC] at 1 0.5
57: \put{$\pi'_{(j)}$} [cC] at 0.1 2
58: \put{$\pi'_{(k)}$} [cC] at 1.8 2 \endpicture}}}
59: 
60: \newcommand{\ljk}[2]{\text{\parbox{2cm}{\beginpicture
61: \setcoordinatesystem units <1mm,1mm>
62: \setplotarea x from 5 to 15, y from 0 to 15
63: \plot 5 5 2.5 7.5 5 5 7.5 7.5 /
64: %\multiput{\tiny $\bullet$} at 5 5 *5  1 1/
65: \put{$#1$}[rC] at 2 8
66: \put{$#2$}[lC] at 7 8 \endpicture}}}
67: 
68: \newcommand{\lk}[1]{\text{\parbox{2cm}{\beginpicture
69: \setcoordinatesystem units <.5cm,.5cm>
70: \setplotarea x from 1 to 2, y from 0 to 2
71: \plot 1 0.5 1 1 1.5 1.5 /
72: \put{$\bullet$}[cC] at 1 0.5
73: \put{$#1$}[lC] at 1.7 1.8
74:  \endpicture}}}
75: 
76: \newcommand{\one}[1]{\text{\parbox{1cm}{\beginpicture
77: \setcoordinatesystem units <.1cm,.1cm>
78: \setplotarea x from 0 to 6, y from 3 to 13
79: \plot 3 3 3 8 /
80: \put{\footnotesize#1} [cC] at 3 11
81: \multiput{\tiny $\bullet$} at 3 3 *50  0 0.1 /
82: \endpicture}}}
83: 
84: \newcommand{\Y}{\text{\parbox{1.5cm}{
85: \beginpicture
86: \setcoordinatesystem units <0.1cm, 0.1cm>
87: \setplotarea x from 0 to 20, y from 3 to 17
88: \plot 10 6 10 10 7 13 10 10 13 13 /
89: %\multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /
90: %\multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /
91: %\multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /
92: %\put{$\bullet$}[Cc]  at 10 6
93: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15
94: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15 
95: \endpicture}}}
96: 
97: \newcommand{\Yup}{\text{\parbox{1.5cm}{
98: \beginpicture
99: \setcoordinatesystem units <0.1cm, 0.1cm>
100: \setplotarea x from 0 to 20, y from 3 to 17
101: \plot 10 6 10 10 7 13 10 10 13 13 /
102: \multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /
103: \multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /
104: %\multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /
105: %\put{$\bullet$}[Cc]  at 10 6
106: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15
107: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15 
108: \endpicture}}}
109: 
110: \newcommand{\Yri}{\text{\parbox{1.5cm}{
111: \beginpicture
112: \setcoordinatesystem units <0.1cm, 0.1cm>
113: \setplotarea x from 0 to 20, y from 3 to 17
114: \plot 10 6 10 10 7 13 10 10 13 13 /
115: \multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /
116: %\multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /
117: \multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /
118: %\put{$\bullet$}[Cc]  at 10 6
119: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15
120: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15 
121: \endpicture}}}
122: 
123: \newcommand{\Yle}{\text{\parbox{1.5cm}{
124: \beginpicture
125: \setcoordinatesystem units <0.1cm, 0.1cm>
126: \setplotarea x from 0 to 20, y from 3 to 17
127: \plot 10 6 10 10 7 13 10 10 13 13 /
128: %\multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /
129: \multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /
130: \multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /
131: %\put{$\bullet$}[Cc]  at 10 6
132: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15
133: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15 
134: \endpicture}}}
135: 
136: \newcommand{\Ybottom}{\text{\parbox{1.5cm}{
137: \beginpicture
138: \setcoordinatesystem units <0.1cm, 0.1cm>
139: \setplotarea x from 0 to 20, y from 3 to 17
140: \plot 10 6 10 10 7 13 10 10 13 13 /
141: %\multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /
142: %\multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /
143: \multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /
144: %\put{$\bullet$}[Cc]  at 10 6
145: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15
146: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15 
147: \endpicture}}}
148: 
149: \newcommand{\Yupri}{\text{\parbox{1.5cm}{
150: \beginpicture
151: \setcoordinatesystem units <0.1cm, 0.1cm>
152: \setplotarea x from 0 to 20, y from 3 to 17
153: \plot 10 6 10 10 7 13 10 10 13 13 /
154: \multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /
155: %\multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /
156: %\multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /
157: %\put{$\bullet$}[Cc]  at 10 6
158: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15
159: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15 
160: \endpicture}}}
161: 
162: \newcommand{\Yuple}{\text{\parbox{1.5cm}{
163: \beginpicture
164: \setcoordinatesystem units <0.1cm, 0.1cm>
165: \setplotarea x from 0 to 20, y from 3 to 17
166: \plot 10 6 10 10 7 13 10 10 13 13 /
167: %\multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /
168: \multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /
169: %\multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /
170: %\put{$\bullet$}[Cc]  at 10 6
171: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15
172: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15 
173: \endpicture}}}
174: 
175: \newcommand{\Yall}{\text{\parbox{1.5cm}{
176: \beginpicture
177: \setcoordinatesystem units <0.1cm, 0.1cm>
178: \setplotarea x from 0 to 20, y from 3 to 17
179: \plot 10 6 10 10 7 13 10 10 13 13 /
180: \multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /
181: \multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /
182: \multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /
183: %\put{$\bullet$}[Cc]  at 10 6
184: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15
185: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15 
186: \endpicture}}}
187: 
188: 
189: 
190: 
191: \begin{document}
192: \title{\LARGE Approximating genealogies for partially linked neutral
193:   loci under a selective sweep}
194: %\runtitle{Partially linked loci under a selective sweep}
195: 
196: \thispagestyle{empty}
197: 
198: \author{{\sc by P. Pfaffelhuber\thanks{Corresponding author; Tel: (+49)-89-74-2180-108; email: p.p@lmu.de} and A. Studeny} \\[2ex]
199:   \emph{Ludwig-Maximilian University Munich} \vspace*{-7ex}} \date{}
200: 
201: \maketitle
202: \unnumberedfootnote{\emph{AMS 2000 subject classification.} 92D15
203:   (Primary), 60J80, 60J85, 60K37, 92D10 (Secondary).}
204: 
205: \unnumberedfootnote{\emph{Keywords and phrases.} Selective sweep,
206:   genetic hitchhiking, diffusion approximation, Yule process,
207:   ancestral recombination graph, random background}
208: 
209: 
210: \begin{abstract}
211: \noindent
212: Consider a genetic locus carrying a strongly beneficial allele which
213: has recently fixed in a large population. As strongly beneficial
214: alleles fix quickly, sequence diversity at partially linked neutral
215: loci is reduced. This phenomenon is known as a \emph{selective sweep}.
216: 
217: The fixation of the beneficial allele not only affects sequence
218: diversity at single neutral loci but also the joint allele
219: distribution of several partially linked neutral loci. This
220: distribution can be studied using the ancestral recombination graph
221: for samples of partially linked neutral loci during the selective
222: sweep. To approximate this graph, we extend recent work by
223: \cite{SchweinsbergDurrett2005, EtheridgePfaffelhuberWakolbinger2006}
224: using a marked Yule tree for the genealogy at a single neutral locus
225: linked to a strongly beneficial one.
226: 
227: We focus on joint genealogies at two partially linked neutral loci in
228: the case of large selection coefficients $\alpha$ and recombination
229: rates $\rho=\mathcal O(\alpha/\log\alpha)$ between loci.  Our approach
230: leads to a full description of the genealogy with accuracy of
231: $\mathcal O((\log \alpha)^{-2})$ in probability. As an application, we
232: derive the expectation of Lewontin's $D$ as a measure for non-random
233: association of alleles.
234: \end{abstract}
235: 
236: \section{Introduction}
237: The model of \emph{selective sweeps}, also known as \emph{genetic
238:   hitchhiking}, introduced by Maynard-Smith and Haigh in
239: \cite{MaynardSmithHaigh1974}, is the starting point for a large body
240: of both empirical and theoretical population genetic studies
241: (\cite{Nurminsky2005}). It predicts that sequence diversity is reduced
242: close to a strongly selected locus on a recombining genome near the
243: time of fixation of the beneficial allele. Theoretical studies aim at
244: describing these patterns of genetic diversity in detail while
245: empirical work uses this prediction to identify genes under selection.
246: 
247: If a species or a population adapts to its environment, several genes
248: might be under strong selection. Moreover, if the function of genes
249: were known, we would have predictions as to which genes are
250: responsible for the adaptive process. Unfortunately, functional
251: information is scarce. Without functional knowledge and in the
252: presence of recombination, the model of selective sweeps helps to
253: identify candidate genes affected by recent selective pressures.
254: Genome scans are carried out for a sample of individuals, which show
255: patterns of sequence diversity at lots of marker loci in the whole
256: genome (\cite{NielsenEtAl2005}). If a marker shows low diversity,
257: statistical tests help to decide if a gene under selection is located
258: nearby (\cite{KimStephan2002, LiStephan2005}).
259: 
260: Most theoretical studies of selective sweeps have focused on a model
261: with one selective and one partially linked neutral locus
262: (\cite{MaynardSmithHaigh1974, StephanWieheLenz1992,
263:   KaplanHudsonLangley1989, Barton1998, SchweinsbergDurrett2005,
264:   EtheridgePfaffelhuberWakolbinger2006}). This simple model already
265: describes the reduction in sequence diversity. However, genetic data
266: are frequently available for many partially linked loci. This raises
267: the question of whether selective sweeps also generate distinct
268: patterns of multi-locus allele frequencies. We will follow
269: \cite{StephanSongLangley2006} and study a three locus model with one
270: selective and two partially linked neutral loci. Using this model, it
271: is possible to study the non-random association of allelic types at
272: the two neutral loci, which is usually called \emph{linkage
273:   disequilibrium}.
274: 
275: % \smallskip
276: 
277: % Since the invention of the hitchhiking model by
278: % \cite{MaynardSmithHaigh1974} several techniques were developed to
279: % study selective sweeps. Most influential for simulations was the
280: % approach of \cite{KaplanHudsonLangley1989} who used a structured
281: % coalescent for the ancestry of the neutral locus. Here, the two
282: % 'structures' are given by individuals carrying the beneficial and the
283: % wild-type, respectively. In this model, migration between the
284: % structures happens due to recombination events between the selected
285: % and the neutral locus (see also \cite{BravermanEtAl1995,
286: %   KimStephan2002}). As selection is assumed to be strong, the
287: % frequency of the selected allele can be assumed to be deterministic.
288: % Different from this analysis are diffusion models, which have been
289: % used in both the two locus model (\cite{StephanWieheLenz1992}) as well
290: % as the three locus model (\cite{StephanSongLangley2006}).
291: 
292: An influential idea in the analysis of selective sweeps was to study
293: approximate genealogies describing relationships between the
294: individuals in a sample from the population. Studying genealogies at
295: the selected site started with \cite{KaplanDardenHudson1988} and was
296: carried further to linked neutral loci in
297: \cite{KaplanHudsonLangley1989}.
298: 
299: The genealogy at a single neutral locus can be constructed as a
300: \emph{structured coalescent}.  Here, the beneficial and wild-type
301: allele at the selected locus form two subpopulations. Their sizes are
302: determined by the frequency path of the beneficial allele during the
303: selective sweep. Assume a new gamete is built (forward in time) by
304: recombination of a beneficial allele at the selected locus and a
305: neutral variant linked to a wild-type. Following the neutral variant
306: backward in time leads to a migration event from the beneficial to the
307: wild-type background. Therefore, recombination acts as migration
308: between the beneficial and the wild-type backgrounds.
309: 
310: Genealogies of two or more loci can be constructed using the ancestral
311: recombination graph (\cite{Hudson1983, GriffithsMarjoram1997}).
312: Therefore, we will construct ancestries of two partially linked
313: neutral loci under a selective sweep by a \emph{structured ancestral
314:   recombination graph}.  As in the case of only one locus, the two
315: subpopulations are distinguished by the beneficial and wild-type
316: allele at the selected locus, respectively. This ancestral
317: recombination graph will serve as the exact model for genealogies at
318: partially linked loci under a selective sweep. However, an exact
319: analysis is hard to obtain, because the graph must be conditioned on
320: the random frequency path of the beneficial allele.
321: 
322: An alternative approach uses a two-step procedure for genealogies at
323: the selective and the neutral locus. First, the (approximate) genealogy at
324: the selective locus is generated and second, the genealogy at the
325: neutral locus is added, which might differ due to recombination.  Two
326: approximate genealogies at the selected site have been proposed.
327: First, a star-like genealogy, which means that the most recent common
328: ancestor of all pairs in the population is the individual which
329: carried the beneficial allele first (\cite{SchweinsbergDurrett2005,
330:   NielsenEtAl2005}). Second, a Yule process, i.e., a pure birth
331: process, which allows for coalescences also during the selective sweep
332: (\cite{SchweinsbergDurrett2005,
333:   EtheridgePfaffelhuberWakolbinger2006}). It was shown in
334: \cite[Theorems 1.1, 1.2]{SchweinsbergDurrett2005} that the Yule
335: process approximation is more exact than the star-like approximation.
336: Therefore, we will use this Yule process approximation for the
337: genealogy at the selected site to study the three locus model of
338: \cite{StephanSongLangley2006} for selective sweeps. We will show that
339: the analysis carried out in
340: \cite{EtheridgePfaffelhuberWakolbinger2006} in the two locus case can
341: be extended to the three locus case (Theorem \ref{T}). Moreover, the
342: approximation by a Yule process can be used to calculate
343: characteristics of linkage disequilibrium explicitly (Theorem
344: \ref{T2}).
345: 
346: \section{The model}
347: Consider a beneficial allele which enters a population of (haploid)
348: size $N$ at time $t=0$ and has a selective advantage of $s$ with
349: respect to the wild-type allele. Set $\alpha=sN$, which is called the
350: scaled selection coefficient. As selection can only be detected if the
351: beneficial allele fixes in the population, we condition on fixation of
352: the beneficial allele and let $T$ be the (random) time of fixation.
353: 
354: \smallskip
355: 
356: Assume reproduction in the population follows a Wright-Fisher model,
357: or, more generally, a Cannings model with individual offspring
358: variance 1. In the limit of infinite $N$ and a time rescaling in units
359: of $N$ generations, the frequency path of the beneficial allele is the
360: solution of the SDE
361: \begin{equation}\label{eq:SDE}
362:   dX = \alpha X(1-X)\coth(\alpha X)dt + \sqrt{X(1-X)}dW,
363: \end{equation}
364: with a standard Brownian motion $W$ and $X_0=0$. This diffusion arises
365: as $h$-transform of the process describing the unconditional frequency
366: path with the fixation probability of the beneficial allele as a
367: harmonic function and has $0$ as an entrance boundary. (See e.g.
368: \cite{Griffiths2003}, p. 245 and
369: \cite{EtheridgePfaffelhuberWakolbinger2006}, (2.1).)
370: 
371: Two neutral loci are partially linked to the selected locus. For
372: simplicity, we refer to the two neutral loci as the \emph{l}eft and
373: \emph{r}ight neutral locus, denoted by $L$ and $R$. As illustrated in
374: Figure \ref{fig:geom}, the selected locus lies either (i) outside or
375: (ii) in between the neutral loci. All other possible geometries are
376: equivalent to either (i) or (ii) because of the symmetry in the model.
377: 
378: Recombination can break up the association of these three loci. (We
379: only consider recombination as simple crossing over. Gene conversion
380: is not considered in our model.) As we take a limiting infinite
381: population and rescale time by a factor of $N$, we have to consider
382: scaled recombination rates. These are different for the two
383: geometries. For geometry (i) we denote the recombination rates between
384: adjacent loci by $\rho_{SL}$ and $\rho_{LR}$, and for
385: geometry (ii) by $\rho_{LS}$ and $\rho_{SR}$, respectively.
386: 
387: \begin{figure}
388: $$
389: (i): \text{
390: \parbox{2cm}{\beginpicture
391: \setcoordinatesystem units <1.5cm,1.3cm>
392: \setplotarea x from 0 to 2, y from 0 to 1
393: \plot 0 0.5 2 0.5 /
394: \plot 0 0.4 0 0.6 /
395: \plot 0.6 0.4 0.6 0.6 /
396: \plot 2 0.4 2 0.6 /
397: \put{$S$} [cC] at 0 0.2
398: \put{$L$} [cC] at 0.6 0.2
399: \put{$R$} [cC] at 2 0.2
400: \plot 0 0.7 0 0.9 /
401: \plot 0.6 0.7 0.6 0.9 /
402: \plot 2 0.7 2 0.9 /
403: \multiput {\tiny $\bullet$} at 0 .5 *200 .01 .0 /
404: \arrow <0.2cm> [0.375,1] from 0 .8 to .6 .8
405: \arrow <0.2cm> [0.375,1] from 0.6 .8 to 0 .8
406: \arrow <0.2cm> [0.375,1] from 2 .8 to .6 .8
407: \arrow <0.2cm> [0.375,1] from 0.6 .8 to 2 .8
408: \put{$\rho_{SL}$} [cC] at .3 1
409: \put{$\rho_{LR}$} [cC] at 1.3 1
410: \endpicture}} 
411: \qquad\qquad
412: (ii): \text{
413: \parbox{2cm}{\beginpicture
414: \setcoordinatesystem units <1.5cm,1.3cm>
415: \setplotarea x from 0 to 2, y from 0 to 1
416: \plot 0 0.5 2 0.5 /
417: \plot 0 0.4 0 0.6 /
418: \plot 1.2 0.4 1.2 0.6 /
419: \plot 2 0.4 2 0.6 /
420: \put{$L$} [cC] at 0 0.2
421: \put{$S$} [cC] at 1.2 0.2
422: \put{$R$} [cC] at 2 0.2
423: \plot 0 0.7 0 0.9 /
424: \plot 1.2 0.7 1.2 0.9 /
425: \plot 2 0.7 2 0.9 /
426: \multiput {\tiny $\bullet$} at 0 .5 *200 .01 .0 /
427: \arrow <0.2cm> [0.375,1] from 0 .8 to 1.2 .8
428: \arrow <0.2cm> [0.375,1] from 1.2 .8 to 0 .8
429: \arrow <0.2cm> [0.375,1] from 2 .8 to 1.2 .8
430: \arrow <0.2cm> [0.375,1] from 1.2 .8 to 2 .8
431: \put{$\rho_{LS}$} [cC] at .6 1
432: \put{$\rho_{SR}$} [cC] at 1.5 1
433: \endpicture}} 
434: $$
435: \caption{\label{fig:geom}The two possible geometries of the selected
436:   ($S$) and the two neutral loci ($L$ and $R$). The scaled
437:   recombination rates between loci are given by $\rho_{SL}, \rho_{LR},
438:   \rho_{LS}$ and $\rho_{SR}$.}
439: \end{figure}
440: 
441: The two linked neutral loci do not affect the frequency path of the
442: beneficial allele. In contrast, neutral variants which are linked to
443: the beneficial allele at the beginning of the selective sweep rise in
444: frequency.  Looking backward in time from the time $T$ of fixation, we
445: can trace back the history of a finite sample at all three loci. As
446: the neutral loci are linked to the selected one, the genealogies at
447: all three loci are correlated.
448: 
449: For the construction of the ancestral recombination graph relating all
450: loci, time is running backward, so we set $\beta = T-t$. Conditioned
451: on a frequency path $\mathcal X=(X_t)_{0\leq t\leq T}$, given by
452: \eqref{eq:SDE}, we will describe the ancestral recombination graph as
453: a partition-valued process $\xi^{\mathcal X} =(\xi^{\mathcal
454:   X}_\beta)_{0\leq \beta\leq T}$.
455: 
456: \smallskip
457: 
458: Assume we take a sample from the population at time $T$. Every
459: individual in the sample carries one $L$- and one $R$-locus. Of all
460: $L$- and $R$-loci present in the sample we want to trace back a number
461: $\ell$ of $L$- and $r$ of $R$-loci. These loci are represented by sets
462: $\dickm\ell$ for the $L$- and $\dickm r$ for the $R$-loci. So, $\ell
463: := |\dickm \ell|, r:= |\dickm r|$. To define the state space of the
464: structured ancestral recombination graph denote by $\mathcal P_A$ the
465: set of partitions of $A$ for a finite set $A$ and define
466: $$ \mathcal P'_{\dickm \ell\,\cup\,\dickm r} := \{\xi=(\xi^B, \xi^b), \xi^B\cup\xi^b \in\mathcal
467: P_{\dickm\ell \cup \dickm r}, \xi^B\cap\xi^b=\varnothing\}.$$ The
468: coordinates $\xi^B$ and $\xi^b$ contain partition elements located in
469: the beneficial and the wild-type background, respectively. For
470: $\xi\in\mathcal P'_{\dickm \ell \cup \dickm r}$ we write $\xi_{(j)}$
471: for the partition element containing $j\in\dickm \ell\cup \dickm r$.
472: 
473: The ancestral process is started at the time $\beta=0$ of fixation of
474: the beneficial allele. So, the sample of $L$- and $R$-loci is linked
475: to the beneficial allele. Therefore, we start the process in
476: $\xi^{\mathcal X}_0=(\pi, \varnothing)$ for some $\pi\in \mathcal
477: P_{\dickm \ell\,\cup\,\dickm r}$. Assume the state at time $\beta$ is
478: $\xi^{\mathcal X}_\beta=(\xi^B, \xi^b)\in\mathcal P'_{\dickm
479:   \ell\,\cup\,\dickm r}$. For $j\in\dickm \ell \cup \dickm r$ the
480: partition element which contains $j$, i.e.,
481: $(\xi^{\mathcal X}_\beta)_{(j)}$, encodes the set of $L$- and
482: $R$-loci, taken from the population at time $T$, which have the same
483: ancestor as $j$ at time $T-\beta$.  Usually we will study the
484: genealogy of $n$ pairs of $L$- and $R$-loci. In this case set $\dickm
485: \ell:=\{1,\ldots, n\}$ and $\dickm r:=\{n+1,\ldots, 2n\}$ and start
486: the process with $\pi = \{\{1,n+1\}, \ldots, \{n,2n\}\}$.
487: 
488: The dynamics of the process is given as follows: Coalescence events
489: occur for lines in the beneficial and the wild-type background with
490: pair coalescence rate $1/X_{T-\beta}$ and $1/(1-X_{T-\beta})$ at time
491: $\beta$, respectively. So, given $\xi^{\mathcal
492:   X}_\beta=(\xi^B,\xi^b)$ with $\xi^B=\{\xi^B_1, \ldots,
493: \xi^B_{|\xi^B|}\}$ and $\xi^b=\{\xi^b_1, \ldots, \xi^b_{|\xi^b|}\}$
494: transitions occur for $1\leq j\neq k\leq |\xi^B|$ and $1\leq j'\neq
495: k'\leq |\xi^b|$ from $(\xi^B, \xi^b)$ to
496: \begin{equation}\label{eq:coal}
497: \begin{aligned}
498:   &\left((\xi^B\setminus \{\xi^B_j, \xi^B_k\}) \cup \{\xi_j^B\cup
499:     \xi^B_k\}, \xi^b\right) &&
500:   \text{ with rate }\frac{1}{X_{T-\beta}},  & \quad(1)\\
501:   & \left(\xi^B,(\xi^b\setminus \{\xi^b_{j'}, \xi^b_{k'}\}) \cup
502:     \{\xi_{j'}^b\cup \xi^b_{k'}\}\right) && \text{ with rate
503:     }\frac{1}{1-X_{T-\beta}}, & \quad(2)
504: \end{aligned}
505: \end{equation}
506: respectively. For transitions in the process $\xi^{\mathcal X}$ due to
507: recombination we focus on geometry (i) first. A recombination event
508: hits one line between the $S$ and the $L$ locus with rate
509: $\rho_{SL}$ and between the $L$ and the $R$ locus with rate
510: $\rho_{LR}$. If a recombination event occurs between the $S$ and the
511: $L$ locus, it may be that both recombining chromosomes carry the same
512: allele at the $S$ locus. This gives a recombination event which cannot
513: be seen effectively and we ignore it in the process $\xi^{\mathcal
514:   X}$. All other recombination events must be modeled. If
515: $\xi_\beta^{\mathcal X}=(\xi^B,\xi^b)$ with $\xi^B=\{\xi^B_1, \ldots,
516: \xi^B_{|\xi^B|}\}$ and $\xi^b=\{\xi^b_1, \ldots, \xi^b_{|\xi^b|}\}$,
517: transitions occur for $1\leq j\leq |\xi^B|$ and $1\leq k\leq |\xi^b|$
518: from $(\xi^B, \xi^b) $ to
519: \begin{equation}\label{eq:rec1}
520: \begin{aligned}
521:   &  \left(\xi^B\setminus\{\xi^B_j\}, \xi^b\cup \{\xi^B_j\}\right) && \text{ with rate }\rho_{SL}(1-X_{T-\beta}) & \qquad (3_i) \\
522:   &  \left((\xi^B\setminus\{\xi^B_j\}) \cup \{\xi^B_j\cap \dickm \ell\}, \xi^b\cup \{\xi^B_j\cap\dickm r\} \right) && \text{ with rate }\rho_{LR}(1-X_{T-\beta}) & (4_i)\\
523:   &  \left((\xi^B\setminus \{\xi^B_j\}) \cup \{\xi^B_j\cap \dickm \ell, \xi^B_j\cap \dickm r\}, \xi^b\right)  && \text{ with rate }\rho_{LR}X_{T-\beta} & (5_i)\\
524:   &  \left(\xi^B, (\xi^b \setminus\{\xi^b_k\}) \cup \{\xi^b_k\cap \dickm \ell, \xi^b_k\cap\dickm r\}\right) && \text{ with rate }\rho_{LR}(1-X_{T-\beta}) & (6_i)\\
525:   &  \left(\xi^B\cup \{\xi^b_k\}, \xi^b\setminus\{\xi^b_k\}\right) && \text{ with rate }\rho_{SL}X_{T-\beta} & (7_i)\\
526:   & \left(\xi^B\cup \{\xi^b_k\cap\dickm r\}, (\xi^b\setminus\{\xi^b_k\}) \cup
527:   \{\xi^b_k\cap \dickm \ell\}\right) && \text{ with rate
528:   }\rho_{LR}X_{T-\beta}. & (8_i)
529: \end{aligned}
530: \end{equation}
531: Here, $(3_i)$ encodes a recombination event which takes a pair of
532: linked $L$- and $R$-loci from the beneficial to the wild-type
533: background; an event ($4_i$) separates the $R$-locus of a line and
534: takes it to the wild-type background; by $(5_i)$ the $L$ and $R$ loci
535: of a line in the beneficial background are split but remain both in
536: the same background; $(6_i)$ describes the same transition for a line
537: in the wild-type background.  The transitions $(7_i)$ and $(8_i)$
538: describe the back-recombination of loci into the beneficial
539: background.
540: 
541: \begin{example}
542:   An example displaying the dynamics of the process $\xi^{\mathcal X}$
543:   for geometry (i) is shown in Figure \ref{ancrecgraph}.  The sets of
544:   $L$- and $R$-loci are $\dickm\ell = \{1,2,3\}$ and $\dickm
545:   r=\{4,5,6\}$, respectively. The starting partition is $\xi^{\mathcal
546:     X}_0=(\pi,\varnothing)$ with $\pi = \{\{1,4\}, \{2,5\},
547:   \{3,6\}\}$. Several kinds of events can happen: coalescences in the
548:   beneficial background, i.e., an event (1), recombinations which
549:   leave the two neutral loci together but change the allele at the
550:   selected site, i.e., an event $(3_i)$, and recombination events which
551:   split the two neutral loci. The last kind of event may either bring
552:   one of the two neutral loci in a different background, $(4_i)$, or
553:   split a line within the beneficial background, $(5_i)$, or split a
554:   line in the wild-type background, $(6_i)$. The final partition is
555:   $\xi^{\mathcal X}_T = (\xi^B_T, \xi^b_T)$ with $\xi^B_T =
556:   \{\{1,2\}\}$, $\xi^b_T= \{\{3\}, \{4\}, \{5\}, \{6\}\}$.
557: \end{example}
558: 
559: For geometry (ii) we have (rescaled) recombination rates $\rho_{LS}$
560: and $\rho_{SR}$ between the left neutral and the selective and the
561: right and the selective locus, respectively. Here, transitions occur
562: from $(\xi^B, \xi^b)$ to
563: \begin{equation}\label{eq:rec2}
564: \begin{aligned}
565:   &\left((\xi^B\setminus\{\xi^B_j\})\cup\{\xi_j^B\cap\dickm r\}, \xi^b\cup\{\xi_j^B\cap \dickm \ell\} \right) && \text{ with rate }\rho_{LS}(1-X_{T-\beta}) &  \quad (3_{ii})\\
566:   &\left((\xi^B\setminus\{\xi^B_j\})\cup\{\xi_j^B\cap\dickm \ell\}, \xi^b\cup\{\xi_j^B\cap \dickm r\} \right) && \text{ with rate }\rho_{SR}(1-X_{T-\beta}) & (4_{ii})\\
567:   &\left((\xi^B\setminus\{\xi^B_j\}) \cup \{\xi^B_j\cap \dickm \ell, \xi^B_j\cap \dickm r\}, \xi^b\right) && \text{ with rate }(\rho_{LS} + \rho_{SR})X_{T-\beta} & (5_{ii})\\
568:   &\left(\xi^B, (\xi^b\setminus\{\xi^b_k\}) \cup \{\xi^b_k\cap \dickm \ell, \xi^b_k\cap \dickm r\}\right) && \text{ with rate }(\rho_{LS} + \rho_{SR})(1-X_{T-\beta}) & (6_{ii})\\
569:   &\left(\xi^B\cup\{\xi_k^b\cap \dickm \ell\}, (\xi^b\setminus\{\xi^b_k\})\cup\{\xi_k^b\cap\dickm r\}\right)  && \text{ with rate }\rho_{LS}X_{T-\beta} & (7_{ii})\\
570:   &\left(\xi^B\cup\{\xi_k^b\cap \dickm r\},
571:  ( \xi^b\setminus\{\xi^b_k\})\cup\{\xi_k^b\cap\dickm \ell\}\right) && \text{
572:     with rate }\rho_{SR}X_{T-\beta}. & (8_{ii})
573: \end{aligned}
574: \end{equation}
575: These events refer to a change in background from the beneficial to
576: the wild-type background either for the $L$-locus, $(3_{ii})$, or the
577: $R$-locus, $(4_{ii})$. Splits in the beneficial and wild-type
578: background may happen as in the case of geometry (i); see events
579: $(5_{ii})$ and $(6_{ii})$. Back-recombinations to the beneficial
580: background are denoted by $(7_{ii})$ for the $L$- and $(8_{ii})$ for the
581: $R$-locus. Observe that a transition which takes both loci on one line
582: from the beneficial to the wild-type background cannot occur for
583: geometry (ii); cf. event $(3_i)$.
584: 
585: \begin{figure}
586: \begin{center}
587: \includegraphics[width=15.5cm]{ancrecgraph.ps}
588: \end{center}
589: \caption{\label{ancrecgraph}A structured ancestral recombination graph
590:   $\xi^{\mathcal X}$ conditioned on the frequency path $\mathcal X$ of
591:   the beneficial allele. Between times $\beta=0$ and $\beta=T$
592:   coalescences may occur at rates $(1)$ and $(2)$. Recombination
593:   events happen at rates $(3_i)$--$(8_i)$. The dashed lines indicate
594:   ancestry of the $L$-locus while the $R$-locus may be traced along
595:   dotted lines.}
596: \end{figure}
597: 
598: \begin{definition}\label{def:1}
599:   Assume $\dickm \ell$ and $\dickm r$ are sets of left and right
600:   neutral loci, respectively, and $\mathcal X=(X_t)_{0\leq t\leq T}$
601:   is a frequency path of the beneficial allele given by
602:   \eqref{eq:SDE}.
603: 
604:   Conditioned on $\mathcal X$, consider the jump process
605:   $\xi^{\mathcal X}=(\xi^{\mathcal X}_\beta)_{0\leq \beta \leq T}$,
606:   which starts in $\xi_0^{\mathcal X} = (\pi, \varnothing)$ for
607:   $\pi\in \mathcal P_{\dickm\ell \cup \dickm r}$ and makes transitions
608:   by coalescence events (1), (2), given by \eqref{eq:coal} and
609:   recombination events ($3_i$)--($8_i$) or ($3_{ii}$)--($8_{ii}$) from
610:   \eqref{eq:rec1} and \eqref{eq:rec2}, respectively. This process
611:   $\xi^{\mathcal X}$ is called the \emph{structured ancestral recombination graph for
612:     the $L$ and $R$ locus} conditioned on $\mathcal X$ for geometry
613:   (i) or (ii), respectively.
614: 
615:   The mixture of $\xi^{\mathcal X}_T$ over the distribution of
616:   frequency paths given by \eqref{eq:SDE} defines the random partition
617:   $\Gamma_\pi = (\Gamma^B_\pi, \Gamma^b_\pi)$, i.e.,
618:     $$\Gamma_\pi :=\int \xi^{\mathcal X}_T \mathbb{P}\left[d\mathcal X\right].$$
619:   \end{definition}
620: 
621: \section{Main result}
622: We study selective sweeps in the infinite population limit, i.e., the
623: frequency of the beneficial allele follows the SDE given by
624: \eqref{eq:SDE}. Moreover, selection is most efficient for large
625: selection coefficients. Our goal is to derive a simpler but approximate expression
626: for $\Gamma_\pi$ in the regime of large $\alpha$. It was shown in
627: \cite{EtheridgePfaffelhuberWakolbinger2006} that for the fixation time
628: $T$ of the beneficial allele
629: \begin{align}\label{eq:T}
630:   \mathbb E[T] = \frac{2\log\alpha}{\alpha} + \mathcal
631:   O\Big(\frac{1}{\alpha} \Big), \qquad \mathbb V[T] = \mathcal
632:   O\Big(\frac{1}{\alpha^2} \Big)
633: \end{align}
634: for large $\alpha$. This suggests that only under the scaling $\rho =
635: \mathcal O(\alpha/ \log\alpha)$ for the recombination rate a
636: non-trivial number of recombination events occurs during the sweep for large $\alpha$. This is true for all possible kinds of
637: recombination events during the sweep, so the recombination rates
638: $\rho_{SL}, \rho_{LR}$ and $\rho_{LS}, \rho_{SR}$ for geometries (i)
639: and (ii) should be of this order.  Henceforth, we assume
640: \begin{equation*}
641:   \begin{aligned}
642:     \text{Geometry (i):} &\qquad \rho_{SL} =
643:     \gamma_{SL}\frac{\alpha}{\log\alpha}, &\quad \rho_{LR} =
644:     \gamma_{LR}\frac{\alpha}{\log\alpha}, &\qquad 0<\gamma_{SL}, \gamma_{LR}<\infty\\
645:     \text{Geometry (ii):} &\qquad \rho_{LS} =
646:     \gamma_{LS}\frac{\alpha}{\log\alpha},&\quad \rho_{SR} =
647:     \gamma_{SR}\frac{\alpha}{\log\alpha},&\qquad 0<\gamma_{LS},
648:     \gamma_{SR}<\infty.
649:   \end{aligned}
650: \end{equation*}
651: 
652: Our approximation of $\Gamma_\pi$ is based on a Yule tree, which
653: serves as an approximation of the genealogy at the selected locus.  A
654: Yule tree is the realization of a Yule process, i.e., a pure birth
655: process which starts with one line and every line splits in two lines
656: after an exponential waiting time.
657: 
658: In our approximation the quantity
659: \begin{align}\label{eq:pjk}
660:   p_{i_1}^{i_2}(\gamma) := \exp\Big( -\frac{\gamma}{\log\alpha} \sum_{i=i_1+1}^{i_2}
661:   \frac{1}{i}\Big)
662: \end{align}
663: will play an important role. 
664: 
665: Assume $\dickm\ell$ and $\dickm r$ are sets of left and right loci and
666: $\pi\in\mathcal P_{\dickm \ell\cup\dickm r}$. Three mechanisms
667: determine the Yule approximation of the partition $\Gamma_\pi$. First,
668: we approximate splits in the beneficial background, i.e., events
669: $(5_i)$ and $(5_{ii})$, by the following procedure:
670: \begin{align}\label{eq:Y2}
671:   \text{\parbox{13cm}{For all partition elements
672:       $\pi_1,\ldots,\pi_{|\pi|}$ realize Bernoulli random variables
673:       $U_1,\ldots, U_{|\pi|}$ which are 1 with success probability
674:       $$ \text{geometry (i):} \quad 1-p_0^{\lfloor
        2\alpha\rfloor}(\gamma_{LR})\qquad \text{geometry (ii):}\quad 
      1-p_0^{\lfloor 2\alpha\rfloor}(\gamma_{LS} + \gamma_{SR}).$$ If
677:       $U_i=1$, split the $i$th partition element in its left and right
678:       locus. Altogether, this defines a partition
679: $$ \pi' = \big\{ \{\pi_i \cap \dickm\ell\}, \{\pi_i\cap\dickm r\}: U_i=1\big\} \cup \big\{ \{\pi_i\}: U_i=0\big\}.$$
680: }}
681: \end{align}
682: Next, realize a Yule process with branching rate $\alpha$, i.e., each
683: line splits in two lines at rate $\alpha$. Stop this process when it
684: has $\lfloor {2}\alpha\rfloor$ lines. Call this tree $\mathcal Y$. To
685: obtain the genealogy of a sample of size $|\pi'|$ from this tree with
686: $\lfloor {2}\alpha\rfloor$ extant leaves, we use the following
687: construction:
688: \begin{align}\label{eq:Y1}
689:   \text{\parbox{13cm}{Start with $|\pi'|$ lines from the full Yule
690:       tree $\mathcal Y$ with $\lfloor 2\alpha \rfloor$ lines. When
691:       there are $k$ lines left at the time the full tree has $i$
692:       lines, the probability that a coalescence event occurs among the
693:       $k$ lines at the time the full tree goes from $i$ to $i-1$ lines
694:       is
695:       $$\frac{\binom{k}{2}}{\binom{i}{2}}.$$ By this construction we
696:       build a tree $\mathcal Y_{|\pi'|}$ with the partition elements
697:       of $\pi'$ as leaves and nodes which record the number of lines
698:       in the full Yule tree.  }}
699: \end{align}
700: 
701: \begin{remark}
702:   To construct the sample tree $\mathcal Y_{|\pi'|}$ from $\mathcal Y$
703:   is a task equivalent to describing an exchangeable sample from a
704:   tree which arises by exchangeable binary coalescence dynamics. This
705:   has been studied by \cite{Saundersetal1984} and was recalled in
706:   \cite[Lemma 4.8]{EtheridgePfaffelhuberWakolbinger2006}.  If $I_t=i$
707:   is the number of lines in the Yule tree $\mathcal Y$ at time $t$,
708:   denote by $K_i$ the number of lines in $\mathcal Y_{|\pi'|}$ while
709:   $I_t=i$. The process $(K_i)_{\lfloor 2\alpha \rfloor\geq i\geq 1}$
710:   is a time-inhomogeneous Markov chain with transition probabilities
711: $$ \mathbb P[K_{i-1}=k-1 | K_i=k] = \frac{\binom{k}{2}}{\binom{i}{2}},
712: \qquad \qquad i=2,\ldots, \lfloor 2\alpha\rfloor, k=2,\ldots, |\pi'|.
713: $$
714: Moreover, the sample tree can be described forward in time by noting
715: that
716: $$ \mathbb P[K_{i}=k|K_{i-1}=k-1 ] = \frac{|\pi'| - k+1}{|\pi'|+i-1}. $$
717: \qed
718: \end{remark}
719:  
720: The sample tree which is pruned out of the full tree in this way
721: represents the genealogy at the selected site. To describe the
722: genealogies at the partially linked neutral sites we mark the sample
723: Yule tree to determine further recombination events. A mark stands for
724: one (or two) recombination events that may occur. This works in the following way:
725: \begin{align}\label{eq:Y3}
726:   \text{\parbox{13cm}{Let a branch in the tree $\mathcal Y_{|\pi'|}$
727:       be given which starts when the full tree has $i_1$ lines and
      ends when the full tree has $i_2$ lines. For geometry (i),
729:       every branch can be hit by at most one of three different kinds
730:       of marks indicating recombination events.  These are $SL$-,
731:       $LR$-, and $SLR$-marks. Their probabilities are given in Table
732:       \ref{tab:marks}. For geometry (ii) the branch is hit
733:       independently by $LS$- and $SR$-marks with probabilities
734:       $(1-p_{i_1}^{i_2}(\gamma_{LS}))$ and $(1-p_{i_1}^{i_2}(\gamma_{SR}))$.\\[0.5ex]
735:       Here, $SL$-marks separate the $S$- from the $L$-locus on each
736:       branch of the tree, etc. For geometry (i), $SLR$-marks separate
737:       the $S$- from the $L$- and the $L$- from the $R$-locus.}}
738: \end{align}
739: %Observe the terms $p_0^k$ for the $SL$ and $SLR$ events for geometry
740: %(i).  
741: 
742: \begin{table}
743: \begin{center}
744: %\hspace{2cm} geometry (i) \hspace{5cm} geometry (ii)\hspace{2cm} 
745: \vspace{1ex}
746: 
747: \begin{tabular}{|c|c|}\hline
748:   \rule[-4mm]{0cm}{1cm}mark & probability \\\hline
749:   \rule[-4mm]{0cm}{1cm}$SL$ & $\big(1-p_{i_1}^{i_2}(\gamma_{SL})\big)p_0^{i_2}(\gamma_{LR})$ \\
750:   \rule[-4mm]{0cm}{1cm}$LR$ & $p_{i_1}^{i_2}(\gamma_{SL})\big(1-p_{i_1}^{i_2}(\gamma_{LR})\big)$ \\
751:   \rule[-4mm]{0cm}{1cm}$SLR$ & $\big(1-p_{i_1}^{i_2}(\gamma_{SL})\big)\big(1-p_0^{i_2}(\gamma_{LR})\big)$\\ 
752:   \rule[-4mm]{0cm}{1cm}no &  $p_{i_1}^{i_2}(\gamma_{SL})p_{i_1}^{i_2}(\gamma_{LR})$\\\hline
753: \end{tabular}
754: %\hspace{2ex}
755: %\begin{tabular}{|c|c|}\hline
756: %  \rule[-4mm]{0cm}{1cm}mark & probability \\\hline
757: %  \rule[-4mm]{0cm}{1cm}$LS$ & $\big(1-p_j^k(\gamma_{LS})\big)p_j^k(\gamma_{SR})$ \\
758: %  \rule[-4mm]{0cm}{1cm}$SR$ & $p_j^k(\gamma_{LS})\big(1-p_j^k(\gamma_{SR})\big)$ \\
759: %  \rule[-4mm]{0cm}{1cm}$LSR$& $\big(1-p_j^k(\gamma_{LS})\big)\big(1-p_j^k(\gamma_{SR})\big)$\\ 
760: %  \rule[-4mm]{0cm}{1cm}no   & $p_j^k(\gamma_{LS})p_j^k(\gamma_{SR})$\\\hline
761: %\end{tabular}
762: \end{center}
763: \caption{\label{tab:marks}For geometry (i), we mark every branch in
  the Yule tree by at most one of three different kinds of events.
765:   If a branch starts when the full Yule tree has $i_1$ and ends when
766:   it has $i_2$ lines, the probabilities for all marks are given in the
767:   table. }
768: \end{table}
769: 
770: \begin{example}
771:   The above construction is illustrated in Figure \ref{yuleTree}. We
772:   consider geometry (i) here. A set $\dickm\ell=\{1,2,3,4\}$ of
773:   $L$-loci and $\dickm r=\{5,6,7,8\}$ of $R$-loci is given. Starting with
774:   $\pi=\{\{1,5\}, \{2,6\},\{3,7\},\{4,8\}\}$, every partition element
  is split with probability $1-p_0^{\lfloor 2\alpha\rfloor}(\gamma_{LR})$ according
776:   to \eqref{eq:Y2}. This results in the finer partition $\pi'$. The
777:   partition elements of $\pi'$ are used to construct a sample tree
778:   from a full Yule tree which has $\lfloor 2\alpha\rfloor$ lines. The
779:   coalescence probabilities for the sample are given by \eqref{eq:Y1}.
780:   On the sample tree, branches are marked by $SL$-, $LR$-, or
781:   $SLR$-marks according to Table \ref{tab:marks}. The resulting
782:   partition $\pi''$ is constructed as given in Definition \ref{def:2}.
783:   % Poisson processes with rates $\rho_{SL}$ and $\rho_{LR}$ are used
784:   % to construct $SL$, $LR$ and $SLR$ marks.  The leftmost mark is a
785:   % $SLR$ mark because the line is first hit by the Poisson process
786:   % with rate $\rho_{SL}$ and the recombined line is hit by the
787:   % Poisson process with rate $\rho_{LR}$.
788: \end{example}
789: 
790: \begin{figure}
791: \begin{center}
792: \includegraphics[width=15.5cm]{yuleFigure.ps}
793: \end{center}
794: \caption{\label{yuleTree}The Yule process approximation for two linked
795:   neutral loci under a selective sweep. Here, we consider geometry
796:   (i). The $L$-locus may be traced back along dashed lines while
797:   dotted lines indicate ancestry of the $R$-locus. See text for
798:   explanation. }
799: \end{figure}
800: 
801: We are now in a position to define our approximation based on the Yule
802: process.
803: 
804: \begin{definition}\label{def:2}
805:   Assume $\dickm\ell$ and $\dickm r$ are sets of left and right
806:   neutral loci, respectively, and $\pi\in\mathcal P_{\dickm
807:     \ell\cup\dickm r}$. By \eqref{eq:Y2} construct the partition
808:   $\pi'$ and by \eqref{eq:Y1} and \eqref{eq:Y3} a Yule tree $\mathcal
809:   Y_{|\pi'|}$ with marks. For geometry (i) define the equivalence
810:   relation:
811:   \begin{equation}\label{eq:equivGeoi} 
812:     j\sim k :\iff \begin{cases} \text{no $SL$-, $SLR$-mark on } \Yup & \text{ if }j,k\in\dickm \ell,\\
813:       \text{no $SL$-, $LR$-, $SLR$-mark on } \Yup, & \text{ if }j,k\in\dickm r\\[2ex]
814:       \text{no $SL$-mark on } \Yup, \\[1ex]
815:       \text{no $LR$-mark on } \Yri, & \text{ if }j\in\dickm \ell,k\in\dickm r\\
816:       \text{no $SLR$-mark on } \Yall
817:   \end{cases}
818:   \end{equation}
819:   where the bold lines indicate for which part of the tree $\mathcal
820:   Y_{|\pi'|}$ relating two lines with the root of the tree, the
821:   constraint on marks applies. For geometry (ii) set
822:   \begin{equation}\label{eq:equivGeoii} 
823:     j\sim k :\iff \begin{cases} \text{no $LS$-mark on } \Yup, & \text{ if }j,k\in\dickm \ell,\\
824:       \text{no $SR$-mark on } \Yup, & \text{ if }j,k\in\dickm r,\\[2ex]
825:       \text{\parbox{5.2cm}{no $LS$-mark on \Yle, \\[1ex]
826:         no $SR$-mark on \Yri}} & \text{ if
827:       }j\in\dickm \ell, k\in\dickm r
828:     \end{cases} 
829:   \end{equation}
830:   (The equations \eqref{eq:equivGeoi} and \eqref{eq:equivGeoii} indeed
831:   define equivalence relations, as can easily be checked.)  Each of
832:   these equivalence relations on $\dickm\ell\cup\dickm r$ defines a
833:   partition $\pi''$.  For geometry (i) there is a unique partition
834:   element
835: \begin{equation}\label{eq:uni} 
836: \begin{aligned}
837:   \pi''_f = \Big\{j\in\dickm \ell: & \text{ no $SL$-, $SLR$-mark on
838:     \one{$\pi'_{(j)}$}}\Big\} \\ & \cup \Big\{k\in\dickm r: \text{ no $SL$-, $LR$-,
839:     $SLR$-mark on \one{$\pi'_{(k)}$}}\Big\}
840: \end{aligned}
841: \end{equation}
842: and for geometry (ii) a unique partition element
843: \begin{equation}
844: \begin{aligned}\label{eq:unii} 
845:   \pi''_f = \Big\{j\in\dickm \ell: & \text{ no $LS$-mark on
846:     \one{$\pi'_{(j)}$}}\Big\} \cup \Big\{k\in\dickm r: \text{ no $SR$-mark on
847:     \one{$\pi'_{(k)}$}}\Big\}.
848: \end{aligned}
849: \end{equation}
850: Then the random partition
851: $$ \Upsilon_\pi:= ( \{\pi''_f\}, \pi''\setminus \{\pi_f''\})$$
852: is called the \emph{Yule approximation of } $\Gamma_\pi$.
853: \end{definition}
854: 
855: \begin{example}
856:   For the example in Figure \ref{yuleTree} the $SL$-, $LR$- and
857:   $SLR$-marks on the sample tree lead to the realization
858: $$ \Upsilon_\pi = (\{\{3,4\}\}, \{\{1,2\}, \{5,6\},\{7\},\{8\}\}).$$
859: \end{example}
860: 
861: \begin{theorem}\label{T}
862:   Let $\pi\in\mathcal P_{\dickm \ell\cup\dickm r}$ and $\Gamma_\pi$
863:   and $\Upsilon_\pi$ be as in Definitions \ref{def:1} and \ref{def:2}.
864:   Then,
865:   $$ \sup_{\xi\in\mathcal P'_{\dickm \ell\cup\dickm r}} 
866:   \big|\mathbb P[\Gamma_\pi = \xi] - \mathbb P[\Upsilon_\pi=\xi] \big| =
867:   \mathcal O\Big( \frac{1}{(\log\alpha)^2}\Big). $$
868: \end{theorem}
869: 
870: \noindent
871: %A few remarks are in order.
872: 
873: \begin{remark}
874: \begin{enumerate}
875: \item The Theorem states that, for large $\alpha$, the random
876:   partitions $\Gamma_\pi$ and $\Upsilon_\pi$ are close in variation
877:   distance. Here, variation distance refers to the maximal difference
878:   in the probabilities to obtain any partition $\xi\in\mathcal
879:   P'_{\dickm\ell \cup\dickm r}$. The order of accuracy, given by the
880:   Landau symbol, still depends on several parameters. These are the
881:   cardinalities $\ell$ and $r$ and recombination constants
882:   $\gamma_{SL}, \gamma_{LR}$ for geometry (i) and $\gamma_{LS}$ and
883:   $\gamma_{SR}$ for geometry (ii). The proof of Theorem \ref{T} will
884:   be given in Section \ref{proof}.
885: \item At first sight, comparing the Definitions \ref{def:2} and
886:   \ref{def:1} the Yule approximation does not look any simpler than
887:   the exact model. However, the Yule approximation has advantages both
888:   analytically and computationally. The random partition $\Gamma_\pi$
889:   relies on constructing a frequency path $\mathcal X$, while the Yule
  approximation $\Upsilon_\pi$ constructs the ancestral recombination
891:   graph for the sample directly. Analytically, as we will see in
892:   Section \ref{app}, this means that explicit calculations are
893:   possible. Computationally, i.e., for simulations of the ancestral
894:   recombination graph, the direct construction of the ancestry of the
895:   sample allows for fast algorithms; see
896:   \cite{PfaffelhuberHauboldWakolbinger2006} for the case of a single
897:   neutral locus.
898: \item The current paper is a generalisation of results found in
899:   \cite{EtheridgePfaffelhuberWakolbinger2006} for a two-locus system
900:   with only one neutral locus. More precisely, consider the projection of
901:   $\Gamma_\pi$ on only one locus, i.e., on either $\dickm \ell$ or
902:   $\dickm r$. In Propositions 4.2 and 4.7 of that paper it was shown
903:   that the projection of $\Upsilon_\pi$ on $\dickm \ell$ or $\dickm r$
904:   is an approximation to a structured coalescent with an error in
905:   probability of the order $\mathcal O\big( (\log\alpha)^{-2}\big)$.
906: \item In \cite{EtheridgePfaffelhuberWakolbinger2006} an approximate
907:   sampling formula was given in the two-locus case. A similar approach
908:   would be possible here. However, we refrain from its derivation
909:   because it was shown in \cite{PfaffelhuberHauboldWakolbinger2006}
910:   that the sampling formula in the two-locus case only produces
911:   numerically sound results for $n\leq 5$.
912: \item As indicated numerically in
913:   \cite{PfaffelhuberHauboldWakolbinger2006}, the Yule approximation
914:   can be improved. To understand how this works, we need to collect
915:   the errors which contribute to the error of order $\mathcal
916:   O(1/(\log\alpha)^2)$. First, the Yule approximation ignores events
917:   $(2), (6_{ii}), (7)$ and $(8)$. Second, as will be clear in the
918:   proof of Proposition \ref{PropSecond}, the coalescent rate in the
919:   beneficial background is decreased from $1/X dt$ to $(1-X)/X dt$ by
920:   the Yule process. It is the latter error that dominates, at least in
921:   large samples, because the total coalescence rate increases
922:   quadratically with the number of lines. However, increasing the
923:   coalescence probability in \eqref{eq:Y1} to
924: \begin{align*}\label{eq:yuleCoal}
925:   1\wedge
926:   \frac{\binom{k}{2}}{\binom{i}{2}}\frac{1}{1-\tfrac{i-1}{2\alpha}}
927: \end{align*}
928: at the time the Yule tree has $i$ lines corrects for this error.
929: \item For simulations of genealogies it is most important that the
930:   Yule approximation given above is not restricted to the case of two
931:   neutral loci. The take-home-message from the construction of the
932:   Yule approximation is that splits in the beneficial background are
933:   generated first and afterwards marks on a Yule tree determine all
  recombination events. Both splits in the beneficial background and
935:   recombination events along the Yule tree can be given along a
936:   continuous chromosome.
937:   %The implementation of the corresponding
938:   %mechanisms would result in software which is comparable to more
939:   %accurate than existing programs (see e.g. the program {\tt ssweep}
940:   %mentioned in \cite{KimStephan2002}).
941: \end{enumerate}
942: \end{remark}
943: \qed
944: 
945: \section{Application: {\bf\emph{D}}}
946: \label{app}
947: Lewontin's $D$ is a measure of linkage disequilibrium (non-random
948: association of alleles) and is frequently used as a simple statistic
949: in a multi-locus setting (\cite{Lewontin1964}; see also
950: \cite[(2.89)]{Ewens2004}). Given two loci $L$ and $R$ with alleles 0
951: or 1 at each locus, it is defined as
952: \begin{equation} \label{eq:D}
953: D = p_{LR} - p_L p_R
954: \end{equation}
955: where $p_{LR}$ is the frequency of individuals carrying allele 1 at
956: both loci, $p_L$ is the frequency of 1's at the $L$ locus and $p_R$ is
the frequency of 1's at the $R$ locus.
958: 
959: To predict patterns of $D$ between pairs of neutral loci at the time
960: $T$ of fixation of a beneficial allele we next approximate $\mathbb
961: E[D(T)]$ using Theorem \ref{T}. It is crucial to observe that $\mathbb
962: E[p_{LR}(T)]$ as well as $\mathbb E[p_{L}(T) p_R(T)]$ may be derived
from the distribution of genealogies of linked neutral loci under
964: selection and the expected allele frequencies at the beginning of the
965: sweep. To see this, note that $\mathbb E[p_{LR}(T)]$ equals the
966: probability that the ancestors of the $L$- and $R$-locus of one
967: randomly picked individual from the population at time $T$ carry
968: alleles 1 at both neutral loci. Analogously, $\mathbb E[p_{L}(T)
p_R(T)]$ is the probability that the ancestors of the $L$- and
$R$-loci of two different individuals at time $T$ both carry allele 1.
971: Denote by $q$ the probability that both loci, $L$ and $R$ from one
972: individual, picked at time $T$, have a common ancestor at the
973: beginning of the sweep. Analogously, $q'$ is the same probability for
974: the $L$- and $R$-loci from two different individuals. Using these
975: definitions we see that
976: \begin{equation}\label{eq:pLRpLpR}
977: \begin{aligned}
  \mathbb E\left[p_{LR}(T)\right] & = q \cdot \mathbb E\left[p_{LR}(0)\right] + (1-q)\cdot
  \mathbb E\left[p_{L}(0)p_R(0)\right], \\ \mathbb E\left[p_{L}(T)p_R(T)\right] & = q'\cdot
980:   \mathbb E\left[p_{LR}(0)\right] + (1-q')\cdot \mathbb E\left[p_{L}(0)p_R(0)\right].
981: \end{aligned}
982: \end{equation}
983: Combining \eqref{eq:pLRpLpR} with the definition of $D$ from
984: \eqref{eq:D},
985: \begin{equation}\label{eq:D1}
986: \mathbb E[D(T)] = (q - q') \mathbb E[D(0)].
987: \end{equation}
Both $q$ and $q'$ may be approximated by Theorem \ref{T}. Formally,
989: setting $\dickm{\ell}=\{1\}, \dickm{r}=\{2\}$,
990: \begin{equation}
991: \begin{aligned}
992:   q & = \mathbb P\left[\Gamma^B_{\{1,2\}} \cup \Gamma^b_{\{1,2\}}= \{\{1,2\}\}\right],\\
993:   q'& = \mathbb P\left[\Gamma^B_{\{1\},\{2\}} \cup \Gamma^b_{\{1\},\{2\}} =
994:   \{\{1,2\}\}\right].
995: %  q & := \mathbb P[\xi^B(0) \cup \xi^b(0) = \{\{1,2\}\}| \xi^B(T) =
996: %  \{\{1,2\}\}],\\  
997: %  q' & := \mathbb P[\xi^B(0) \cup \xi^b(0) =
998: %  \{\{1,2\}\}| \xi^B(T) = \{\{1\},\{2\}\}],
999: \end{aligned}
1000: \end{equation}
1001: As $\Gamma_\pi$ may be approximated by $\Upsilon_\pi$ this brings us
1002: in a position to predict patterns of $D$ at the end of a selective
1003: sweep.
1004: 
1005: \begin{theorem}\label{T2}
1006: For geometry (i), 
1007: \begin{equation}\label{eq:P:D:1}
1008: \begin{aligned}
1009:   \mathbb E[D(T)] & = p_0^{ 2\alpha }(2\gamma_{LR}) \Big(1 -
1010:   \sum_{k=2}^{ 2\alpha} \frac{2}{k(k+1)}
1011:   p_k^{2\alpha}(2\gamma_{SL})\Big)\mathbb E[D(0)] + \mathcal O\Big(
1012:   \frac{1}{(\log\alpha)^2}\Big),
1013: \end{aligned}
1014: \end{equation}
1015: and for geometry (ii),
1016: \begin{equation}\label{eq:P:D:2}
1017:   \mathbb E[D(T)] = \mathbb E[D(0)]\cdot\mathcal O\Big(
1018:   \frac{1}{(\log\alpha)^2}\Big).
1019: \end{equation}
1020: \end{theorem}
1021: 
1022: \begin{figure}
1023: \begin{center}
1024: \includegraphics[width=10cm]{sim.ps}
1025: \end{center}
\caption{\label{sim}The effect of a selective sweep on Lewontin's $D$
  may be simulated in a Wright-Fisher model. In this process,
1028:   the frequency path of the beneficial allele is stochastic and the
1029:   ancestral recombination graph may be built conditioned on this
1030:   frequency path. The locations of the $L$ and $R$ locus are fixed.
1031:   The position of the selected site varies along the $x$-axis.  If we
1032:   compare the result from \eqref{eq:P:D:1} to equation (47) of
1033:   \cite{StephanSongLangley2006} we see that the Yule process
1034:   approximation is more accurate. The parameters of the Wright-Fisher
1035:   model are $N=10^5, \alpha=1000, \rho_{LR}=20$ and $D(0) = 0.0242$.}
1036: \end{figure}
1037: 
1038: \begin{remark}
1039: \begin{enumerate}
1040: \item Patterns of Lewontin's $D$ can be studied by deterministic
1041:   forward calculations instead of our genealogical approach. This was
1042:   carried out in \cite{StephanSongLangley2006} under the assumption
1043:   that strong selection leads to a deterministic behaviour of allele
1044:   frequencies. Specifically, the frequency of the beneficial allele
1045:   follows the logistic differential equation
1046:   $$ dX = \alpha X(1-X)dt,\qquad\qquad X_0 = \tfrac{1}{N} $$
1047:   instead of the stochastic path given by \eqref{eq:SDE}. Predictions
1048:   of $D$ at all times during the selective sweep were given. In
1049:   particular, their equation (47) approximates values of $D$ at the
1050:   end of the sweep for geometry (i).
1051:   %They found
1052:   %\begin{equation}
1053:   %  \begin{aligned}
1054:   %    D(T) = e^{-\rho_{LR} T }\Big( 1 - \Big(
1055:   %    \frac{p_S(0)}{1-p_S(0)}\Big)^{2\rho_{RS}/\alpha} \Big(
1056:   %    \frac{1-2p_S(0)}{1-p_S(0)}\Big)^{2}\Big) D(0)
1057:   %  \end{aligned}
1058:   %\end{equation}
1059:   %where $p_S(0)$ is the frequency of the beneficial allele at the
1060:   %beginning of the sweep and $$T = \frac{2}{\alpha}\log\Big(
1061:   %\frac{1-p_S(0)}{p_S(0)}\Big) $$ is the (deterministic) duration of
1062:   %the sweep. (Their results differ by a factor of 2 because they take
1063:   %$s$ to be the selective advantage of a diploid individual which is
1064:   %homozygous for the beneficial allele while $s$ in our analysis is
1065:   %the advantage of a heterozygote.)
1066: 
1067:   % Almost the same result was found by
1068:   % \cite{LehnertStephanPfaffelhuber2006} who used a genealogical
1069:   % approach and approximated the genealogy at the selected site as a
1070:   % star-like phylogeny rather than a Yule process; see their equation
1071:   % (8).
1072:   In real populations, random effects due to genetic drift are not
1073:   negligible.  This has been pointed out by
1074:   \cite{LehnertStephanPfaffelhuber2006}.
1075: % and may be seen from the
1076: %  divergence of the deterministic analysis, carried out by
1077: %  \cite{StephanSongLangley2006}, from the simulation of a
1078: %  Wright-Fisher model in Figure \ref{sim}. 
1079:   The Yule process approximation captures most random effects. Indeed,
1080:   comparison with simulations from
1081:   \cite{LehnertStephanPfaffelhuber2006} shows that the results
1082:   produced by the Yule process approximation are more accurate than
1083:   those of \cite{StephanSongLangley2006}.
1084:  
1085:  % These effects are not captured by the analysis in
1086:  % \cite{StephanSongLangley2006} but in the Yule process approximation
1087:  % of Theorem \ref{T2}.  Indeed, Figure \ref{sim} shows that the Yule
1088:  % process approximation produces more accurate results than the
1089:   %results of \cite{StephanSongLangley2006}.
1090: 
1091: \item For empirical studies it is most interesting to know which
1092:   patterns of linkage disequilibrium  to look for in real data. The
1093:   pattern genetic hitchhiking can produce was discussed in
1094:   \cite{StephanSongLangley2006} and \cite{ReedTishkoff2006}.
1095:   Surprisingly, hitchhiking reduces levels of linkage disequilibrium
1096:   compared to the neutral expectation. This is evident from Figure
1097:   \ref{sim}. If the selected locus is far from both neutral loci,
1098:   linkage disequilibrium between the neutral loci is not affected by
1099:   hitchhiking. Therefore, values of $D$ for large $\rho_{SL}$ converge
1100:   to the expectation of $D$ under neutrality.  This effect was taken
1101:   up by \cite{ReedTishkoff2006} to argue that genetic hitchhiking
1102:   produces patterns in the association of alleles similar to
1103:   recombination hotspots, which are e.g. important in genetic
1104:   association studies in humans (\cite{hapmap2005}). However, genetic
1105:   hitchhiking certainly produces patterns different from recombination
1106:   hotspots in general, e.g., a low neutral diversity or a
1107:   distinctive site frequency spectrum (\cite{FayWu2000}).
1108: \item An accurate approximation of $\mathbb E[D(T)]$ does not suffice
1109:   to predict patterns of linkage disequilibrium in general. In
1110:   addition to genetic drift, random effects which affect $D(T)$ were
1111:   found in \cite{StephanSongLangley2006} to be the allelic type of the
1112:   founder of the sweep and its frequency. The resulting variance in
1113:   $D$ can be considerably higher than under neutrality.
1114:   % Additionally, as simulations in
1115:   % \cite{LehnertStephanPfaffelhuber2006} show, the measure $r^2$ for
1116:   % linkage disequilibrium does not vanish near the selected locus. As
1117:   %   $$ \mathbb E[r^2] = \mathbb E\Big[ \frac{D^2}{p_L (1-p_L) p_R(1-p_R)}\Big] \approx
1118:   %   \frac{\mathbb{V}[D]}{\mathbb E[p_L (1-p_L) p_R(1-p_R)]}$$ (see
1119:   %   \cite{McVean2002}) this indicates that $\mathbb{V}[D(T)]$ is not
1120:   %   negligible.
1121: \end{enumerate}
1122: \end{remark}
1123: 
1124: \noindent
1125: Now we come to the proof of Theorem \ref{T2}.
1126: 
1127: \begin{proof} The key in the proof is to compute the probabilities $q$
1128:   and $q'$. This is achieved by the Yule process approximation
1129:   $\Upsilon_\pi$ of Theorem \ref{T}.
1130: 
1131:    We start with geometry (ii). Here, we can see from the Yule
1132:    approximation \eqref{eq:equivGeoii} that $q = q'$ up to a term of
1133:    order $1/(\log\alpha)^2$ since one $L$ and one $R$ locus are
1134:    identical by descent iff there is no $LS$ mark on
1135:    \text{\parbox{1.2cm}{
1136: \beginpicture
1137: \setcoordinatesystem units <0.1cm, 0.1cm>
1138: \setplotarea x from 3 to 17, y from 3 to 17
1139: \plot 10 6 10 10 7 13 10 10 13 13 /
1140: %\multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /
1141: \multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /
1142: \multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /
1143: \put{$\bullet$}[Cc]  at 10 6
1144: \put{\footnotesize$1$} [cC] at 5 14
1145: \put{\footnotesize$2$} [cC] at 15 14
1146: \endpicture}} 
1147: and no $SR$ mark on 
1148:    \text{\parbox{1.2cm}{
1149: \beginpicture
1150: \setcoordinatesystem units <0.1cm, 0.1cm>
1151: \setplotarea x from 3 to 17, y from 3 to 17
1152: \plot 10 6 10 10 7 13 10 10 13 13 /
1153: \multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /
1154: %\multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /
1155: \multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /
1156: \put{$\bullet$}[Cc]  at 10 6
1157: \put{\footnotesize$1$} [cC] at 5 14
1158: \put{\footnotesize$2$} [cC] at 15 14
1159: \endpicture}}. It
1160:    does not depend on the linkage of the $L$ and the $R$ locus at the
1161:    end of the sweep.  Consequently, \eqref{eq:P:D:2} follows.
1162: 
1163:    For geometry (i), we start with the approximation of $q'$. For one
1164:    $L$ and one $R$ locus from two different individuals there is a
1165:    random number $K$ of lines in the full tree of the Yule
1166:    approximation at the time the selected loci which are linked to the
1167:    neutral ones coalesce.  To obtain the distribution of $K$, we
1168:    compute
1169: $$ \mathbb P[K=k] = \prod_{l=k+1}^{ 2\alpha} \left( 1 -
1170: \frac{1}{\binom{l}{2}}\right) \frac{1}{\binom{k}{2}} = \left(
1171: \prod_{l=k+1}^{ 2\alpha} \frac{(l+1)(l-2)}{l(l-1)}\right)
1172: \frac{2}{k(k-1)} = \frac{2}{k(k+1)} + \mathcal O\left(
1173: \frac{1}{\alpha}\right),$$ which is a special case of
\cite[(4.16)]{EtheridgePfaffelhuberWakolbinger2006}. We read from
1175: \eqref{eq:equivGeoi} that the $L$ and $R$ locus are identical by
1176: descent at the beginning of the sweep if and only if (a) no mark or an
1177: $SL$ mark falls on 
1178:    \text{\parbox{1.2cm}{
1179: \beginpicture
1180: \setcoordinatesystem units <0.1cm, 0.1cm>
1181: \setplotarea x from 3 to 17, y from 3 to 17
1182: \plot 10 6 10 10 7 13 10 10 13 13 /
1183: %\multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /
1184: %\multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /
1185: \multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /
1186: \put{$\bullet$}[Cc]  at 10 6
1187: \put{\footnotesize$1$} [cC] at 5 14
1188: \put{\footnotesize$2$} [cC] at 15 14
1189: \endpicture}}, 
1190: (b) no mark hits
1191:    \text{\parbox{1.2cm}{
1192: \beginpicture
1193: \setcoordinatesystem units <0.1cm, 0.1cm>
1194: \setplotarea x from 3 to 17, y from 3 to 17
1195: \plot 10 6 10 10 7 13 10 10 13 13 /
1196: \multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /
1197: %\multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /
1198: %\multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /
1199: \put{$\bullet$}[Cc]  at 10 6
1200: \put{\footnotesize$1$} [cC] at 5 14
1201: \put{\footnotesize$2$} [cC] at 15 14
1202: \endpicture}} 
1203: and (c) no mark or an $LR$ mark falls on
1204:    \text{\parbox{1.2cm}{
1205: \beginpicture
1206: \setcoordinatesystem units <0.1cm, 0.1cm>
1207: \setplotarea x from 3 to 17, y from 3 to 17
1208: \plot 10 6 10 10 7 13 10 10 13 13 /
1209: %\multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /
1210: \multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /
1211: %\multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /
1212: \put{$\bullet$}[Cc]  at 10 6
1213: \put{\footnotesize$1$} [cC] at 5 14
1214: \put{\footnotesize$2$} [cC] at 15 14
1215: \endpicture}}. Hence we compute
1216: \begin{equation}
1217: \begin{aligned}
1218:   q' & = \sum_{k=2}^{ 2\alpha} \frac{2}{k(k+1)}
1219:   p_0^k(\gamma_{LR}) p_k^{ 2\alpha }(\gamma_{SL})p_k^{ 2\alpha
1220:   }(\gamma_{LR})p_k^{ 2\alpha }(\gamma_{SL}) + \mathcal O\left(
1221:   \frac{1}{(\log\alpha)^2}\right)\\ & = p_0^{ 2\alpha }(\gamma_{LR})
1222:   \sum_{k=2}^{ 2\alpha} \frac{2}{k(k+1)} p_k^{2\alpha}(2\gamma_{SL}) +
1223:   \mathcal O\left( \frac{1}{(\log\alpha)^2}\right).
1224: \end{aligned}
1225: \end{equation}
1226: For $q$ we have to distinguish the cases where the $L$- and the $R$-loci
1227:  split or not. If they do not split, the $L$- and $R$-locus have
1228: the same ancestor at the beginning of the sweep if and only if there
1229: is neither an $LR$- nor an $SLR$-mark on \one{$\{1,2\}$}. If they split,
1230: the probability of a common ancestor is $q'$. Therefore,
1231: \begin{equation}
1232: \begin{aligned}
1233: q & = p_0^{ 2\alpha }(\gamma_{LR}) p_0^{ 2\alpha }(\gamma_{LR}) +
1234: \big(1-p_0^{ 2\alpha }(\gamma_{LR})\big) q'+
1235:   \mathcal O\left( \frac{1}{(\log\alpha)^2}\right).
1236: \end{aligned}
1237: \end{equation}
1238: Hence 
1239: \begin{equation}
1240: \begin{aligned}
1241:   \mathbb E[D(T)] & = p_0^{ 2\alpha }(\gamma_{LR}) \big(p_0^{2\alpha
1242:   }(\gamma_{LR}) - q'\big) \mathbb E[D(0)]+ \mathcal O\left(
1243:   \frac{1}{(\log\alpha)^2}\right)
1244: \end{aligned}
1245: \end{equation}
1246: and the result follows. 
1247: \end{proof}
1248: 
1249: \section{Proof of Theorem  \ref{T}}
1250: \label{proof}
1251: The proof deals with geometries (i) and (ii) simultaneously.  We will
1252: write events at rates (3)-(8) whenever we refer to the rates
1253: ($3_i$)-($8_i$) for geometry (i) and ($3_{ii}$)-($8_{ii}$) for
1254: geometry (ii), respectively.
1255: 
1256: We will be dealing with several random partitions all of which agree
1257: up to an error of order $\mathcal O\big( (\log(\alpha))^{-2}\big)$.
1258: More precisely, we will prove
1259: $$\Gamma_\pi \quad \stackrel{\text{Prop. \ref{PropFirst}}}{\approx}\quad 
1260: \Delta_\pi \quad \stackrel{\text{Prop.
1261:     \ref{PropSecond}}}{\approx}\quad \Xi_\pi \quad
1262: \stackrel{\text{Prop. \ref{PropThird}}}{\approx}\quad \Upsilon_\pi$$ where
1263: $\Gamma_\pi, \Delta_\pi, \Xi_\pi$ and $\Upsilon_\pi$ are given in
1264: Definitions \ref{def:1}, \ref{def:3}, \ref{def:4} and \ref{def:2},
1265: respectively, and `$\approx$' means that the random partitions differ
1266: by $\mathcal O\left( (\log\alpha)^{-2}\right)$ in variation distance.
1267: 
1268: While $\Gamma_\pi$ is the random partition which is defined by the
1269: structured ancestral recombination graph, the other random partitions
1270: are approximations. First, $\Delta_\pi$ arises by (i) ignoring events
1271: which occur according to rates $(2), (6_{ii}), (7)$ and $(8)$ and (ii)
1272: realizing all events according to rate $(5)$ first and only
1273: afterwards constructing the process using rates $(1), (3), (4)$ and
1274: $(6_i)$. Second, $\Xi_\pi$ already deals with the Yule process. It is
1275: derived by marking an infinite Yule tree by two constant rate Poisson
1276: processes with rates $\rho_{SL}, \rho_{LR}$ for geometry (i) and
1277: $\rho_{LS}, \rho_{SR}$ for geometry (ii). Finally, the Yule
1278: approximation $\Upsilon_\pi$ of $\Gamma_\pi$ arises by considering
1279: only the number of lines in an infinite Yule tree at times of
1280: coalescence in a sample.
1281: 
1282: In the whole proof we rely on a probability measure $\mathbb P$ on a
1283: probability space on which the solution of \eqref{eq:SDE} as well as
1284: arbitrarily many independent Poisson processes and other random
1285: variables are realized.
1286: 
1287: \begin{definition}
1288: \label{def:3}
1289: Define a $\mathcal P'_{\dickm\ell\,\cup\,\dickm r}$-valued random
1290: variable $\Delta_\pi$ as follows: starting in $\pi\in\mathcal
1291: P_{\dickm \ell\cup\dickm r}$ split all partition elements $\xi\in\pi$
1292: independently into $\xi \cap \dickm \ell, \xi \cap \dickm r$ with
1293: probability
1294: \begin{equation} \label{Split} 1-\mathbb E\left[\exp\left( - \rho\cdot
1295:     \int_0^T X_sds\right)\right]
1296: \end{equation}
1297: where $\rho= \rho_{LR}$ for geometry (i) and $\rho= \rho_{LS}+
1298: \rho_{SR}$ for geometry (ii).  The resulting partition $\pi'$ is used
1299: for the starting point $(\pi',\varnothing)$ of a process
1300: $\eta^{\mathcal X} = (\eta^{\mathcal X}_\beta)_{0\leq\beta\leq T}$,
1301: conditioned on a frequency path $\mathcal X = (X_t)_{0\leq t\leq T}$
1302: with transitions according to events ($1$),($3_i$), ($4_i$), ($6_i$),
1303: given by \eqref{eq:rec1}, for geometry (i) and to events ($1$),
1304: ($3_{ii}$) and ($4_{ii}$), given by \eqref{eq:rec2}, for geometry
1305: (ii), respectively. Given $\eta^{\mathcal X}$, define $$\Delta_\pi :=
1306: \int \eta^{\mathcal X}_T \mathbb P[d\mathcal X].$$
1307: \end{definition}
1308: 
1309: \begin{proposition}\label{PropFirst}
1310:   Let $\pi\in\mathcal P_{\dickm \ell\cup\dickm r}$ and $\Gamma_\pi$
1311:   and $\Delta_\pi$ be as in Definitions \ref{def:1} and \ref{def:3}.
1312:   Then,
1313:   $$ \sup_{\xi\in\mathcal P'_{\dickm \ell\cup\dickm r}}
1314:   \big|\mathbb P[\Gamma_\pi = \xi] - \mathbb P[\Delta_\pi=\xi] \big|
1315:   = \mathcal O\left( \frac{1}{(\log\alpha)^2}\right). $$
1316: \end{proposition}
1317: 
1318: \begin{proof}
1319:   We proceed in several steps. Our arguments in Step 1 show that we
1320:   may discard events which occur at rates (2), ($6_{ii}$), (7) and
1321:   (8). In Step 2 we use a fixed number of Poisson processes to
1322:   generate the random partition we want to approximate.  Our goal is
1323:   to separate events ($5$) from the rest by verifying a certain order
1324:   of the possible events and establishing an approximate independence
1325:   of the events (5). Particularly, we show in Step 3 that splits in
1326:   the beneficial background (i.e., events (5)) take place before all
1327:   other events with high probability. The approximate independence
1328:   will be proved in Steps 5 and 6 by an application of a general
1329:   result on mixed Poisson processes we establish in Step 4.
1330: 
1331:   \begin{step}
1332:     (Small probability of events (2), ($6_{ii}$), (7) and (8))\\
1333:     First, note that by Proposition 3.4 of
1334:     \cite{EtheridgePfaffelhuberWakolbinger2006} events ($2$), i.e.,
1335:     coalescences in the wild-type background, have a probability of
1336:     order $\mathcal O\big((\log\alpha)^{-2}\big)$.  Furthermore,
1337:     events ($7$) and ($8$) are back-recombinations into the beneficial
1338:     background and hence have a probability of order $\mathcal
1339:     O\big((\log\alpha)^{-2}\big)$ as well.  Additionally, for geometry
1340:     (ii), events ($6_{ii}$), i.e., splits in the wild-type background,
1341:     can only occur if a coalescence event (2) has happened before.
1342:     % by Proposition 3.4 of
1343:     % \cite{EtheridgePfaffelhuberWakolbinger2006}. 
1344:     As a consequence, we can discard events which occur at rates (2),
1345:     ($6_{ii}$), (7) and (8) producing only an error in variation
1346:     distance of at most $\mathcal O\big((\log\alpha)^{-2}\big)$.
1347: 
1348:     So we are left with a $\mathcal P'_{\dickm\ell\cup\dickm
1349:       r}$-valued stochastic process conditioned on $\mathcal X$,
1350:     $\zeta^{\mathcal X} = (\zeta^{\mathcal X}_\beta)_{0\leq \beta\leq
1351:       T}$, which arises by events $(1)$, $(3)$,$(4)$,$(5)$ and
1352:     $(6_i)$, started in $\zeta_0^{\mathcal X} = (\pi,\varnothing)$.
1353:   \end{step}
1354: 
1355:   \begin{step} (Construction of $\zeta^{\mathcal X}$ by Poisson processes)\\
1356:     Recall that $\ell:=|\dickm {\ell}|$ and $r:=|\dickm{r}|$ are the
1357:     number of $L$ and $R$ loci under consideration. Take Poisson
1358:     processes which are all conditionally independent given the random
1359:     frequency path $\mathcal X$ of the beneficial allele.  For
1360:     coalescence, take a Poisson process $\mathcal T_{\mathfrak 1}$ with
1361:   \begin{equation}
1362:     \begin{aligned}
1363:       \text{ rate }\binom{\ell + r}{2} \frac{1}{X_{T-\beta}} && 
1364:       && \qquad
1365:       (\text{coalescence in the beneficial background}) && \qquad \mathfrak{(1)},
1366:     \end{aligned}
1367:   \end{equation}
1368:   at time $\beta$; for recombination events take Poisson processes
1369:   $\mathcal T_{\mathfrak {3_i}}$, $\mathcal T_{\mathfrak {4_i}}$,
1370:   $\mathcal T_{\mathfrak {5_i}}$ with
1371:   \begin{equation}
1372:     \begin{aligned}
1373:       &\text{ rate }\ell\rho_{SL}(1-X_{T-\beta})& &\qquad(\text{rec. to the wild-type background})&& \qquad \mathfrak{(3_i)},\\
1374:       & \text{ rate }  r \rho_{LR}(1-X_{T-\beta})& &\qquad(\text{rec. to or split in the wild-type background})&&\qquad \mathfrak{(4_i)},\\
1375:       &\text{ rate } r \rho_{LR}X_{T-\beta} & &\qquad(\text{split in the beneficial background})&&\qquad \mathfrak{(5_i)},\\
1376: %      \mathfrak{(6_i)} && \text{ rate }  (\ell\wedge r) \rho_{LR}(1-X) && \qquad(\text{split in $b$})\\
1377:     \end{aligned}
1378:   \end{equation}
1379:   at time $\beta$ for geometry (i) and Poisson processes $\mathcal
1380:   T_{\mathfrak {3_{ii}}}$, $\mathcal T_{\mathfrak {4_{ii}}}$, $\mathcal
1381:   T_{\mathfrak {5_{ii}}}$ with
1382:   \begin{equation}
1383:     \begin{aligned}
1384:       &\text{ rate } \ell \rho_{LS}(1-X_{T-\beta})& &\qquad(\text{rec. to the wild-type background})&&\qquad \qquad \quad\: \mathfrak{(3_{ii})},\\
1385:       & \text{ rate } r \rho_{SR}(1-X_{T-\beta})& &\qquad(\text{rec. to the wild-type background})&& \qquad \qquad \quad\: \mathfrak{(4_{ii})},\\
1386:       &\text{ rate } r (\rho_{LS}+\rho_{SR})X_{T-\beta}&
1387:       &\qquad(\text{split in the beneficial background})&& \quad
1388:       \qquad \qquad \:\mathfrak{(5_{ii})},
1389:     \end{aligned}
1390:   \end{equation}
1391:   at time $\beta$ for geometry (ii). We have combined recombinations
1392:   to the wild-type and splits in the wild-type background in case of
1393:   geometry $(i)$ since they happen with the same rates.
1394: 
1395:   Additionally, let $W=(W_{{\mathfrak i},m})_{{\mathfrak i} =
1396:     {\mathfrak 1}, {\mathfrak 3}, {\mathfrak 4}, {\mathfrak 5},
1397:     m=1,2,\ldots}$ be a random array such that all $W_{{\mathfrak
1398:       i},m}$'s are independent, $W_{{\mathfrak 1},m}$ is uniformly
1399:   distributed on all pairs of $\dickm{\ell}\cup\dickm{r}$,
1400:   $W_{{\mathfrak 3},m}$ is uniformly distributed on $\dickm{\ell}$,
1401:   and $W_{{\mathfrak 4},m}$ and $W_{{\mathfrak 5},m}$ are uniformly
1402:   distributed on $\dickm{r}$, $m=1,2,\ldots$. 
1403: 
1404:   The set $\dickm{\ell}\cup\dickm{r}$ can be totally ordered, so we
1405:   may assume that every partition element in $\zeta\in\mathcal
1406:   P'_{\dickm{\ell}\cup \dickm{r}}$ has a smallest element. Recall that
1407:   we write $\zeta_{(j)}$ for the partition element containing
1408:   $j\in\dickm{\ell}\cup\dickm{r}$.
1409: 
1410:   We abbreviate by $\mathcal T_{\mathfrak 3}$-$\mathcal T_{\mathfrak
1411:     5}$ the Poisson processes $\mathcal T_{\mathfrak {3_i}}$-$\mathcal
1412:   T_{\mathfrak {5_i}}$ for geometry (i) and the Poisson processes
1413:   $\mathcal T_{\mathfrak {3_{ii}}}$-$\mathcal T_{\mathfrak {5_{ii}}}$ for
1414:   geometry (ii). We next show that the distribution of
1415:   $\zeta^{\mathcal X}_T$ is the image measure of the tuple $(\mathcal
1416:   T_{\mathfrak 1}, \mathcal T_{\mathfrak 3}, \mathcal T_{\mathfrak 4},
1417:   \mathcal T_{\mathfrak 5}, W)$ under a map $\varphi$. Specifically,
1418:   the distribution of $\zeta_T^{\mathcal X}$ is uniquely determined by
1419:   the distribution of $(\mathcal T_{\mathfrak 1}, \mathcal
1420:   T_{\mathfrak 3}, \mathcal T_{\mathfrak 4}, \mathcal T_{\mathfrak 5},
1421:   W)$.
1422: 
1423:   To define $\varphi$, consider a discrete set $\mathbf T_{\mathfrak
1424:     1}\subseteq[0,T]$ and finite sets $\mathbf T_{\mathfrak 3},\mathbf
1425:   T_{\mathfrak 4},\mathbf T_{\mathfrak 5}\subseteq [0,T]$ such that
1426:   $\mathbf T_{\mathfrak i_1}\cap \mathbf T_{\mathfrak i_2}=\varnothing$
1427:   for $\mathfrak{i_1} \neq \mathfrak{i_2}$ and set $\mathbf
1428:   T=\bigcup_{\mathfrak i} \mathbf T_{\mathfrak i}$. Furthermore $w =
1429:   (w_{{\mathfrak i},m})_{{\mathfrak i} = {\mathfrak 1}, {\mathfrak 3},
1430:     {\mathfrak 4}, {\mathfrak 5}, m=1,2,\ldots}$ such that for all
1431:   $m=1,2,\ldots$, $w_{{\mathfrak 1},m}$ is a pair in
1432:   $\dickm{\ell}\cup\dickm{r}$, $w_{{\mathfrak 3},m} \in \dickm{\ell}$
1433:   and $w_{{\mathfrak 4},m}, w_{{\mathfrak 5},m} \in\dickm{r}$. Given
1434:   $(\mathbf T_{\mathfrak 1},\mathbf T_{\mathfrak 3},\mathbf
1435:   T_{\mathfrak 4},\mathbf T_{\mathfrak 5},w)$ we generate a partition
1436:   by considering the events in $\mathbf T$ in decreasing order. Assume
1437:   $\zeta^{\mathcal X}_0=(\pi,\varnothing)$ and after the $(m-1)$st
1438:   event at time $\beta$ we obtain a partition $\zeta^{\mathcal
1439:     X}_\beta = (\zeta^B, \zeta^b)\in\mathcal
1440:   P'_{\dickm{\ell}\cup\dickm{r}}$ and the $m$th event in $\mathbf T$
1441:   to be realized happens at time $\beta'\in \mathbf T$.
1442: 
1443:   Consider first the case that $\beta'$, the time of the $m$th event,
1444:   is the $m_{\mathfrak 1}$st event in $\mathbf T_{\mathfrak 1}$.
1445:   The pair $w_{{\mathfrak 1}, m_{\mathfrak 1}}=(j,k)$ gives a random
1446:   pair of loci.  If $\zeta_{(j)}, \zeta_{(k)}\in\zeta^B$ and if both,
1447:   $j$ and $k$, are the smallest elements of their partition elements,
1448:   coalesce these partition elements, i.e., make the transition
1449:   $$ \left(\zeta^B,\zeta^b\right) \longrightarrow \left((\zeta^B\setminus \{\zeta_{(j)}, 
1450:   \zeta_{(k)}\}) \cup \{ \zeta_{(j)}\cup \zeta_{(k)}\},\; \zeta^b\right).$$
1451:   Otherwise do nothing.
1452: 
1453:   The next case to consider is that $\beta'$ is the $m_{\mathfrak
1454:     3}$rd event in $\mathbf T_{\mathfrak 3}$ and $w_{{\mathfrak 3},
1455:     m_{\mathfrak 3}}=j$ for some $j\in\dickm{\ell}$. If
1456:   $\zeta_{(j)}\in\zeta^B$ and if $j$ is the smallest element of
1457:   $\zeta_{(j)}\cap\dickm{\ell}$, change the partition element from
1458:   $\zeta^B$ to $\zeta^b$, i.e., make the transition
1459:   \begin{equation}\label{eq:trans3}
1460:     \left(\zeta^B,\zeta^b\right) \longrightarrow \left(\zeta^B\setminus \{\zeta_{(j)}\},\; \zeta^b\cup\{\zeta_{(j)} \}\right). 
1461:   \end{equation}
1462:   Otherwise do nothing. The case $\beta'\in \mathbf T_{\mathfrak 5}$ is similar and
1463:   is omitted.
1464: 
1465:   If $\beta'$ is the $m_{\mathfrak 4}$th event in $\mathbf
1466:   T_{\mathfrak 4}$ and $w_{{\mathfrak 4}, m_{\mathfrak 4}}=j$ for
1467:   $j\in\dickm{r}$ the partition $\zeta$ again only changes if $j =
1468:   \min \zeta_{(j)}\cap\dickm{r}$. We distinguish two cases,
1469:   $\zeta_{(j)}\in\zeta^B$ and $\zeta_{(j)}\in\zeta^b$. In the former
1470:   case, split the $L$- and $R$-loci in the partition element in two
1471:   partition elements and bring all $R$-loci into the wild-type
1472:   background, i.e., make the transition
1473:   \begin{equation}\label{eq:trans4a} \left(\zeta^B,\zeta^b\right) \longrightarrow 
1474:     \left((\zeta^B\setminus\{\zeta^B_{(j)}\}) \cup \{\zeta^B_{(j)}\cap \dickm
1475:     \ell\},\; \zeta^b\cup \{\zeta^B_{(j)}\cap\dickm r\}\right).\end{equation} 
1476:   This corresponds to an event (4).   
1477:   In the latter case split all $L$- and $R$-loci of $\zeta_{(j)}$ and leave them in
1478:   the wild-type background, i.e., make the transition
1479:   \begin{equation}\label{eq:trans4b} 
1480:   \left(\zeta^B,\zeta^b\right) \longrightarrow \left(\zeta^B,\;
1481:     (\zeta^b\setminus\{\zeta_{(j)}\}) \cup \{\zeta_{(j)} \cap
1482:     \dickm{\ell}, \zeta_{(j)}\cap\dickm{r}\}\right),
1483:     \end{equation}
1484:   which corresponds to an event $(6_i)$. 
1485:   Recall that for geometry (ii) one $L$- and one $R$-locus cannot recombine to the wild-type background
1486:   together.  Hence partition elements in $\zeta^b$ are either subsets of 
1487:   $\dickm{\ell}$ or of $\dickm{r}$, so that the last transition cannot occur for this geometry.
1488: 
1489:   By generating all events according to this procedure we end with a
1490:   partition $\zeta^{\mathcal X}_T$. Therefore we have defined the map
1491:   $\varphi: (\mathbf T_{\mathfrak 1},\mathbf T_{\mathfrak 3},\mathbf
1492:   T_{\mathfrak 4},\mathbf T_{\mathfrak 5},w) \mapsto \zeta^{\mathcal
1493:     X}_T$.
1494:   \begin{align}\label{eq:claim}
1495:     \text{\parbox{12cm}{\it The distribution of $\zeta^{\mathcal X}_T$ is
1496:         the image measure of $(\mathcal T_{\mathfrak 1}, \mathcal
1497:         T_{\mathfrak 3}, \mathcal T_{\mathfrak 4}, \mathcal
1498:         T_{\mathfrak 5}, W)$ under the map $\varphi$.}}
1499:   \end{align}
1500:   To see this, observe first, that there are only finitely many
1501:   recombination events (3), (4), (5) and ($6_i$). Almost surely, all
1502:   events in the Poisson processes occur at different times, so
1503:   $\varphi$ is defined on a set of probability 1.  By the above
1504:   construction, we obtain that two partition elements in $\zeta^B$
1505:   coalesce by event (1), which is driven by $\mathcal T_{\mathfrak
1506:     1}$. The Poisson processes $\mathcal T_{\mathfrak 3}$, $\mathcal T_{\mathfrak 4}$,
1507:   $\mathcal T_{\mathfrak 5}$ produce exactly the recombination events
1508:   (3), (4), (5) and ($6_i$). Hence \eqref{eq:claim} is proved.
1509:   \smallskip
1510:   
1511:   Given $w$, the random partition $\varphi(\mathbf T_{\mathfrak 1},
1512:   \mathbf T_{\mathfrak 3}, \mathbf T_{\mathfrak 4}, \mathbf
1513:   T_{\mathfrak 5}, w)$ only depends on the order of time points in
1514:   $\mathbf T_{\mathfrak 1}, \mathbf T_{\mathfrak 3}, \mathbf
1515:   T_{\mathfrak 4}, \mathbf T_{\mathfrak 5}$. There is another feature
1516:   we will need:
1517:   \begin{align}\label{eq:claim2}
1518:     \text{\parbox{12cm}{\it Let $\beta',\beta''$ be consecutive time
1519:         points in $\mathbf T$ with $\beta'\in \mathbf T_{\mathfrak 3},
1520:         \beta''\in \mathbf T_{\mathfrak 4}$.  Exchanging $\beta'$ and
1521:         $\beta''$ does not alter the random partition $\varphi(\mathbf
1522:         T_{\mathfrak 1}, \mathbf T_{\mathfrak 3}, \mathbf T_{\mathfrak
1523:           4}, \mathbf T_{\mathfrak 5}, w)$. Formally, if $\mathbf T
1524:         \cap (\beta',\beta'')=\varnothing$, $\mathbf T_{\mathfrak 3}' =
1525:         \mathbf T_{\mathfrak 3}\setminus \{\beta'\} \cup \{\beta''\}$
1526:         and $\mathbf T_{\mathfrak 4}' = \mathbf T_{\mathfrak
1527:           4}\setminus \{\beta''\} \cup \{\beta'\}$, then $$
1528:         \varphi(\mathbf T_{\mathfrak 1}, \mathbf T_{\mathfrak 3}',
1529:         \mathbf T_{\mathfrak 4}', \mathbf T_{\mathfrak 5}, w) =
1530:         \varphi(\mathbf T_{\mathfrak 1}, \mathbf T_{\mathfrak 3},
1531:         \mathbf T_{\mathfrak 4}, \mathbf T_{\mathfrak 5}, w). $$}}
1532:   \end{align}
1533:   Assume $\beta'$ is the $m_{\mathfrak 3}$rd event in $\mathbf
1534:   T_{\mathfrak 3}$, $w_{{\mathfrak 3},m_{\mathfrak 3}}=j$ and
1535:   $\beta''$ is the $m_{\mathfrak 4}$th event in $\mathbf T_{\mathfrak
1536:     4}$ and $w_{{\mathfrak 4},m_{\mathfrak 4}}=k$. If $j$ and $k$ are
1537:   not in the same partition element for $\beta<\beta'$, the claim is
1538:   trivial as recombination events only make the partition finer.
1539:   Similarly, if $j>\min \zeta_{(j)}\cap\dickm{\ell}$ or $k>\min
1540:   \zeta_{(k)}\cap\dickm{r}$ only one transition occurs and the claim
1541:   follows. In the case $$\zeta_{(j)} = \zeta_{(k)},\quad j = \min
1542:   \zeta_{(j)}\cap\dickm{\ell},\quad k = \min
1543:   \zeta_{(j)}\cap\dickm{r}$$ two transitions occur if and only if
1544:   $\zeta_{(j)}=\zeta_{(k)} \in \zeta^B$. We illustrate this situation
1545:   in Figure \ref{smallFig}.
1546: 
1547:   \begin{figure}
1548:     \hspace{3cm} (a) \hspace{7.5cm}(b)
1549: 
1550:   \begin{center}
1551:     \includegraphics[width=7cm]{smallAnc1.ps}\hspace{1cm}
1552:     \includegraphics[width=7cm]{smallAnc2.ps}
1553:   \end{center}
1554:   \caption{\label{smallFig}(a) A partition element (a line) is hit by
1555:     an event taking both the $L$- and the $R$-locus to the wild-type background
1556:     at time $\beta'$. Afterwards, at time $\beta''$ the line is split
1557:     in the wild-type background. (b) Here, the $R$-locus is taken to
1558:     the wild-type background at time $\beta'$. Afterwards the
1559:     $L$-locus is taken to the same background at time $\beta''$. The
1560:     outcome is the same. The line moves from the beneficial to the
1561:     wild-type background and is split there.}
1562:   \end{figure}
1563: 
1564:   Observe that the two-step transitions for the pair
1565:   $\big($\eqref{eq:trans3}, \eqref{eq:trans4b}$\big)$ (see Figure
1566:   \ref{smallFig}(a)) as well as for the pair
1567:   $\big($\eqref{eq:trans4a}, \eqref{eq:trans3}$\big)$ (see Figure
1568:   \ref{smallFig}(b)) are given by
1569:   \begin{equation*} 
1570:   \left(\zeta^B,\zeta^b\right) \longrightarrow \left(\zeta^B \setminus \{\zeta_{(j)}\}, \;
1571:     \zeta^b  \cup \{\zeta_{(j)} \cap
1572:     \dickm{\ell},\zeta_{(j)}\cap\dickm{r}\}\right),
1573:   \end{equation*}
1574:   i.e., the partition element both moves from $\zeta^B$ to $\zeta^b$
1575:   and is split in its $L$- and $R$-loci. This proves
1576:   \eqref{eq:claim2}.
1577: \end{step}
1578: 
1579: \begin{step}(Probable order of events)\\
1580: %  Next, we will show that up to a small error the the events given by
1581: %  $\mathcal \mathbf T_{\mathfrak 1}$, $\mathcal{\mathbf T}_{\mathfrak
1582: %    3}$, $\mathcal{\mathbf T}_{\mathfrak 4}$, $\mathcal{\mathbf
1583: %    T}_{\mathfrak 5}$ follow a certain order. 
1584:   Define $\varepsilon:=\frac{(\log \alpha)^{2}}{\alpha}$ and
1585:   $T_{\varepsilon}:= \min\{t\geq 0: X_{t}= \varepsilon \}$. We will
1586:   show that (i) no coalescences, i.e., events $(\mathfrak 1)$, occur
1587:   in $[T_\varepsilon, T]$, (ii) no splits in the beneficial
1588:   background, i.e., events $(\mathfrak 5)$, occur during
1589:   $[0,T_\varepsilon]$ and (iii) splits in the beneficial background,
1590:   i.e., events $(\mathfrak 5)$ do not overlap with other recombination
1591:   events $(\mathfrak 3), (\mathfrak 4)$ with high probability. More
1592:   precisely, we claim
1593:   \begin{align}
1594:     \mathbb P[\mathcal{T}_{\mathfrak 1} \cap \left[ T_{\varepsilon}, T\right]
1595:     \neq \varnothing ] = \mathcal O\Big(
1596:     \frac{1}{(\log\alpha)^2}\Big),
1597:     \label{eq:step3a}
1598:     \\
1599:     \mathbb{P}\left[ \mathcal{T}_{\mathfrak 5} \cap \left[0,
1600:         T_{\varepsilon}\right] \neq \varnothing \right] =
1601:     \mathcal{O}\left(\frac{(\log\alpha)^{2}}{ \alpha}\right), \label{eq:step3b}\\
1602:     \mathbb{P} \left[ \min\mathcal{T}_{\mathfrak 5} <
1603:       \max(\mathcal{T}_{\mathfrak 3}\cup\mathcal{T}_{\mathfrak 4})
1604:     \right] =\mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}
1605:     \right).\label{eq:step3c}
1606:   \end{align}
1607: 
1608:   First, \eqref{eq:step3a} coincides with the assertion of Lemma 4.3
1609:   in \cite{EtheridgePfaffelhuberWakolbinger2006}. Second, for
1610:   \eqref{eq:step3b}, we have $X_{t}\leq \frac{(\log
1611:     \alpha)^{2}}{\alpha}$ for all $t \leq T_{\varepsilon}$. Hence we
1612:   get
1613:   \begin{eqnarray*}
1614:     \mathbb{P}\left[\mathcal{T}_{\mathfrak 5} \cap \left[0, T_{\varepsilon}\right] = 
1615:       \varnothing \right]&=& \mathbb{E}\left[\exp\left(- r
1616:         \rho_{LR}\int_{0}^{T_{\varepsilon}}X_{s}ds \right) \right] \\ 
1617:     &\geq& \mathbb{E}\left[ \exp\left(- r \rho_{LR}\;\varepsilon\; T_{\varepsilon} \right)\right]
1618:     \geq  \exp\left(- r \rho_{LR}\;\varepsilon\; \mathbb{E}\left[ T\right] \right).
1619:   \end{eqnarray*}
1620:   By \eqref{eq:T} we see that $\mathbb{E}\left[ T\right]=\frac{2\log
1621:     \alpha}{\alpha}+\mathcal{O}\left(\frac{1}{\alpha}\right)$. By the
1622:   choice of $\varepsilon$, this finally gives
1623:      \begin{equation} \nonumber \mathbb{P}\left[\mathcal{T}_{\mathfrak 5} \cap
1624:       \left[0, T_{\varepsilon} \right] = \varnothing \right] \geq
1625:     1-\mathcal{O}\left(\frac{(\log\alpha)^{2}}{ \alpha}\right).
1626: \end{equation} 
1627: Third, for \eqref{eq:step3c} we write, using $\rho = \mathcal O\left(
1628:   \frac{\alpha}{\log\alpha}\right)$, which might change from
1629: occurrence to occurrence,
1630: \begin{equation}\label{eq:green1}
1631: \begin{aligned}
1632:   \mathbb{P} \left[ \min\mathcal{T}_{\mathfrak 5}  \right. &<
1633:     \max(\mathcal{T}_{\mathfrak 3}\cup\mathcal{T}_{\mathfrak 4}) \left. \right] 
1634:    %\mathbb{E}   \Big[\mathbb{P}\left[\exists \; t \in \mathcal{T}_{\mathfrak 5}: t <
1635:    %\max(\mathcal{T}_{\mathfrak 3}\cup \mathcal{T}_{\mathfrak 4}) \big| \mathcal X \right]
1636: % \Big] 
1637: \\ 
1638:   & = \mathbb{E}\left[ \int_{0}^{T} \mathbb{P}\left[
1639:       \mathcal{T}_{\mathfrak 5} \cap \left[0, t \right] \neq \varnothing
1640:       \big|\max(\mathcal{T}_{\mathfrak 3} \cup \mathcal{T}_{\mathfrak 4}) \in dt, \mathcal
1641:      X \right] \cdot \mathbb{P}\left[\max(\mathcal{T}_{\mathfrak 3}\cup
1642:       \mathcal{T}_{\mathfrak 4}) \in dt \big| \mathcal X \right] \right] \\&
1643:   \leq \mathbb{E} \left[ \int_{0}^{T} \left(1- \exp
1644:       \left(-\int_{0}^{t} \rho X_{s}ds\right) \right)\cdot \rho
1645:     (1-X_{t}) \exp\left(-\int_{t}^{T} \rho
1646:       (1-X_{s})ds\right) \right]\\
1647:   & \leq \rho^2 \cdot \mathbb{E}\left[\int_{0}^{T} (1-X_{t})
1648:     \int_{0}^{t} X_{s} dsdt\right]. 
1649: \end{aligned}
1650: \end{equation}
1651: The last term can be estimated using the Green function for the
1652: diffusion \eqref{eq:SDE}. As the right hand side of \eqref{eq:green1}
1653: coincides with the second line of (4.5) in
1654: \cite{EtheridgePfaffelhuberWakolbinger2006} we immediately obtain
1655: \eqref{eq:step3c}.
1656: \end{step}
1657: 
1658: \medskip
1659: 
1660: In the next three steps we will show that realizing the different
1661: splits independently from a fixed sample path $\mathcal{X}=
1662: (X_{t})_{0\leq t \leq T}$ will cause only a small error. To see this
1663: we will establish a general result on mixed Poisson processes in Step
1664: 4 and apply it to the Poisson processes introduced in Step 2. The
1665: proof of Proposition \ref{PropFirst} will then be concluded by an
1666: application of these two steps.
1667: 
1668: \begin{step}(General approximations of mixed Poisson processes) \\
1669:   Let $\{\Psi(\delta): \delta > 0\}$, $\{\Phi(\delta): \delta > 0 \}$
1670:   be families of random variables taking values in $\mathbb{R}^{+}$.
1671:   Assume that the expectations $\mathbb{E}[\Psi(\delta)]$,
1672:   $\mathbb{E}[\Phi(\delta)]$ are bounded in $\delta$ and
1673: \begin{equation} \label{var}
1674:  \mathbb{V}[\Psi(\delta)],\mathbb{V}[\Phi(\delta)] = \mathcal{O}\left(\delta \right)
1675: \end{equation}
1676: as $\delta \rightarrow 0$. Denote the weights of the
1677: Poisson distribution with parameter $\lambda$ by
1678: $\text{Poi}_{\lambda}(\cdot)$, i.e., $\text{Poi}_{\lambda}(k) =
1679: e^{-\lambda}\lambda^k/k!$. We claim that for $k, l \in \mathbb{N}_0$
1680: \begin{eqnarray}\label{approx1}
1681:   \mathbb{E}\left[\text{Poi}_{\Psi(\delta)}(k) \right]&= &
1682:   \text{Poi}_{\mathbb{E}[\Psi(\delta)]}(k)+ \mathcal{O}\left(\delta \right)
1683:   \\ \label{approx2}
1684:   \mathbb{E}\left[\text{Poi}_{\Psi(\delta)}(k) \cdot 
1685:     \text{Poi}_{\Phi(\delta)}(l) \right]&=&\mathbb{E}
1686:   \left[\text{Poi}_{\Psi(\delta)}(k)\right]\cdot 
1687:   \mathbb{E}\left[\text{Poi}_{\Phi(\delta)}(l)\right] + 
1688:   \mathcal{O}\left(\delta  \right)\end{eqnarray}
1689: 
1690: Note that by a Taylor series approximation, for a random variable
1691: $\Psi$ in $\mathbb R_+$ with second moments and some $\tilde\Psi$
1692: satisfying $\left|\tilde \Psi - \mathbb{E}[\Psi]\right|\leq \left|\Psi -
1693: \mathbb{E}[\Psi]\right|$,
1694: \begin{eqnarray} \nonumber\left| \mathbb{E} \left[ e^{- \Psi}
1695:   \frac{\Psi^{k}}{k!} \right] - e^{- \mathbb{E}[\Psi
1696:     ]}\frac{\mathbb{E}[\Psi ]^{k}}{k!} \right| &=& \left|
1697:   \left[\frac{d^2}{d\Psi^2} \left(e^{- \Psi}
1698:   \frac{\Psi^{k}}{k!}\right)\right]_{\Psi=\mathbb{E}[\Psi]}\right|\cdot
1699:   \mathbb{E} \left[(\tilde \Psi - \mathbb{E}[\Psi])^{2} \right] \\
1700:   \nonumber &\leq&
1701:   e^{- \mathbb{E}\left[\Psi \right]} \left|\left\{ \frac{\mathbb{E}\left[\Psi \right]^{k-2}}{(k-2)!}- 2\frac{\mathbb{E}\left[\Psi \right]^{k-1}}{(k-1)!} + \frac{\mathbb{E}\left[\Psi \right]^{k}}{k!}\right\}\right|\cdot \mathbb{V}\left[\Psi\right] \\
1702:     &\leq& 2 \mathbb{V}\left[\Psi
1703:     \right] \label{Taylor}
1704: \end{eqnarray} 
1705: where the terms in $\{ \ldots \}$ only show up if the denominators are
1706: non-zero and the last step follows from the fact that the Poisson
1707: weights in $\{ \ldots \}$ lie in $[0,1]$. As this holds for every
1708: $\Psi(\delta)$, (\ref{approx1}) follows immediately from (\ref{var}).
1709: Moreover, by a calculation similar to (\ref{Taylor}),
1710: \begin{eqnarray*}
1711:   \mathbb{V} \left[\text{Poi}_{\Psi(\delta)}(k) \right] 
1712:   = \mathbb{E}\left[ e^{-2\Psi(\delta)} \frac{\Psi(\delta)^{2k}}{(k!)^{2}}\right] - \mathbb{E}\left[e^{-\Psi(\delta)}\frac{\Psi(\delta)^{k}}{k!} \right]^{2}
1713:   =\mathcal O \big(\mathbb{V}\left[ \Psi(\delta) \right]\big) 
1714:   = \mathcal{O}\left(\delta \right).
1715: \end{eqnarray*}
1716: Additionally, (\ref{approx2}) follows easily from the fact that
1717: \begin{eqnarray*}
1718:   &&\big| \mathbb{E}\left[\text{Poi}_{\Psi(\delta)}(k) \cdot \text{Poi}_{\Phi(\delta)}(l) \right]-\mathbb{E}\left[\text{Poi}_{\Psi(\delta)}(k)\right]\cdot \mathbb{E}\left[\text{Poi}_{\Phi(\delta)}(l)\right] \big| \\
1719:   &&\qquad \qquad  =  \big| \text{Cov}\left[\text{Poi}_{\Psi(\delta)}(k) , \text{Poi}_{\Phi(\delta)}(l) \right] \big| 
1720:   \leq \sqrt{
1721:     \mathbb{V}\left[ \text{Poi}_{\Psi(\delta)}(k)\right] \cdot \mathbb{V}\left[\text{Poi}_{\Phi(\delta)}(l) \right]} = \mathcal{O}\left( \delta \right)
1722: \end{eqnarray*}
1723: by the Cauchy-Schwarz inequality.
1724: \end{step}
1725: 
1726: \begin{step}(Green function estimates)\\
1727:   Set $\rho=\gamma \frac{\alpha}{\log \alpha}$ where $\gamma =
1728:   \gamma_{LR}$ for geometry (i) and $\gamma= \gamma_{LS}+ \gamma_{SR}$
1729:   for geometry (ii). Using our approximations from Step 4 we will show
1730:   next
1731: \begin{eqnarray} \label{Poi1}
1732: \mathbb{P}\left[|\mathcal{T}_{\mathfrak 5}|= k \right] &=&\text{Poi}_{\mathbb{E}[r\rho\int_{0}^{T}X_{s}ds]}(k)+ \mathcal{O}\left( \frac{1}{(\log \alpha)^{2}}\right) \\  \nonumber
1733: \mathbb{P} \left[\big|(\mathcal{T}_{\mathfrak 3} \cup \mathcal{T}_{\mathfrak 4}) \cap [T_{\varepsilon}, T]\big|= k,  |\mathcal{T}_{\mathfrak 5}|= l \right]
1734: &=&\mathbb{P}\left[\big|(\mathcal{T}_{\mathfrak 3} \cup \mathcal{T}_{\mathfrak 4})\cap [T_{\varepsilon}, T]\big|= k \right] \cdot \mathbb{P}\left[|\mathcal{T}_{\mathfrak 5}|= l\ \right] \\ \label{Poi2}
1735: &&  \qquad \qquad \qquad \qquad + \;\mathcal{O}\left(\frac{1}{(\log \alpha)^{2}} \right)
1736: \end{eqnarray}
1737: as $\alpha \rightarrow \infty$.
1738: To see this, set $\delta= \frac{1}{(\log \alpha)^{2}}$ and define
1739: \[
1740:  \Psi(\delta)= r\rho\int_{0}^{T} X_{s}ds, \qquad\qquad \Phi(\delta)= (\ell+r)\rho \int_{T_{\varepsilon}}^{T}(1-X_{s})ds.
1741: \]
1742: Observe that for $k= 0,1,2, \ldots$
1743: \begin{align}
1744: \mathbb{P}\left[|\mathcal{T}_{\mathfrak 5}|= k\right]&= \mathbb{E}\left[ \text{Poi}_{\Psi(\delta)}(k)\right]\label{Poi3}\\
1745: \mathbb{P}\left[\big|(\mathcal{T}_{\mathfrak 3} \cup \mathcal{T}_{\mathfrak 4})\cap [T_{\varepsilon}, T]\big|= k \right]& = \mathbb{E}\left[\text{Poi}_{\Phi(\delta)}(k) \right]\nonumber
1746: \end{align}
1747: because $\mathcal{T}_{\mathfrak 3}$, $\mathcal{T}_{\mathfrak 4}$, $\mathcal{T}_{\mathfrak 5}$ are randomly time-changed Poisson processes. By (\ref{approx1}) and (\ref{approx2}), (\ref{Poi1}) and (\ref{Poi2}) follow once we have shown
1748: \begin{eqnarray}
1749: \mathbb{E}\left[\rho \int_{T_{\varepsilon}}^{T}(1-X_{s})ds \right] &\leq& \mathbb{E}\left[\rho \int_{0}^{T}X_{s}ds \right] \leq 2\gamma + \mathcal{O}\left(\frac{1}{\alpha} \right) \label{key1} \\
1750: \mathbb{V}\left[\rho \int_{T_{\varepsilon}}^{T}(1-X_{s})ds \right] &\leq& \mathbb{V}\left[\rho \int_{0}^{T}X_{s}ds \right] = \mathcal{O}\left(\frac{1}{(\log\alpha)^{2}} \right) \label{key2}
1751: \end{eqnarray}
1752: as $\alpha \rightarrow \infty$.\\
1753: First observe that $\left(X_{t} \right)_{0 \leq t \leq T}$ has the
1754: same distribution as $(1-X_{T-t})_{0 \leq t\leq T}$ by
1755: time-reversibility (see e.g. \cite{KarlinTaylor1981,
1756:   Griffiths2003}). Hence the inequalities on the left hand side of
1757: (\ref{key1}) and (\ref{key2}) follow. Second, we verify the
1758: expressions on the right hand side of (\ref{key1}) and (\ref{key2}) by
1759: an application of the Green function $G(.,.)$ of the diffusion
1760: $(X_{t})_{0 \leq t\leq T}$.  This function satisfies
1761: \[
1762: \mathbb E_x\left[ \int_0^T g(X_t) dt\right] = \int_0^1 G(x,y) g(y) dy
1763: \]
1764: where $\mathbb E_x[.]$ refers to the path $(X_t)_{0\leq t\leq T}$ with
1765: $X_0=x$ and $\mathbb E[.] := \mathbb E_0[.]$. The Green function is
1766: given by
1767: \[
1768: G(x, y)=
1769: \begin{cases}
1770:   \frac{\left(1-e^{-\alpha(1-y)}\right) \left(1-e^{-\alpha y} \right)}{\alpha y \left(1-y \right)\left(1-e^{-\alpha} \right)} \qquad &\text{ if } x \leq y \\
1771:   \frac{\left(e^{-\alpha x} -e^{-\alpha} \right)\left(e^{\alpha y}-1
1772:     \right) \left(1-e^{-\alpha y} \right)} {\alpha y \left(1-y \right)
1773:     \left(1-e^{-\alpha} \right) \left(1-e^{-\alpha x} \right)} \qquad
1774:   &\text{ if } x\geq y,
1775: \end{cases}
1776: \]
1777: see e.g. \cite{KarlinTaylor1981,
1778:   EtheridgePfaffelhuberWakolbinger2006}. More generally, $G(.,.)$
1779: satisfies
1780: \begin{eqnarray*}
1781: &&\mathbb{E}_{x}\left[\int_{0}^{T} \int_{t_{1}}^{T} \ldots \int_{t_{k-1}}^{T} g_{k}(X_{t_{k}})\ldots g_{1}(X_{t_{1}})dt_{k}\ldots dt_{1} \right] \\
1782:   &&\qquad  \qquad \qquad \qquad
1783:   = \int_{0}^{1} \ldots \int_{0}^{1} G(x, x_{1}) \ldots G(x_{k-1}, x_{k}) g_{1}(x_{1}) \ldots g_{k}(x_{k})dx_{k}\ldots dx_{1}
1784: \end{eqnarray*}
1785: for all $k=1, 2, \ldots$ which can be proved by induction. We may thus
1786: write, because $G(x,y) = G(0,y)$ for $y\geq x$,
1787: \begin{align*} 
1788:   \mathbb{V}\Big[  \rho\int_{0}^{T}X_{s}ds\Big] &= 
1789:    \rho^2 \left( 2 \int_0^1 \int_0^1 G(0,x) G(x,y) xy dy dx - 2\int_0^1 \int_x^1 G(0,x) G(0,y) xy dy dx\right) \\
1790:   & = 2\rho^2 \int_0^1 \int_0^x G(0,x) G(x,y) xy dy dx
1791:   \: \leq \: 2 \rho^2 \int_0^1 \int_0^x G(0,x) G(x,y) dy dx \\
1792:   & = \rho^2 \mathbb V[T] = \mathcal{O}\left(
1793:     \frac{1}{(\log\alpha)^{2}}\right)
1794:   % = \rho^{2}\mathbb{V}\left[ \int_{0}^{T}X_{s}ds\right]  = 2\gamma^{2} \int_{0}^{1} \int_{0}^{\xi} G(0, \xi)G(\xi, \eta)\xi \; \eta \; d\eta d\xi \\
1795:   % = \frac{2\gamma^{2}} {\alpha^{2}(1-e^{-\alpha})^{2}} \int_{0}^{1}
1796:   % \int_{0}^{\xi} \frac{\left(
1797:   %     1-e^{-\alpha(1-\xi)}\right)\left(1-e^{-\alpha\xi}
1798:   %   \right)}{(1-\xi)} \; \frac{\left(e^{-\alpha\xi}- e^{-\alpha}
1799:   %   \right)\left(e^{\alpha\eta}-1 \right) \left(1-e^{-\alpha\eta}
1800:   %   \right)}{(1-\eta)\left(1-e^{-\alpha\xi}\right)}
1801:   % d\eta d\xi \\
1802:   % \leq
1803:   % \frac{2\gamma^{2}} {\alpha^{2}(1-e^{-\alpha})^{2}} \int_{0}^{1} \int_{0}^{\xi} \frac{e^{-\alpha\xi}\left(e^{\alpha\xi}-1\right)}{(1-\xi)} \frac{1}{(1-\eta)}d\eta d\xi \\
1804:   % =
1805:   % \frac{2\gamma^{2}} {\alpha^{2}(1-e^{-\alpha})^{2}} \int_{0}^{1}\frac{e^{-\alpha\xi}\ln(1-\xi)}{(1-\xi)}d\xi \\
1806:   % = \frac{2\gamma^{2}} {\alpha^{2}(1-e^{-\alpha})^{2}} \int_{0}^{1}
1807:   % e^{-\alpha\xi}d\xi \leq
1808: \end{align*}
1809: by \eqref{eq:T} which gives (\ref{key2}).
1810: \end{step}
1811: 
1812: \begin{step}(Approximate independence)\\
1813:   % We are going to use equations (\ref{approx1}) and (\ref{approx2})
1814:   % derived in Step 4 to conclude the proof of the proposition.
1815:   As we have seen in (\ref{eq:claim}) the distribution of
1816:   $\zeta^{\mathcal X}_T$ is determined by the distribution of the
1817:   order of events in the Poisson processes $\mathcal T_{\mathfrak 1}$,
1818:   $\mathcal T_{\mathfrak 3}$, $\mathcal T_{\mathfrak 4}$ and $\mathcal
1819:   T_{\mathfrak 5}$.  The calculations in Step 3 allow us to make the
1820:   assumptions
1821:   \[
1822:   \mathcal{T}_{\mathfrak 1}\cap \left[T_{\varepsilon}, T \right] =
1823:   \varnothing, \qquad \mathcal{T}_{\mathfrak 5}\cap \left[0,
1824:     T_{\varepsilon} \right] = \varnothing,\qquad \max
1825:   (\mathcal{T}_{\mathfrak 3} \cup \mathcal{T}_{\mathfrak 4}) < \min
1826:   \mathcal{T}_{\mathfrak 5}
1827:   \]
1828:   on the ordering of events in these Poisson processes as these events
1829:   have probability $1-\mathcal O\big( (\log\alpha)^{-2}\big)$.
1830:   Furthermore, we know from \eqref{eq:claim2} that events in $\mathcal
1831:   T_{\mathfrak 3}$ and $\mathcal T_{\mathfrak 4}$ may be exchanged
1832:   without changing the distribution of $\zeta^{\mathcal X}_T$. Hence,
1833:   the distribution of $\zeta^{\mathcal X}_T$ is determined once the
1834:   joint distribution of
1835:   $$\mathcal{T}_{\mathfrak 1} \cap \left[ 0, T_{\varepsilon}\right], \qquad 
1836:   \mathcal{T}_{\mathfrak 3}\cap \left[ 0, T_{\varepsilon}\right],
1837:   \qquad \mathcal{T}_{\mathfrak 4}\cap [0,T_\varepsilon], \qquad
1838:   \left|(\mathcal{T}_{\mathfrak 3}\cup \mathcal{T}_{\mathfrak 4}) \cap
1839:     \left[ T_{\varepsilon}, T\right] \right|,\qquad
1840:   \left|\mathcal{T}_{\mathfrak 5}\right| $$ is known. To approximate
1841:   the joint distribution of these objects, define
1842: \[
1843: \mathcal{T}_{\mathfrak i}^{\varepsilon}:= \mathcal{T}_{\mathfrak i}
1844: \cap \left[0, T_{\varepsilon} \right]\text{, } \mathfrak i=
1845: \mathfrak{1,3,4} \quad \text{and} \quad K_{\mathfrak {3,4}}:=
1846: \big|\left(\mathcal{T}_{\mathfrak 3} \cup \mathcal{T}_{\mathfrak 4}
1847: \right) \cap \left[T_{\varepsilon}, T\right] \big| \text{,} \quad K_{
1848:   \mathfrak 5}:=\big| \mathcal{T}_{\mathfrak 5} \big|.
1849: \]
1850: %The distribution conditioned on a frequency path $\mathcal{X}$ is
1851: %denoted by $\mathbb{P}_{\mathcal{X}}$. 
1852: We will prove 
1853: %for the joint distribution under $\mathbb{P}$
1854: \begin{equation} \label{independence} \mathbb P \circ
1855:   \left(\mathcal{T}_{\mathfrak 1}^{\varepsilon},
1856:     \mathcal{T}_{\mathfrak 3}^{\varepsilon}, \mathcal{T}_{\mathfrak
1857:       4}^{\varepsilon}, K_{\mathfrak {3,4}}, K_{\mathfrak 5}
1858:   \right)^{-1} = \mathbb P\circ\left(\mathcal{T}_{\mathfrak
1859:       1}^{\varepsilon}, \mathcal{T}_{\mathfrak 3}^{\varepsilon},
1860:     \mathcal{T}_{\mathfrak 4}^{\varepsilon}, K_{\mathfrak{3,4}}
1861:   \right)^{-1} \otimes \;
1862:   \text{Poi}_{\mathbb{E}\left[r\rho\int_{0}^{T}X_{s}ds \right]} +
1863:   \mathcal{O}\left(\frac{1}{\left(\log \alpha \right)^{2}} \right)
1864: \end{equation}
1865: where $\mathbb P\circ X^{-1}$ is the image measure of the random
1866: variable $X$ under $\mathbb P$ and the Landau symbol in this context
1867: gives the order in variation distance of the distributions.
1868: 
1869: Once \eqref{independence} is shown we conclude that $K_{\mathfrak 5}$
1870: is approximately independent of all other events. Furthermore, its
1871: distribution may be interpreted as that of a sum of $r$ independent Poisson
1872: random variables with parameter $\mathbb{E}\left[\rho\int_{0}^{T}X_{s}ds
1873: \right]$. These determine the number of split events on all partition
1874: elements $\xi\in\pi$ with $\xi\cap\dickm r\neq \varnothing$. A
1875: partition element splits, if it is hit by at least one split event.
1876: The probability for a split of a partition element is thus given,
1877: using \eqref{Poi1} and \eqref{Poi3} for $k=0$, by
1878: \[
1879: 1-\exp\Big( - \rho\cdot \mathbb{E}\left[\int_{0}^{T}X_{s}ds
1880: \right]\Big) = 1 - \mathbb E\Big[ \exp\Big( - \rho\int_0^T X_s
1881: ds\Big)\Big] + \mathcal O\Big( \frac{1}{(\log\alpha)^2}\Big)
1882: \]
1883: with $\rho=\rho_{LR}$ for geometry (i) and $\rho=\rho_{LS} +
1884: \rho_{SR}$ for geometry (ii). Observe that $\Gamma_\pi$ is determined
1885: by the distribution of $\left(\mathcal{T}_{\mathfrak 1}^{\varepsilon},
1886:   \mathcal{T}_{\mathfrak 3}^{\varepsilon}, \mathcal{T}_{\mathfrak
1887:     4}^{\varepsilon}, K_{\mathfrak {3,4}}\right)$ if $K_{\mathfrak 5}$
1888: is known. The random partition $\Delta_\pi$ is determined by the
1889: distribution of $\left(\mathcal{T}_{\mathfrak 1}^{\varepsilon},
1890:   \mathcal{T}_{\mathfrak 3}^{\varepsilon}, \mathcal{T}_{\mathfrak
1891:     4}^{\varepsilon}, K_{\mathfrak {3,4}}\right)$ independently of
1892: $K_{\mathfrak 5}$. So, Proposition \ref{PropFirst} is a consequence of
1893: the approximate independence of $\left(\mathcal{T}_{\mathfrak
1894:     1}^{\varepsilon}, \mathcal{T}_{\mathfrak 3}^{\varepsilon},
1895:   \mathcal{T}_{\mathfrak 4}^{\varepsilon}, K_{\mathfrak {3,4}}\right)$
1896: and $K_{\mathfrak 5}$ given by \eqref{independence}.
1897: 
1898: \smallskip
1899: 
1900: We write
1901: \begin{align*}
1902:   \mathbb P \circ \big(\mathcal{T}_{\mathfrak 1}^{\varepsilon}, &
1903:   \mathcal{T}_{\mathfrak 3}^{\varepsilon}, \mathcal{T}_{\mathfrak
1904:     4}^{\varepsilon}, K_{\mathfrak{3,4}}, K_{\mathfrak{5}} \big)^{-1}
1905:   = \int \mathbb{P}_{\mathcal X} \circ \left(\mathcal{T}_{\mathfrak
1906:       1}^{\varepsilon}, \mathcal{T}_{\mathfrak 3}^{\varepsilon},
1907:     \mathcal{T}_{\mathfrak 4}^{\varepsilon}, K_{\mathfrak {3,4}},
1908:     K_{\mathfrak{5}} \right)^{-1}\;\mathbb{P}\left[ d\mathcal{X}
1909:   \right] \\ & = \int \mathbb{P}_{(X_{t})_{0\leq t \leq T_{\varepsilon}}}
1910:   \circ \left(\mathcal{T}_{\mathfrak 1}^{\varepsilon},
1911:     \mathcal{T}_{\mathfrak 3}^{\varepsilon}, \mathcal{T}_{\mathfrak
1912:       4}^{\varepsilon} \right)^{-1} \mathbb{P}\left[d(X_{t})_{0\leq t
1913:       \leq T_{\varepsilon}}\right]\\ & \qquad\qquad \otimes \int
1914:   \mathbb{P}_{(X_{t})_{T_{\varepsilon}\leq t \leq T}}\circ \left(K_{\mathfrak
1915:       {3,4}}, K_{\mathfrak 5} \right)^{-1}
1916:   \mathbb{P}\left[d(X_{t})_{T_{\varepsilon}\leq t \leq T}\right] +
1917:   \mathcal{O}\left(\frac{(\log \alpha)^{2}}{\alpha} \right)
1918: \end{align*}
1919: where we have used the fact that $T_{\varepsilon}$ is a stopping time and
1920: the strong Markov property of the process $\mathcal X$. Note that by
1921: \eqref{eq:step3b} we may assume $K_{ \mathfrak
1922:   5}=\big|\mathcal{T}_{\mathfrak 5} \cap \left[T_{\varepsilon}, T \right]
1923: \big|$ which gives an error of $\mathcal{O}\left(\frac{(\log
1924:     \alpha)^{2}}{\alpha} \right)$ in probability. From Steps 4 and 5
1925: we get
1926: \begin{multline*}
1927:   \int \mathbb{P}_{(X_{t})_{T_{\varepsilon}\leq t \leq T}}\circ
1928:   \left(K_{\mathfrak{3,4}}, K_{\mathfrak{5}} \right)^{-1}
1929:   \mathbb{P}\left[d(X_{t})_{T_{\varepsilon}\leq t \leq T}\right]
1930:   \\
1931:   =
1932:   \text{Poi}_{\mathbb{E}\left[(\ell+r)\rho\int_{T_{\varepsilon}}^{T}\left(1-X_{s}
1933:       \right)ds \right]}\; \otimes\;
1934:   \text{Poi}_{\mathbb{E}\left[r\rho\int_{T_{\varepsilon}}^{T}X_{s}ds
1935:     \right]} + \mathcal{O}\left(\frac{1}{(\log\alpha)^{2}}\right)
1936: \end{multline*}
1937: Rewriting
1938: $$\text{Poi}_{\mathbb{E}\left[(\ell+r)\rho\int_{T_{\varepsilon}}^{T}\left(1-X_{s}
1939:     \right)ds \right]} = \int \mathbb{P}_{(X_{t})_{T_{\varepsilon}\leq t
1940:   \leq T}}\circ\big(K_{
1941:   \mathfrak{3,4}}\big)^{-1}\mathbb{P}\left[d(X_{t})_{T_{\varepsilon}\leq
1942:     t \leq T}\right] ,$$ and using the strong Markov property of
1943: $\mathcal X$ a second time we get
1944: \begin{align*}
1945:   \mathbb P\circ \left(\mathcal{T}_{\mathfrak 1}^{\varepsilon},
1946:     \mathcal{T}_{\mathfrak 3}^{\varepsilon}, \mathcal{T}_{\mathfrak
1947:       4}^{\varepsilon}, K_{\mathfrak{3,4}}, K_{\mathfrak{5}}
1948:   \right)^{-1} &= \int \mathbb{P}_{(X_{t})_{0\leq t \leq
1949:       T_{\varepsilon}}}\circ \left(\mathcal{T}_{\mathfrak
1950:       1}^{\varepsilon}, \mathcal{T}_{\mathfrak 3}^{\varepsilon},
1951:     \mathcal{T}_{\mathfrak 4}^{\varepsilon}
1952:   \right)^{-1}\mathbb{P}\left[d(X_{t})_{0\leq t \leq
1953:       T_{\varepsilon}}\right]
1954:   \\
1955:   & \qquad \otimes \int \mathbb{P}_{(X_{t})_{T_{\varepsilon}\leq t \leq
1956:       T}} \circ \left(K_{\mathfrak{3,4}}\right)^{-1}
1957:   \mathbb{P}\left[d(X_{t})_{T_{\varepsilon}\leq t
1958:       \leq T}\right] \\
1959:   & \qquad \qquad \qquad \otimes \quad
1960:   \text{Poi}_{\mathbb{E}\left[r\rho\int_{0}^{T}X_{s}ds \right]} +
1961:   \mathcal{O}\left(\frac{1}{(\log\alpha)^{2}}\right)\\ &= \mathbb P
1962:   \circ \left(\mathcal{T}_{\mathfrak 1}^{\varepsilon},
1963:     \mathcal{T}_{\mathfrak 3}^{\varepsilon}, \mathcal{T}_{\mathfrak
1964:       4}^{\varepsilon}, K_{\mathfrak {3,4}} \right)^{-1} \otimes
1965:   \text{Poi}_{\mathbb{E}\left[r\rho\int_{0}^{T}X_{s}ds \right]} +
1966:   \mathcal{O}\left(\frac{1}{\left(\log \alpha \right)^{2}} \right)
1967: \end{align*}
1968: and we are done.
1969: \end{step}
1970: \end{proof}
1971: 
1972: By Proposition \ref{PropFirst}, events $(5)$ can be generated
1973: independently of the frequency path and of all other events.  The
1974: rates of the recombination events $(3), (4), (6_i)$ at time $\beta$
1975: are all proportional to $(1-X_{T-\beta})$.  This is reminiscent of the
1976: case of only one neutral locus, studied in
1977: \cite{EtheridgePfaffelhuberWakolbinger2006}, where a line carrying one
1978: neutral locus in recombination distance $\rho$ recombines to the
1979: wild-type background with rate $\rho(1-X_{T-\beta})$. As a consequence we can use
1980: the same techniques used there, especially their Proposition 3.6,
1981: which states that a marked Yule tree approximately gives the same
1982: partition as the structured coalescent.
1983: 
1984: \begin{definition}\label{def:4}
1985: %  For $\pi\in \mathcal P_{\dickm \ell\,\cup\,\dickm r}$ d
1986:   Define a $\mathcal P'_{\dickm \ell\,\cup\,\dickm r}$-valued random
1987:   variable $\Xi_\pi$ as follows: For all partition elements
1988:   $\xi\in\pi$ with $\xi\cap\dickm\ell \neq \varnothing, \xi\cap\dickm
1989:   r\neq\varnothing$, i.e., $\xi$ carries both left and right loci,
1990:   split the partition element in its left and right loci,
1991:   $\xi\cap\dickm\ell, \xi\cap\dickm r$ according to \eqref{Split}.
1992:   Denote
1993:   %the random number of split lines by $S$ and 
1994:   the resulting partition by $\pi'$.
1995:   
1996:   Let $\mathbf Y$ be an infinite Yule tree with branching rate
1997:   $\alpha$. Moreover, consider the random tree $\mathbf Y_{|\pi'|}$
1998:   which arises by sampling $|\pi'|$ lines from $\mathbf Y$ at
1999:   infinity. Identify each of the $|\pi'|$ partition elements of $\pi'$
2000:   with one sampled line. Between the time the Yule tree $\mathbf Y$
2001:   starts and the time it has $\lfloor 2\alpha \rfloor$ lines, mark all
2002:   lines by the following procedure:
2003: 
2004:   For geometry (i), the tree is marked by Poisson processes with rates
2005:   $\rho_{SL}$ and $\rho_{LR}$. These marks are relabelled such that
2006:   each branch is hit by at most one mark. Call the corresponding marks
2007:   $SL$-, $LR$- and $SLR$-marks. The following rules are applied:
2008:   \begin{enumerate}
2009:   \item[(a)] If the Poisson process with rate $\rho_{SL}$ puts the first
2010:     (backward in time) mark at time $t$ from the root, start a Poisson
2011:     process with rate $\rho_{LR}$ and run it for time $t$. If an event
2012:     occurs during this time, the branch is marked by an $SLR$-mark,
2013:     otherwise by an $SL$-mark.
2014:   \item[(b)] If the Poisson process with rate $\rho_{LR}$ puts the
2015:     first (backward in time) mark distinguish the following two cases:
2016:     if the Poisson process with rate $\rho_{SL}$ hits the branch as
2017:     well, it obtains an $SLR$-mark. Otherwise, it obtains an
2018:     $LR$-mark.
2019:   \end{enumerate}
2020: 
2021:   For geometry (ii), mark the tree by two independent Poisson
2022:   processes with rates $\rho_{LS}$ and $\rho_{SR}$. If a branch is hit
2023:   by one or more events of the Poisson process with rate $\rho_{LS}$,
2024:   it gets an $LS$-mark. If it is hit by one or more events with rate
2025:   $\rho_{SR}$, it additionally gets an $SR$-mark. 
2026: 
2027:   The result of this procedure is a marked Yule tree $\mathbf
2028:   Y_{|\pi'|}$. Given $\pi'$ and the marked Yule tree $\mathbf
2029:   Y_{|\pi'|}$ we use the same equivalence relation as given in
2030:   \eqref{eq:equivGeoi} and \eqref{eq:equivGeoii} to define
2031:   $\pi''\in\mathcal P'_{\dickm\ell \cup \dickm r}$. Furthermore, we
2032:   use \eqref{eq:uni} and \eqref{eq:unii} to define the random
2033:   partition
2034:   $$ \Xi_\pi:= ( \{\pi_f''\}, \pi''\setminus \{\pi_f''\}).$$ 
2035: \end{definition}
2036: 
2037: \begin{example} The two cases in which an $SLR$-mark occurs for
2038:   geometry (i) are illustrated in Figure \ref{smallYule}. Consider the
2039:   line in the sample Yule tree which can be identified with the
2040:   partition element $\{j,k\}$ where $j\in\dickm \ell$ and $k\in\dickm
2041:   r$. Consider case (a) first, shown on the left side of Figure
2042:   \ref{smallYule}: The $SL$-mark hitting a branch in $\mathbf
2043:   Y_{|\pi'|}$ leads to a jump of the partition element into the
2044:   wild-type background. We now have to consider the additional Poisson
2045:   process at rate $\rho_{LR}$ to determine whether or not the line
2046:   will split within the wild-type background. If an event with rate
2047:   $\rho_{LR}$ occurs, the $L$- is separated from the $R$-locus on this
2048:   line. Case (b) is illustrated on the right side of Figure
2049:   \ref{smallYule}. Here, the line which refers to the partition
2050:   element $\{j,k\}$ is first (backward in time) hit by an $LR$-mark,
2051:   bringing the $R$-locus into the wild-type background, and after that
2052:   an additional $SL$-mark hits the same branch, which additionally
2053:   brings the $L$-locus into the wild-type background.
2054: %  Now looking backwards in time again this means that first the
2055: %  $R$-locus, i.e.,  $\{j\}$, is carried into the wild-type background
2056: %  by a recombination event between $L$ and $R$.  Before the partition
2057: %  element $\{k\}$ carrying the $L$-locus which remains behind may
2058: %  coalesce with any other partition element in the beneficial
2059: %  background, a recombination event between $S$ and $L$ leads to the
2060: %  jump of $\{k\}$ into the wild-type background.
2061:   In both cases the loci $j$ and $k$ end up separated in the wild-type
2062:   background. This is summarized in Definition \ref{def:4} by an $SLR$-mark.
2063: \end{example}
2064: 
2065: \begin{figure}
2066: \hspace{3cm} (a) \hspace{7cm}(b)
2067: 
2068: \begin{center}
2069: \includegraphics[width=7cm]{smallYule1.ps} \hspace{0.5cm}
2070: \includegraphics[width=7cm]{smallYule2.ps}
2071: %\includegraphics[width=5cm]{smallYule3.ps}
2072: \end{center}
2073: \caption{\label{smallYule}
2074: There are two possibilities how an $SLR$-mark may occur. Here, $SL$
2075: and $LR$ refer to points in the Poisson processes with rates
2076: $\rho_{SL}$ and $\rho_{LR}$.  See text for further explanation.
2077: %(a) The line is
2078: %  first hit by a Poisson process with rate $\rho_{SL}$. The
2079: %  recombining line is split by a Poisson process with rate $\rho_{LR}$
2080: %  (b) The line is hit by the Poisson process with rate $\rho_{LR}$
2081: %  first and only afterwards is hit by the Poisson process with rate
2082: %  $\rho_{SL}$.  
2083: }
2084: \end{figure}
2085: 
2086: 
2087: \noindent
2088: As a next step in the proof of Theorem \ref{T} we now show that
2089: $\Delta_\pi \approx \Xi_\pi$.
2090: 
2091: \begin{proposition}\label{PropSecond}
2092:   Let $\pi\in\mathcal P'_{\dickm \ell\cup\dickm r}$ and $\Delta_\pi$
2093:   and $\Xi_\pi$ be as in Definitions \ref{def:3} and \ref{def:4}.
2094:   Then,
2095:   $$ \sup_{\xi\in\mathcal P'_{\dickm \ell\cup\dickm r}} 
2096:   \big|\mathbb P[\Delta_\pi = \xi] - \mathbb P[\Xi_\pi=\xi] \big|
2097:   = \mathcal O\Big( \frac{1}{(\log\alpha)^2}\Big). $$
2098: \end{proposition}
2099: 
2100: \begin{proof}
2101:   As the mechanism to generate splits in the beneficial background is
2102:   the same for both random partitions, $\Delta_\pi$ and $\Xi_\pi$,
2103:   we concentrate on all other events.
2104: 
2105:   The proof follows along the lines of the Yule approximation in the
2106:   case of only one neutral locus, given in \cite[Definition 3.3 and
2107:   Section 4.3]{EtheridgePfaffelhuberWakolbinger2006}. The crucial
2108:   observation is that by a random time change $t\mapsto\tau$ given by
2109:   $d\tau = (1-X_t)dt$ the frequency path $\mathcal X$, given by
2110:   \eqref{eq:SDE}, is taken to the solution $\mathcal Z = (Z_t)_{t\geq
2111:     0}$ of
2112:   \begin{align} \label{eq:timechange} dZ = \alpha Z \coth(\alpha Z)dt
2113:     + \sqrt{Z} dW
2114:   \end{align}
2115:   with a standard Brownian motion $W$ and $Z_0=0$. This is an
2116:   $\alpha$-supercritical Feller branching process conditioned on
2117:   non-extinction.  It was shown in \cite{EvansOConnell1994} and
2118:   \cite{OConnell1993} that the genealogy of the $\alpha$-supercritical
2119:   branching process is a Yule process with branching rate $\alpha$.
2120:   Observe that the time-transformation $t\mapsto\tau$ only works until
2121:   the supercritical branching process has reached frequency 1. From
2122:   4.5(b) in \cite{EtheridgePfaffelhuberWakolbinger2006} we see that at
2123:   this time the number of lines in the Yule process is Poisson
2124:   distributed with mean $2\alpha$. (The additional factor of 2 arises
2125:   because we made the assumption that the individual offspring
2126:   variance in the underlying Cannings model is 1 rather than 2. See
2127:   also \cite{PfaffelhuberHauboldWakolbinger2006}.) However, as typical
2128:   deviations in this Poisson distribution are of the order
2129:   $\sqrt\alpha\ll\alpha$ we may instead assume that the Yule process
2130:   has $\lfloor 2\alpha \rfloor$ lines.  This was made precise in the
2131:   proof of Proposition 4.7 in
2132:   \cite{EtheridgePfaffelhuberWakolbinger2006}.
2133: 
2134:   Moreover, for geometries (i) and (ii) the rates in the process $\xi$
2135:   change at time $\beta$ from $\rho_{SL}(1-X_{T-\beta})$,
2136:   $\rho_{LR}(1-X_{T-\beta})$ to $\rho_{SL}$, $\rho_{LR}$ and from
2137:   $\rho_{LS}(1-X_{T-\beta})$, $\rho_{SR}(1-X_{T-\beta})$ to
2138:   $\rho_{LS}$, $\rho_{SR}$, respectively. In particular, the time-changed
2139:   rates are constant.  Under the random time change the coalescence
2140:   rate (1) changes at time $\beta$ from $1/X_{T-\beta}$ to
2141:   $1/(X_{T-\beta}(1-X_{T-\beta}))$. However, it was shown in
2142:   \cite[Proposition 4.2]{EtheridgePfaffelhuberWakolbinger2006} that
2143:   the change of these rates can only produce an error in probability
2144:   of order $\mathcal O\big((\log\alpha)^{-2}\big)$. This fact was used
2145:   in \cite[Lemma 4.5, Proposition
2146:   4.7]{EtheridgePfaffelhuberWakolbinger2006} to prove that the marked
2147:   Yule process gives an accurate approximation in the case of one
2148:   neutral locus. This result carries over to the present
2149:   situation because all Poisson processes along the Yule process have
2150:   constant rates.
2151: 
2152:   It remains to check whether the random partition $\Xi_\pi$
2153:   coincides with $\Delta_\pi$ given the change in the coalescence rate
2154:   has no effect. First of all, realize the splits in the beneficial
2155:   background according to Definition \ref{def:3}. Then, take $j,k\in
2156:   \dickm\ell \cup \dickm r$ and trace their partition elements
2157:   backwards up to time $t=0, \beta=T$. We only consider geometry (i)
2158:   and $j\in\dickm\ell, k\in\dickm r$, since the other cases
2159:   $j,k\in\dickm\ell$ and $j,k\in\dickm r$ and all cases for geometry
2160:   (ii) are similar. If we consider the process $\eta^{\mathcal X}$
2161:   from Definition \ref{def:3} without any recombination events we
2162:   would obtain a tree $\Y$ for the genealogy relating $j$ and $k$.
2163:   However, recombination events may cause the $L$-locus $j$ and the
2164:   $R$-locus $k$ to end up in different partition elements of the random
2165:   partition $\Delta_\pi$. This will be the case if and only if one of
2166:   the following events occurs in the process $\eta^{\mathcal X}$:
2167: \begin{itemize}
2168: \item[(a)] a recombination event $(3_i)$ with rate
2169:   $\rho_{SL}\left(1-X_{}\right)$ on
2170:   $\!\!\!\!\!\!\!\!\Yup\!\!\!\!\!\!\!\!$, which takes either $j$ or
2171:   $k$ to the wild-type background before coalescence,
2172: \item[(b)] a recombination event $(4_i)$ with rate
2173:   $\rho_{LR}\left(1-X_{}\right)$ on
2174:   $\!\!\!\!\!\!\!\!\Yupri\!\!\!\!\!\!\!\!$, which takes $k$ to the
2175:   wild-type background before coalescence with $j$,
2176: \item[(c)] an event $(4_i)$ with rate $\rho_{LR}\left(1-X_{}\right)$ on
2177:   $\!\!\!\!\!\!\!\!\Ybottom\!\!\!\!\!\!\!\!$ before (backward in time)
2178:   an event with rate $\rho_{SL}\left(1-X_{}\right)$ happens on that
2179:   branch; in this case $j$ and $k$ have coalesced, but a recombination
2180:   event brings $k$ to the wild-type background without $j$,
2181: \item[(d)] an event $(3_i)$ with rate $\rho_{SL}\left(1-X_{}\right)$
2182:   on $\!\!\!\!\!\!\!\!\Ybottom\!\!\!\!\!\!\!\!$ before (backward in
2183:   time) an event with rate $\rho_{LR}\left(1-X_{}\right)$ happens on
2184:   that branch, which brings both $j$ and $k$ to the wild-type
2185:   background.  Here, an event $(6_i)$ at rate $\rho_{LR}(1-X_{})$ happens
2186:   which splits $j$ and $k$ in the wild-type background.
2187: \end{itemize}
2188: The trees in events (a)-(d) refer to trees generated by
2189: $\eta^{\mathcal X}$. By the random time change and our assumption that
2190: the change in coalescence rate does not alter random partitions we can
2191: as well take trees generated by the Yule process and change the rates
2192: $\rho_{SL}(1-X)$ and $\rho_{LR}(1-X)$ to $\rho_{SL}$ and $\rho_{LR}$.
2193: Hence we are dealing with a Yule tree with branching rate $\alpha$
2194: marked by Poisson processes with rates $\rho_{SL}$ and $\rho_{LR}$
2195: which is the exact situation of Definition \ref{def:4}. Using the
2196: definition of the $SL$-, $LR$- and $SLR$-marks, we note that
2197: \begin{itemize}
2198: \item (a) produces either an $SL$- or an $SLR$-mark on $\Yup$,
2199: \item (b) produces an $LR$-mark on $\Yupri$,
2200: \item (c) and (d) produce either an $LR$- or an $SLR$-mark on
2201:   $\Ybottom$.
2202: \end{itemize}
2203: If none of these marks occur, $j$ and $k$ are in the same partition
2204: element of $\Xi_\pi$ by \eqref{eq:equivGeoi}. Hence $\Delta_\pi$ and
2205: $\Xi_\pi$ coincide with high probability.
2206: 
2207: % Observe that events (c) and (d) produce $SLR$-marks on
2208: % $\!\!\!\!\!\!\!\!\Ybottom\!\!\!\!\!\!\!\!$ and the two casesexactly
2209: % correspond to cases (a) and (b) in the generation of $SLR$-marks on
2210: % the Yule tree according to Definition \ref{def:4}.
2211: 
2212: %   As long as we have to trace them back along different lines, no
2213: %   recombination event must take place on either of them since they
2214: %   won't coalesce in the wild-type background.  Explicitly, no event
2215: %   with rate ($3_{i}$) must happen to either of the two partition
2216: %   elements and no event at rate ($4_{i}$) should occur on the
2217: %   $k$-line. As soon as $j$ and $k$ belong to the same partition
2218: %   element, they may recombine into the wild-type background together,
2219: %   but must not split there. In case they do not leave the beneficial
2220: %   background, they must not be separated by an event of rate ($4_{i}$)
2221: %   (split where $k$ would leave the beneficial background). Under the
2222: %   time transformation the corresponding rates are rescaled and running
2223: %   the Poisson processes at these constant (rescaled) rates as stated
2224: %   in Definition $\ref{def:4}$ would lead to the marks shown in Table
2225: %   \ref{tab2}.
2226: % \begin{table}
2227: % \begin{center}
2228: % \vspace{1ex}
2229: 
2230: % \begin{tabular}{|c|c|c|c|}\hline
2231: %   \rule[-4mm]{0cm}{1cm}lines & $\Delta_\pi$ & $\Xi_\pi$ & mark \\\hline
2232: %   \rule[-4mm]{0cm}{1cm}$\Yup$ & $\rho_{SL}\left(1-X_{T-\beta}\right)$ & $\rho_{SL}$  & $SL$ or $SLR$\\
2233: %   \rule[-4mm]{0cm}{1cm}$\Yri$ & $\rho_{LR}\left(1-X_{T-\beta}\right)$ &$\rho_{LR}$ & $LR$\\
2234: %   \rule[-4mm]{0cm}{1cm}$\Ybottom$ & $\rho_{LR} X_{T-\beta}$& $\rho_{LR}$ &$SLR$ \\\hline
2235: % \end{tabular}
2236: % \end{center}
2237: % \caption {\label{tab2} Events which would lead to the separation of the two loci $j, k$ with their rates before and after the time transformation as well as the corresponding marks the Poisson processes at the rescaled rates would cause on the Yule sample tree $\mathcal{Y}$ }
2238: % \end{table}
2239: % The equivalence of the partition one gets from the process $\Delta_{\pi}$ to that defined by the equivalence relation \eqref{eq:equivGeoi} is immediately clear from Table \ref{tab2}. \\ 
2240:   
2241:   
2242: % \begin{tt}
2243: %  xxx  \begin{itemize} \item Is this what was necessary for the conclusion? Should I add more details? \item
2244: %  The probabilities for splits in B, which are realized at first, are only approximately equal, aren't they? In case of def. 5.3. they are realized in Yule time, this should again cause an error in probability of the right order ... Should this be mentioned?  Or is it clear from the references that they are approximately equal?\item
2245: %   maybe make a  figure?
2246: %   \end{itemize}
2247: %   \end{tt}
2248: \end{proof}
2249: 
2250: We conclude the proof of Theorem \ref{T} by showing that $\Xi_\pi$
2251: from Definition \ref{def:4} and $\Upsilon_\pi$ from Definition
2252: \ref{def:2} are close in variation distance. 
2253: 
2254: \begin{proposition}\label{PropThird}
2255:   Let $\pi\in\mathcal P'_{\dickm \ell\cup\dickm r}$ and $\Xi_\pi$ and
2256:   $\Upsilon_\pi$ be as in Definitions \ref{def:4} and \ref{def:2}.
2257:   Then,
2258:  \[ \sup_{\xi\in\mathcal P'_{\dickm \ell\cup\dickm r}} 
2259:   \big|\mathbb P[\Xi_\pi = \xi] - \mathbb P[\Upsilon_\pi=\xi] \big| =
2260:   \mathcal O\left( \frac{1}{(\log\alpha)^2}\right). \]
2261: \end{proposition}
2262: 
2263: \begin{proof}
2264:   We will only consider geometry (i). The proof for geometry (ii) is
2265:   analogous. \\
2266:   
2267:   After realizing the splits in the beneficial background first
2268:   according to the probabilities given in \eqref{Split} and
2269:   \eqref{eq:Y2}, respectively, $\Xi_\pi$ and $\Upsilon_\pi$ are
2270:   determined by the same equivalence relations \eqref{eq:equivGeoi}
2271:   using the marks which hit the tree according to Definition
2272:   \ref{def:4} and Table \ref{tab:marks}. Hence our proof consists of
2273:   two steps. First, we show that the probabilities given in
2274:   \eqref{Split} and \eqref{eq:Y2} differ only by
2275:   $\mathcal{O}\left((\log \alpha)^{-2} \right)$. Second, we show that
2276:   the error caused by generating the $SL$-, $LR$- and $SLR$-marks
2277:   using \eqref{eq:Y3} instead of Definition \ref{def:4} is
2278:   $\mathcal{O}\left((\log \alpha)^{-2} \right)$.
2279: 
2280:   Both assertions rely on the same calculation. Assume a line in the
2281:   Yule tree starts when the full Yule tree has $i_1$ lines for the
2282:   last time and ends when the full Yule tree has $i_2>i_1$ lines for the
2283:   last time.  Additionally, the line is hit by a Poisson process with
2284:   rate $\rho = \gamma\frac{\alpha}{\log\alpha}$. The probability that
2285:   the line is not hit by the Poisson process during the time the Yule
2286:   process has $i$ lines, $i_1 < i\leq i_2$, is
2287:   \[
2288:   \frac{i \alpha}{i\alpha + \rho}
2289:   \]
2290:   because of competing exponential clocks. Analogously, the
2291:   probability that the whole line is not hit is, by a Taylor
2292:   approximation,
2293:   \begin{equation}\label{eq:rec5}\begin{aligned}
2294:       \prod_{i=i_1+1}^{i_2} \frac{i\alpha}{i\alpha+\rho} &= \exp\left(
2295:         \sum_{i=i_1+1}^{i_2} \log \left( 1 -
2296:           \frac{\rho}{i\alpha+\rho}\right) \right) \\& = \exp\left(
2297:         -\frac{\gamma}{\log\alpha} \sum_{i=i_1+1}^{i_2}
2298:         \frac{1}{i+\rho/\alpha}\right) + \mathcal
2299:       O\left(\frac{1}{(\log\alpha)^2}\right) \\ & = \exp\left(
2300:         -\frac{\gamma}{\log\alpha} \sum_{i=i_1+1}^{i_2}
2301:         \frac{1}{i}\right)+ \mathcal
2302:       O\left(\frac{1}{(\log\alpha)^2}\right)= p_{i_1}^{i_2}(\gamma)+
2303:       \mathcal O\left(\frac{1}{(\log\alpha)^2}\right),\end{aligned}
2304:   \end{equation}
2305:   since the neglected terms in the Taylor
2306:   series are of order $\mathcal O \big( \rho^{2}/\alpha^2\big)=
2307:   \mathcal O\big((\log \alpha)^{-2}\big)$ and higher.
2308: 
2309:   To prove that \eqref{Split} and \eqref{eq:Y2} coincide
2310:   approximately, observe that 
2311:  \[ \mathbb E\left[ \exp\left( - \rho\cdot \int_0^T X_s ds \right)\right] = 
2312:   \mathbb E\left[ \exp\left( - \rho\cdot \int_0^T (1-X_s) ds \right)\right]
2313:   \]
2314:   by the time-reversibility of $\mathcal X$. Additionally, the right
2315:   hand side gives the probability that a Poisson process with rate
2316:   $\rho(1-X)$ does not hit a line by time $T$. By the random time
2317:   change $d\tau = (1-X_t)dt$ this is approximately the same as the
2318:   probability that a Poisson process with rate $\rho$ does not hit one
2319:   line in a Yule tree until it has $\lfloor 2\alpha\rfloor$ lines and
2320:   is hence given by $p_{0}^{\lfloor 2\alpha \rfloor}(\gamma)$.
2321: 
2322: %   We start by picking a partition element from the original partition
2323: %   $\pi$. For the moment forget about realizing the split before any of
2324: %   the other events. Then the left and right loci in this partition
2325: %   element would be separated at rate $\rho_{LR}X$.  Because of the
2326: %   time reversibility of the sample path $\mathcal{X}$ this rate is
2327: %   equivalent to $\rho_{LR}(1-X)$.  Thus the random time change
2328: %   \eqref{eq:timechange} would also result in a constant rate
2329: %   $\rho_{LR}$. Now consider a process at this rate. Note that for the
2330: %   moment the marks we are looking at are none of those mentioned in
2331: %   Definition \ref{def:4} or Definition \ref{def:2}, but indicating
2332: %   splits in the beneficial background. Afterwards we are returning to
2333: %   realizing these splits beforehand. Realizing events according to the
2334: %   given rate along the tree in Yule time scale instead of the true
2335: %   time scale of Definition \ref{def:4} produces an error of order
2336: %   $\mathcal{O}\left((\log \alpha)^{-2} \right)$ as was shown in the
2337: %   proof of Prop. 4.7 of \cite{EtheridgePfaffelhuberWakolbinger2006}.
2338: %   We start doing so by considering a branch in the Yule tree
2339: %   $\mathcal{Y}$ between the time when there are $i_{1}$ branches on
2340: %   the whole just until there are $i_{1}+1$ branches (see Figure
2341: %   \ref{Sample} for an example of counting in Yule time).
2342: % \begin{figure}
2343: % \begin{center} 
2344: % \includegraphics[width=3cm]{YuleSmall.ps}
2345: % \end{center}
2346: % \caption{\label{Sample} Example for Yule times}
2347: % \end{figure}
2348: % Two events may occur on the branch we have picked. It may be marked or one of the branches may be divided where these events are determined by independent Poisson processes at rate $(i_{1}+1) \alpha$ and $\rho_{LR}$. No mark will fall on the chosen branch if the Poisson process at rate $(i_{1}+1) \alpha$ jumps first. The probability for this to happen is equal to
2349: % \[
2350: % \frac{(i_{1}+1) \alpha}{(i_{1}+1)\alpha + \rho_{LR}}
2351: % \]
2352: % Hence the probability that no mark falls on a branch between the times $0$ and $\lfloor 2\alpha \rfloor$ is equal to
2353: % \[
2354: % \prod\limits_{i=2}^{\lfloor 2 \alpha \rfloor} \frac{i \alpha}{i\alpha + \rho_{LR}}
2355: % \]
2356: % By Taylor approximation we further get
2357: % \begin{equation}\label{eq:rec5}\begin{aligned} 
2358: % \prod_{i=2}^{\lfloor 2 \alpha \rfloor} 
2359: %     \frac{i\alpha}{i\alpha+\rho} &= \exp\left( \sum_{i=2}^{\lfloor 2 \alpha \rfloor} \log
2360: %     \left( 1 - \frac{\rho}{i\alpha+\rho}\right) \right) \\& 
2361: %     \approx \exp\left( -\frac \rho\alpha \sum_{i=2}^{\lfloor 2 \alpha \rfloor}
2362: %     \frac{1}{i+\rho/\alpha}\right) \approx \exp\left( -\frac \rho\alpha
2363: %     \sum_{i=2}^{\lfloor 2 \alpha \rfloor} \frac{1}{i}\right),\end{aligned}
2364: %  \end{equation}
2365: %  where the error we are making is of order $\mathcal{O}\left((\log\alpha)^{-2}\right)$ since the neglected terms in the Taylor series are of order $\rho_{LR}^{2}/\alpha= (\log \alpha)^{-2}$ and higher. It follows that the probability for the left and right loci to get separated while they are in the beneficial background according to Definition \ref{def:4} equals
2366: %  \[
2367: %  1-\exp\left(-\rho_{LR}\int_{0}^{T}X_{s}ds \right)=1- p_{1}^{\lfloor 2\alpha \rfloor}(\gamma_{LR}) + \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}} \right) 
2368: %  \]
2369: %  where the error term is due to the change from true time to Yule time as well as the approximative calculations above. Since the left hand size equals the probability defined by \eqref{eq:Y2}, we see that by returning to realizing the splits in the beneficial background beforehand to get the partition $\pi'$ according to \eqref{Split} and \eqref{eq:Y2}, we are only producing an error of order $\mathcal{O}\left((\log \alpha)^{-2}\right)$ on the whole.
2370: 
2371:   Next, we consider the generation of the $SL$-, $LR$- and
2372:   $SLR$-marks along the Yule tree. The probability that more than one
2373:   event with rate $\rho_{SL}$ and $\rho_{LR}$ hits the Yule tree
2374:   during the time it has $i$ lines is
2375:   \[ \frac{\rho^2}{(i\alpha + \rho)^2} = \mathcal O\left(
2376:     \frac{1}{(\log\alpha)^2}\right). \] Hence we can ignore this
2377:   event. Together with the Markov property of the Poisson process we
2378:   see that the marks on different lines in a sample tree may be
2379:   generated independently once the topology and the total number of
2380:   lines in the full Yule tree is known.
2381: 
2382: Consider a branch which starts when the full Yule tree has $i_1$ lines
2383: and ends when it has $i_2$ lines. Using Definition \ref{def:4} this
2384: line is hit by an $SL$-mark iff it is hit by the Poisson process at
2385: rate $\rho_{SL}$ and an independent Poisson process with rate
2386: $\rho_{LR}$ produces no mark between time $0$ and the time the Yule
2387: tree has $i_2$ lines. Hence the probability for an $SL$-mark in
2388: $\Xi_\pi$ is approximately given by
2389: \begin{eqnarray}\begin{aligned} \nonumber
2390:     &\left( 1 - \prod_{i=i_1+1}^{i_2}
2391:       \frac{i\alpha}{i\alpha+\rho_{SL}}\right)\left( \prod_{i=1}^{i_2}
2392:       \frac{i\alpha}{i\alpha+\rho_{LR}}\right) =
2393:     \big(1-p_{i_{1}}^{i_{2}}(\gamma_{SL})\big)
2394:     p_{0}^{i_{2}}(\gamma_{LR}) + \mathcal{O}\left(\frac{1}{(\log
2395:         \alpha)^{2}}\right)
2396:   % & \qquad \qquad \qquad = \left( 1 - \exp\left( -\frac {\rho_{SL}}\alpha
2397:    % \sum_{i=i_1+1}^{i_2} \frac{1}{i}\right)\right) \exp\left( -\frac {\rho_{LR}}\alpha
2398:    % \sum_{i=1}^{i_2} \frac{1}{i}\right) + \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}\right)
2399: \end{aligned}\end{eqnarray}
2400: If a branch is hit by the Poisson process with rate $\rho_{SL}$ but
2401: did not obtain an $SL$-mark, it obtains an $SLR$-mark. Hence the
2402: probability for such a mark is given by
2403: \begin{eqnarray}\begin{aligned} \nonumber
2404:     & \left( 1 - \prod_{i=i_1+1}^{i_2}
2405:       \frac{i\alpha}{i\alpha+\rho_{SL}}\right)\left( 1
2406:       -\prod_{i=1}^{i_2} \frac{i\alpha}{i\alpha+\rho_{LR}}\right) =
2407:     \left(1-p_{i_{1}}^{i_{2}}(\gamma_{SL})\right)\left(1-
2408:       p_{0}^{i_{2}}(\gamma_{LR})\right) +
2409:     \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}\right)
2410: %   & \qquad \qquad    = \left( 1 - \exp\left( -\frac {\rho_{SL}}\alpha
2411:  %   \sum_{i=i_1+1}^{i_2} \frac{1}{i}\right)\right) \left( 1 - \exp\left( -\frac {\rho_{LR}}\alpha
2412:   %  \sum_{i=1}^{i_2} \frac{1}{i}\right)\right)+ \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}\right)
2413: \end{aligned}\end{eqnarray}
2414: The branch is hit by an $LR$-mark if it is hit by the Poisson process
2415: at rate $\rho_{LR}$ but not by the Poisson process with rate
2416: $\rho_{SL}$. Hence the probability for an $LR$-mark is
2417: \begin{eqnarray}\begin{aligned} \nonumber
2418:     &\prod_{i=i_1+1}^{i_2}
2419:     \frac{i\alpha}{i\alpha+\rho_{SL}}\left( 1 -\prod_{i=i_1+1}^{i_2}
2420:     \frac{i\alpha}{i\alpha+\rho_{LR}}\right)
2421:      = p_{i_{1}}^{i_{2}}(\gamma_{SL})\left(1- p_{i_{1}}^{i_{2}}(\gamma_{LR})\right) + \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}\right)
2422:  %    & \qquad \qquad  = \exp\left( -\frac {\rho_{SL}}\alpha
2423:   %  \sum_{i=i_1+1}^{i_2} \frac{1}{i}\right)\left( 1 - \exp\left( -\frac {\rho_{LR}}\alpha
2424:   %  \sum_{i=i_1+1}^{i_2} \frac{1}{i}\right)\right)+ \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}\right)
2425: \end{aligned}\end{eqnarray}
2426: % \item no mark
2427: % \begin{eqnarray}\begin{aligned} \nonumber
2428: % &\prod_{i=i_1+1}^{i_2} \frac{i\alpha}{i\alpha+\rho_{SL}+ \rho_{LR}} 
2429: % %\approx
2430: % %\prod_{i=i_1+1}^{i_2} \frac{i\alpha}{i\alpha+\rho_{SL}} 
2431: % %\prod_{i=i_1+1}^{i_2} \frac{i\alpha}{i\alpha+\rho_{LR}} \\
2432: %  = p_{i_{1}+1}^{i_{2}}(\gamma_{SL}) \; p_{i_{1}+1}^{i_{2}}(\gamma_{LR}) + \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}\right)
2433: % % = \exp\left( -\frac {\rho_{SL}}\alpha
2434: %  %   \sum_{i=i_1+1}^{i_2} \frac{1}{i}\right)\exp\left( -\frac {\rho_{LR}}\alpha
2435: %  %   \sum_{i=i_1+1}^{i_2} \frac{1}{i}\right) + \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}\right)
2436: % \end{aligned}\end{eqnarray}
2437: % where we apply \eqref{eq:rec5} with $\rho= \rho_{SL}+ \rho_{LR}$ and after splitting the summation get the product in the last equation. 
2438: 
2439: As a consequence, the marks in $\mathbf Y_{|\pi'|}$ and
2440: $\mathcal{Y}_{|\pi'|}$ coincide approximately (cf. Table
2441: \ref{tab:marks}) and we are done.
2442:  \end{proof}
2443: 
2444: \subsubsection*{Acknowledgement}
2445: We thank Bernhard Haubold and Joachim Hermisson for comments on the
2446: manuscript and Anton Wakolbinger and Franz Merkl for fruitful
2447: discussion. We are grateful to Andy Lehnert, not only for help with
2448: Figure \ref{sim}.
2449: 
2450: \newcommand{\etalchar}[1]{$^{#1}$}
2451: \begin{thebibliography}{NWK{\etalchar{+}}05}
2452: 
2453: \bibitem[Bar98]{Barton1998}
2454: N.~Barton.
2455: \newblock The effect of hitch-hiking on neutral genealogies.
2456: \newblock {\em Gen. Res.}, 72:123--133, 1998.
2457: 
2458: \bibitem[Con05]{hapmap2005}
2459: International~HapMap Consortium.
2460: \newblock {{A} haplotype map of the human genome}.
2461: \newblock {\em Nature}, 437(7063):1299--1320, 2005.
2462: 
2463: \bibitem[EO94]{EvansOConnell1994}
2464: S.N. Evans and N.~O'Connell.
2465: \newblock Weighted occupation time for branching particle systems and a
2466:   representation for the supercritical superprocess.
2467: \newblock {\em Canad. Math. Bull.}, 37(2):187--196, 1994.
2468: 
2469: \bibitem[EPW06]{EtheridgePfaffelhuberWakolbinger2006}
2470: A.~Etheridge, P.~Pfaffelhuber, and A.~Wakolbinger.
2471: \newblock An approximate sampling formula under genetic hitchhiking.
2472: \newblock {\em Ann. Appl. Probab.}, 16:685--729, 2006.
2473: 
2474: \bibitem[Ewe04]{Ewens2004}
2475: W.J. Ewens.
2476: \newblock {\em Mathematical {P}opulation {G}enetics. I. Theoretical
2477:   introduction. Second edition}.
2478: \newblock Springer, 2004.
2479: 
2480: \bibitem[FW00]{FayWu2000}
2481: J.C. Fay and C.-I. Wu.
2482: \newblock Hitchhiking under positive darwinian selection.
2483: \newblock {\em Genetics}, 155:1405--1413, 2000.
2484: 
2485: \bibitem[GM97]{GriffithsMarjoram1997}
2486: R.C. Griffiths and P.~Marjoram.
2487: \newblock An ancestral recombination graph.
2488: \newblock In {\em Progress in Population Genetics and Human Evolution, IMA
2489:   volumes in Mathematics and its Applications, 87. Springer Verlag, Berlin},
2490:   pages 257--270, 1997.
2491: 
2492: \bibitem[Gri03]{Griffiths2003}
2493: R.C. Griffiths.
2494: \newblock The frequency spectrum of a mutation and its age, in a general
2495:   diffusion model.
2496: \newblock {\em Theo. Pop. Biol.}, 64(2):241--251, 2003.
2497: 
2498: \bibitem[Hud83]{Hudson1983}
2499: R.R. Hudson.
2500: \newblock Properties of a neutral allele model with intragenic recombination.
2501: \newblock {\em Theo. Pop. Biol.}, 23:183--201, 1983.
2502: 
2503: \bibitem[KDH88]{KaplanDardenHudson1988}
2504: N.L. Kaplan, T.~Darden, and R.R. Hudson.
2505: \newblock The {C}oalescent {P}rocess in {M}odels with {S}election.
2506: \newblock {\em Genetics}, 120:819--829, 1988.
2507: 
2508: \bibitem[KHL89]{KaplanHudsonLangley1989}
2509: N.L. Kaplan, R.R. Hudson, and C.H. Langley.
2510: \newblock The `{H}itchhiking effect' revisited.
2511: \newblock {\em Genetics}, 123:887--899, 1989.
2512: 
2513: \bibitem[KS02]{KimStephan2002}
2514: Y.~Kim and W.~Stephan.
2515: \newblock Detecting a local signature of genetic hitchhiking along a
2516:   recombining chromosome.
2517: \newblock {\em Genetics}, 160:765--777, 2002.
2518: 
2519: \bibitem[KT81]{KarlinTaylor1981}
2520: S.~Karlin and H.M. Taylor.
2521: \newblock {\em A second course in stochastic processes}.
2522: \newblock Academic Press London, 1981.
2523: 
2524: \bibitem[Lew64]{Lewontin1964}
2525: R.C. Lewontin.
2526: \newblock {The interaction of selection and linkage. I. General considerations;
2527:   Heterotic models}.
2528: \newblock {\em Genetics}, 49:49--67, 1964.
2529: 
2530: \bibitem[LS05]{LiStephan2005}
2531: H.~Li and W.~Stephan.
2532: \newblock Maximum-likelihood methods for detecting recent positive selection
2533:   and localizing the selected site in a genome.
2534: \newblock {\em Genetics}, 171:377--384, 2005.
2535: 
2536: \bibitem[LSP06]{LehnertStephanPfaffelhuber2006}
2537: A.~Lehnert, W.~Stephan, and P.~Pfaffelhuber.
2538: \newblock A stochastic analysis of linkage disequilibrium under selective
2539:   sweeps.
2540: \newblock {\em submitted}, 2006.
2541: 
2542: \bibitem[MSH74]{MaynardSmithHaigh1974}
2543: J.~Maynard~Smith and J.~Haigh.
2544: \newblock The hitch-hiking effect of a favorable gene.
2545: \newblock {\em Gen. Res.}, 23:23--35, 1974.
2546: 
2547: \bibitem[Nur05]{Nurminsky2005}
2548: D.~Nurminsky.
2549: \newblock {\em Selective Sweep}.
2550: \newblock Kluwer, 2005.
2551: 
2552: \bibitem[NWK{\etalchar{+}}05]{NielsenEtAl2005}
2553: R.~Nielsen, S.~Williamson, Y.~Kim, M.J. Hubisz, A.G. Clark, and C.~Bustamante.
2554: \newblock {{G}enomic scans for selective sweeps using {S}{N}{P} data}.
2555: \newblock {\em Genome Res.}, 15(11):1566--1575, 2005.
2556: 
2557: \bibitem[O'C93]{OConnell1993}
2558: N.~O'Connell.
2559: \newblock Yule {P}rocess {A}pproximation for the {S}keleton of a {B}ranching
2560:   {P}rocess.
2561: \newblock {\em J. Appl. Prob.}, 30:725--729, 1993.
2562: 
2563: \bibitem[PHW06]{PfaffelhuberHauboldWakolbinger2006}
2564: P.~Pfaffelhuber, B.~Haubold, and A.~Wakolbinger.
2565: \newblock Approximate genealogies under genetic hitchhiking.
2566: \newblock {\em Genetics}, to appear, 2006.
2567: 
2568: \bibitem[RT06]{ReedTishkoff2006}
2569: F.A. Reed and S.A. Tishkoff.
2570: \newblock {{P}ositive selection can create false hotspots of recombination}.
2571: \newblock {\em Genetics}, 172(3):2011--2014, 2006.
2572: 
2573: \bibitem[SD05]{SchweinsbergDurrett2005}
2574: J.~Schweinsberg and R.~Durrett.
2575: \newblock Random partitions approximating the coalescence of lineages during a
2576:   selective sweep.
2577: \newblock {\em Ann. Appl. Probab.}, 15:1591--1651, 2005.
2578: 
2579: \bibitem[SSL06]{StephanSongLangley2006}
2580: W.~Stephan, Y.~Song, and C.~Langley.
2581: \newblock The hitchhiking effect on linkage disequilibrium between linked
2582:   neutral loci.
2583: \newblock {\em Genetics}, 172:2647--2663, 2006.
2584: 
2585: \bibitem[STW84]{Saundersetal1984}
2586: I.W. Saunders, S.~Tavar\'e, and G.A. Watterson.
2587: \newblock On the genealogy of nested subsamples from a haploid population.
2588: \newblock {\em Adv. Appl. Probab.}, 16:471--491, 1984.
2589: 
2590: \bibitem[SWL92]{StephanWieheLenz1992}
2591: W.~Stephan, T.~Wiehe, and M.~Lenz.
2592: \newblock The effect of strongly selected substitutions on neutral
2593:   polymorphism: analytical results based on diffusion theory.
2594: \newblock {\em Theo. Pop. Biol.}, 41:237--254, 1992.
2595: 
2596: \end{thebibliography}
2597: 
2598: %\bibliography{PS}
2599: %\bibliographystyle{alpha}
2600: 
2601: 
2602: \end{document}
2603: