0611:q-bio0611029/PS.tex

1: \documentclass[11pt]{article}

2: \usepackage{latexsym, a4wide}

3: \usepackage{amsmath, rotating, color}

4: \usepackage{amsfonts,amssymb, amsthm}

5: %\usepackage{etex}

6: \usepackage{pictexwd}

7: %\usepackage[notcite,notref]{showkeys} % shows labels

8: %\textheight229mm

9: %\textwidth140mm

10: % max. 256 mm

11:

12: \newcommand{\dickm}[1]{\text{\boldmath ${#1}$}}

13: \newcommand{\sO}{\mathcal{O}}

14: \newcommand{\so}{\text{\scriptsize$\mathcal{O}$}}

15:

16: \newtheorem{theorem}{Theorem}

17: \newtheorem{proposition}{Proposition}[section]

18: \newtheorem{lemma}[proposition]{Lemma}

19: \newtheorem{corollary}[proposition]{Corollary}

20: \newtheorem{definition}[proposition]{Definition}

21: \theoremstyle{definition}

22: \newtheorem{remark}[proposition]{Remark}

23: \newtheorem{example}[proposition]{Example}

24: \newtheoremstyle{step}{3pt}{0pt}{}{}{\bf}{}{.5em}{}

25: \theoremstyle{step} \newtheorem{step}{Step}

26: \newcommand{\bs}[2]{\setcounter{step}{#1}{\addtocounter{step}{-1}}\begin{step}(#2)\end{step}}

27: \newtheorem{case}{Case}

28: \pagestyle{headings}

29: \numberwithin{equation}{section}

30:

31: \setcounter{secnumdepth}{2}

32: \newcommand{\ml}[1]{\mbox{}\marginpar{\raggedleft\hspace{0cm}#1}}

33: \setcounter{tocdepth}{2}

34: %\renewcommand{\thefootnote}{\alph{footnote}}

35: \newcommand\unnumberedfootnote[1]{ %

36:         \let\temp=\thefootnote %

37:         \renewcommand{\thefootnote}{}%

38:         \footnote{#1}%

39:         \let\thefootnote=\temp%

40:         \addtocounter{footnote}{-1}}

41:

42: \newcommand{\ij}[2]{#1\text{{\bf ---\!\!---}}#2}

43:

44:

45: %\newcommand{\ij}[2]{\text{\parbox{1cm}{\beginpicture

46: %\setcoordinatesystem units <.5cm,.5cm>

47: %\setplotarea x from 0 to 1, y from 0.2 to 1.1

48: %\plot 0.5 0.5 1.5 0.5 /

49: %\put{$#1$} [rC] at 0.2 0.5

50: %\put{$#2$} [lC] at 1.8 0.5 \endpicture}}}

51:

52: \newcommand{\fjk}{\text{\parbox{2cm}{\beginpicture

53: \setcoordinatesystem units <.5cm,0.5cm>

54: \setplotarea x from 0 to 2, y from 0 to 2

55: \plot 1 0.5 1 1 0.5 1.5 1 1 1.5 1.5 /

56: \put{$\bullet$} [cC] at 1 0.5

57: \put{$\pi'_{(j)}$} [cC] at 0.1 2

58: \put{$\pi'_{(k)}$} [cC] at 1.8 2 \endpicture}}}

59:

60: \newcommand{\ljk}[2]{\text{\parbox{2cm}{\beginpicture

61: \setcoordinatesystem units <1mm,1mm>

62: \setplotarea x from 5 to 15, y from 0 to 15

63: \plot 5 5 2.5 7.5 5 5 7.5 7.5 /

64: %\multiput{\tiny $\bullet$} at 5 5 *5  1 1/

65: \put{$#1$}[rC] at 2 8

66: \put{$#2$}[lC] at 7 8 \endpicture}}}

67:

68: \newcommand{\lk}[1]{\text{\parbox{2cm}{\beginpicture

69: \setcoordinatesystem units <.5cm,.5cm>

70: \setplotarea x from 1 to 2, y from 0 to 2

71: \plot 1 0.5 1 1 1.5 1.5 /

72: \put{$\bullet$}[cC] at 1 0.5

73: \put{$#1$}[lC] at 1.7 1.8

74:  \endpicture}}}

75:

76: \newcommand{\one}[1]{\text{\parbox{1cm}{\beginpicture

77: \setcoordinatesystem units <.1cm,.1cm>

78: \setplotarea x from 0 to 6, y from 3 to 13

79: \plot 3 3 3 8 /

80: \put{\footnotesize#1} [cC] at 3 11

81: \multiput{\tiny $\bullet$} at 3 3 *50  0 0.1 /

82: \endpicture}}}

83:

84: \newcommand{\Y}{\text{\parbox{1.5cm}{

85: \beginpicture

86: \setcoordinatesystem units <0.1cm, 0.1cm>

87: \setplotarea x from 0 to 20, y from 3 to 17

88: \plot 10 6 10 10 7 13 10 10 13 13 /

89: %\multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /

90: %\multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /

91: %\multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /

92: %\put{$\bullet$}[Cc]  at 10 6

93: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15

94: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15

95: \endpicture}}}

96:

97: \newcommand{\Yup}{\text{\parbox{1.5cm}{

98: \beginpicture

99: \setcoordinatesystem units <0.1cm, 0.1cm>

100: \setplotarea x from 0 to 20, y from 3 to 17

101: \plot 10 6 10 10 7 13 10 10 13 13 /

102: \multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /

103: \multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /

104: %\multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /

105: %\put{$\bullet$}[Cc]  at 10 6

106: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15

107: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15

108: \endpicture}}}

109:

110: \newcommand{\Yri}{\text{\parbox{1.5cm}{

111: \beginpicture

112: \setcoordinatesystem units <0.1cm, 0.1cm>

113: \setplotarea x from 0 to 20, y from 3 to 17

114: \plot 10 6 10 10 7 13 10 10 13 13 /

115: \multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /

116: %\multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /

117: \multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /

118: %\put{$\bullet$}[Cc]  at 10 6

119: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15

120: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15

121: \endpicture}}}

122:

123: \newcommand{\Yle}{\text{\parbox{1.5cm}{

124: \beginpicture

125: \setcoordinatesystem units <0.1cm, 0.1cm>

126: \setplotarea x from 0 to 20, y from 3 to 17

127: \plot 10 6 10 10 7 13 10 10 13 13 /

128: %\multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /

129: \multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /

130: \multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /

131: %\put{$\bullet$}[Cc]  at 10 6

132: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15

133: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15

134: \endpicture}}}

135:

136: \newcommand{\Ybottom}{\text{\parbox{1.5cm}{

137: \beginpicture

138: \setcoordinatesystem units <0.1cm, 0.1cm>

139: \setplotarea x from 0 to 20, y from 3 to 17

140: \plot 10 6 10 10 7 13 10 10 13 13 /

141: %\multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /

142: %\multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /

143: \multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /

144: %\put{$\bullet$}[Cc]  at 10 6

145: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15

146: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15

147: \endpicture}}}

148:

149: \newcommand{\Yupri}{\text{\parbox{1.5cm}{

150: \beginpicture

151: \setcoordinatesystem units <0.1cm, 0.1cm>

152: \setplotarea x from 0 to 20, y from 3 to 17

153: \plot 10 6 10 10 7 13 10 10 13 13 /

154: \multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /

155: %\multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /

156: %\multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /

157: %\put{$\bullet$}[Cc]  at 10 6

158: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15

159: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15

160: \endpicture}}}

161:

162: \newcommand{\Yuple}{\text{\parbox{1.5cm}{

163: \beginpicture

164: \setcoordinatesystem units <0.1cm, 0.1cm>

165: \setplotarea x from 0 to 20, y from 3 to 17

166: \plot 10 6 10 10 7 13 10 10 13 13 /

167: %\multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /

168: \multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /

169: %\multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /

170: %\put{$\bullet$}[Cc]  at 10 6

171: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15

172: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15

173: \endpicture}}}

174:

175: \newcommand{\Yall}{\text{\parbox{1.5cm}{

176: \beginpicture

177: \setcoordinatesystem units <0.1cm, 0.1cm>

178: \setplotarea x from 0 to 20, y from 3 to 17

179: \plot 10 6 10 10 7 13 10 10 13 13 /

180: \multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /

181: \multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /

182: \multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /

183: %\put{$\bullet$}[Cc]  at 10 6

184: \put{\footnotesize$\pi'_{(j)}$} [cC] at 4.5 15

185: \put{\footnotesize$\pi'_{(k)}$} [cC] at 15 15

186: \endpicture}}}

187:

188:

189:

190:

191: \begin{document}

192: \title{\LARGE Approximating genealogies for partially linked neutral

193:   loci under a selective sweep}

194: %\runtitle{Partially linked loci under a selective sweep}

195:

196: \thispagestyle{empty}

197:

198: \author{{\sc by P. Pfaffelhuber\thanks{Corresponding author; Tel: (+49)-89-74-2180-108; email: p.p@lmu.de} and A. Studeny} \\[2ex]

199:   \emph{Ludwig-Maximilian University Munich} \vspace*{-7ex}} \date{}

200:

201: \maketitle

202: \unnumberedfootnote{\emph{AMS 2000 subject classification.} 92D15

203:   (Primary), 60J80, 60J85, 60K37, 92D10 (Secondary).}

204:

205: \unnumberedfootnote{\emph{Keywords and phrases.} Selective sweep,

206:   genetic hitchhiking, diffusion approximation, Yule process,

207:   ancestral recombination graph, random background}

208:

209:

210: \begin{abstract}

211: \noindent

212: Consider a genetic locus carrying a strongly beneficial allele which

213: has recently fixed in a large population. As strongly beneficial

214: alleles fix quickly, sequence diversity at partially linked neutral

215: loci is reduced. This phenomenon is known as a \emph{selective sweep}.

216:

217: The fixation of the beneficial allele not only affects sequence

218: diversity at single neutral loci but also the joint allele

219: distribution of several partially linked neutral loci. This

220: distribution can be studied using the ancestral recombination graph

221: for samples of partially linked neutral loci during the selective

222: sweep. To approximate this graph, we extend recent work by

223: \cite{SchweinsbergDurrett2005, EtheridgePfaffelhuberWakolbinger2006}

224: using a marked Yule tree for the genealogy at a single neutral locus

225: linked to a strongly beneficial one.

226:

227: We focus on joint genealogies at two partially linked neutral loci in

228: the case of large selection coefficients $\alpha$ and recombination

229: rates $\rho=\mathcal O(\alpha/\log\alpha)$ between loci.  Our approach

230: leads to a full description of the genealogy with accuracy of

231: $\mathcal O((\log \alpha)^{-2})$ in probability. As an application, we

232: derive the expectation of Lewontin's $D$ as a measure for non-random

233: association of alleles.

234: \end{abstract}

235:

236: \section{Introduction}

237: The model of \emph{selective sweeps}, also known as \emph{genetic

238:   hitchhiking}, introduced by Maynard Smith and Haigh in

239: \cite{MaynardSmithHaigh1974}, is the starting point for a large body

240: of both empirical and theoretical population genetic studies

241: (\cite{Nurminsky2005}). It predicts that sequence diversity is reduced

242: close to a strongly selected locus on a recombining genome near the

243: time of fixation of the beneficial allele. Theoretical studies aim at

244: describing these patterns of genetic diversity in detail while

245: empirical work uses this prediction to identify genes under selection.

246:

247: If a species or a population adapts to its environment, several genes

248: might be under strong selection. Moreover, if the function of genes

249: were known, we would have predictions as to which genes are

250: responsible for the adaptive process. Unfortunately, functional

251: information is scarce. Without functional knowledge and in the

252: presence of recombination, the model of selective sweeps helps to

253: identify candidate genes affected by recent selective pressures.

254: Genome scans are carried out for a sample of individuals, which show

255: patterns of sequence diversity at lots of marker loci in the whole

256: genome (\cite{NielsenEtAl2005}). If a marker shows low diversity,

257: statistical tests help to decide if a gene under selection is located

258: nearby (\cite{KimStephan2002, LiStephan2005}).

259:

260: Most theoretical studies of selective sweeps have focused on a model

261: with one selective and one partially linked neutral locus

262: (\cite{MaynardSmithHaigh1974, StephanWieheLenz1992,

263:   KaplanHudsonLangley1989, Barton1998, SchweinsbergDurrett2005,

264:   EtheridgePfaffelhuberWakolbinger2006}). This simple model already

265: describes the reduction in sequence diversity. However, genetic data

266: are frequently available for many partially linked loci. This raises

267: the question of whether selective sweeps also generate distinct

268: patterns of multi-locus allele frequencies. We will follow

269: \cite{StephanSongLangley2006} and study a three locus model with one

270: selective and two partially linked neutral loci. Using this model, it

271: is possible to study the non-random association of allelic types at

272: the two neutral loci, which is usually called \emph{linkage

273:   disequilibrium}.

274:

275: % \smallskip

276:

277: % Since the invention of the hitchhiking model by

278: % \cite{MaynardSmithHaigh1974} several techniques were developed to

279: % study selective sweeps. Most influential for simulations was the

280: % approach of \cite{KaplanHudsonLangley1989} who used a structured

281: % coalescent for the ancestry of the neutral locus. Here, the two

282: % 'structures' are given by individuals carrying the beneficial and the

283: % wild-type, respectively. In this model, migration between the

284: % structures happens due to recombination events between the selected

285: % and the neutral locus (see also \cite{BravermanEtAl1995,

286: %   KimStephan2002}). As selection is assumed to be strong, the

287: % frequency of the selected allele can be assumed to be deterministic.

288: % Different from this analysis are diffusion models, which have been

289: % used in both the two locus model (\cite{StephanWieheLenz1992}) as well

290: % as the three locus model (\cite{StephanSongLangley2006}).

291:

292: An influential idea in the analysis of selective sweeps was to study

293: approximate genealogies describing relationships between the

294: individuals in a sample from the population. Studying genealogies at

295: the selected site started with \cite{KaplanDardenHudson1988} and was

296: carried further to linked neutral loci in

297: \cite{KaplanHudsonLangley1989}.

298:

299: The genealogy at a single neutral locus can be constructed as a

300: \emph{structured coalescent}.  Here, the beneficial and wild-type

301: allele at the selected locus form two subpopulations. Their sizes are

302: determined by the frequency path of the beneficial allele during the

303: selective sweep. Assume a new gamete is built (forward in time) by

304: recombination of a beneficial allele at the selected locus and a

305: neutral variant linked to a wild-type. Following the neutral variant

306: backward in time leads to a migration event from the beneficial to the

307: wild-type background. Therefore, recombination acts as migration

308: between the beneficial and the wild-type backgrounds.

309:

310: Genealogies of two or more loci can be constructed using the ancestral

311: recombination graph (\cite{Hudson1983, GriffithsMarjoram1997}).

312: Therefore, we will construct ancestries of two partially linked

313: neutral loci under a selective sweep by a \emph{structured ancestral

314:   recombination graph}.  As in the case of only one locus, the two

315: subpopulations are distinguished by the beneficial and wild-type

316: allele at the selected locus, respectively. This ancestral

317: recombination graph will serve as the exact model for genealogies at

318: partially linked loci under a selective sweep. However, an exact

319: analysis is hard to obtain, because the graph must be conditioned on

320: the random frequency path of the beneficial allele.

321:

322: An alternative approach uses a two-step procedure for genealogies at

323: the selective and the neutral locus. First, the (approximate) genealogy at

324: the selective locus is generated and second, the genealogy at the

325: neutral locus is added, which might differ due to recombination.  Two

326: approximate genealogies at the selected site have been proposed.

327: First, a star-like genealogy, which means that the most recent common

328: ancestor of all pairs in the population is the individual which

329: carried the beneficial allele first (\cite{SchweinsbergDurrett2005,

330:   NielsenEtAl2005}). Second, a Yule process, i.e., a pure birth

331: process, which allows for coalescences also during the selective sweep

332: (\cite{SchweinsbergDurrett2005,

333:   EtheridgePfaffelhuberWakolbinger2006}). It was shown in

334: \cite[Theorems 1.1, 1.2]{SchweinsbergDurrett2005} that the Yule

335: process approximation is more exact than the star-like approximation.

336: Therefore, we will use this Yule process approximation for the

337: genealogy at the selected site to study the three locus model of

338: \cite{StephanSongLangley2006} for selective sweeps. We will show that

339: the analysis carried out in

340: \cite{EtheridgePfaffelhuberWakolbinger2006} in the two locus case can

341: be extended to the three locus case (Theorem \ref{T}). Moreover, the

342: approximation by a Yule process can be used to calculate

343: characteristics of linkage disequilibrium explicitly (Theorem

344: \ref{T2}).

345:

346: \section{The model}

347: Consider a beneficial allele which enters a population of (haploid)

348: size $N$ at time $t=0$ and has a selective advantage of $s$ with

349: respect to the wild-type allele. Set $\alpha=sN$, which is called the

350: scaled selection coefficient. As selection can only be detected if the

351: beneficial allele fixes in the population, we condition on fixation of

352: the beneficial allele and let $T$ be the (random) time of fixation.

353:

354: \smallskip

355:

356: Assume reproduction in the population follows a Wright-Fisher model,

357: or, more generally, a Cannings model with individual offspring

358: variance 1. In the limit of infinite $N$ and a time rescaling in units

359: of $N$ generations, the frequency path of the beneficial allele is the

360: solution of the SDE

361: \begin{equation}\label{eq:SDE}

362:   dX = \alpha X(1-X)\coth(\alpha X)dt + \sqrt{X(1-X)}dW,

363: \end{equation}

364: with a standard Brownian motion $W$ and $X_0=0$. This diffusion arises

365: as $h$-transform of the process describing the unconditional frequency

366: path with the fixation probability of the beneficial allele as a

367: harmonic function and has $0$ as an entrance boundary. (See e.g.

368: \cite{Griffiths2003}, p. 245 and

369: \cite{EtheridgePfaffelhuberWakolbinger2006}, (2.1).)

370:

371: Two neutral loci are partially linked to the selected locus. For

372: simplicity, we refer to the two neutral loci as the \emph{l}eft and

373: \emph{r}ight neutral locus, denoted by $L$ and $R$. As illustrated in

374: Figure \ref{fig:geom}, the selected locus lies either (i) outside or

375: (ii) in between the neutral loci. All other possible geometries are

376: equivalent to either (i) or (ii) because of the symmetry in the model.

377:

378: Recombination can break up the association of these three loci. (We

379: only consider recombination as simple crossing over. Gene conversion

380: is not considered in our model.) As we take a limiting infinite

381: population and rescale time by a factor of $N$, we have to consider

382: scaled recombination rates. These are different for the two

383: geometries. For geometry (i) we denote the recombination rates between

384: adjacent loci by $\rho_{SL}$, $\rho_{LR}$ and for

385: geometry (ii) by $\rho_{LS}$, $\rho_{SR}$, respectively.

386:

387: \begin{figure}

388: $$

389: (i): \text{

390: \parbox{2cm}{\beginpicture

391: \setcoordinatesystem units <1.5cm,1.3cm>

392: \setplotarea x from 0 to 2, y from 0 to 1

393: \plot 0 0.5 2 0.5 /

394: \plot 0 0.4 0 0.6 /

395: \plot 0.6 0.4 0.6 0.6 /

396: \plot 2 0.4 2 0.6 /

397: \put{$S$} [cC] at 0 0.2

398: \put{$L$} [cC] at 0.6 0.2

399: \put{$R$} [cC] at 2 0.2

400: \plot 0 0.7 0 0.9 /

401: \plot 0.6 0.7 0.6 0.9 /

402: \plot 2 0.7 2 0.9 /

403: \multiput {\tiny $\bullet$} at 0 .5 *200 .01 .0 /

404: \arrow <0.2cm> [0.375,1] from 0 .8 to .6 .8

405: \arrow <0.2cm> [0.375,1] from 0.6 .8 to 0 .8

406: \arrow <0.2cm> [0.375,1] from 2 .8 to .6 .8

407: \arrow <0.2cm> [0.375,1] from 0.6 .8 to 2 .8

408: \put{$\rho_{SL}$} [cC] at .3 1

409: \put{$\rho_{LR}$} [cC] at 1.3 1

410: \endpicture}}

411: \qquad\qquad

412: (ii): \text{

413: \parbox{2cm}{\beginpicture

414: \setcoordinatesystem units <1.5cm,1.3cm>

415: \setplotarea x from 0 to 2, y from 0 to 1

416: \plot 0 0.5 2 0.5 /

417: \plot 0 0.4 0 0.6 /

418: \plot 1.2 0.4 1.2 0.6 /

419: \plot 2 0.4 2 0.6 /

420: \put{$L$} [cC] at 0 0.2

421: \put{$S$} [cC] at 1.2 0.2

422: \put{$R$} [cC] at 2 0.2

423: \plot 0 0.7 0 0.9 /

424: \plot 1.2 0.7 1.2 0.9 /

425: \plot 2 0.7 2 0.9 /

426: \multiput {\tiny $\bullet$} at 0 .5 *200 .01 .0 /

427: \arrow <0.2cm> [0.375,1] from 0 .8 to 1.2 .8

428: \arrow <0.2cm> [0.375,1] from 1.2 .8 to 0 .8

429: \arrow <0.2cm> [0.375,1] from 2 .8 to 1.2 .8

430: \arrow <0.2cm> [0.375,1] from 1.2 .8 to 2 .8

431: \put{$\rho_{LS}$} [cC] at .6 1

432: \put{$\rho_{SR}$} [cC] at 1.5 1

433: \endpicture}}

434: $$

435: \caption{\label{fig:geom}The two possible geometries of the selected

436:   ($S$) and the two neutral loci ($L$ and $R$). The scaled

437:   recombination rates between loci are given by $\rho_{SL}, \rho_{LR},

438:   \rho_{LS}$ and $\rho_{SR}$.}

439: \end{figure}

440:

441: The two linked neutral loci do not affect the frequency path of the

442: beneficial allele. In contrast, neutral variants which are linked to

443: the beneficial allele at the beginning of the selective sweep rise in

444: frequency.  Looking backward in time from the time $T$ of fixation, we

445: can trace back the history of a finite sample at all three loci. As

446: the neutral loci are linked to the selected one, the genealogies at

447: all three loci are correlated.

448:

449: For the construction of the ancestral recombination graph relating all

450: loci, time is running backward, so we set $\beta = T-t$. Conditioned

451: on a frequency path $\mathcal X=(X_t)_{0\leq t\leq T}$, given by

452: \eqref{eq:SDE}, we will describe the ancestral recombination graph as

453: a partition-valued process $\xi^{\mathcal X} =(\xi^{\mathcal

454:   X}_\beta)_{0\leq \beta\leq T}$.

455:

456: \smallskip

457:

458: Assume we take a sample from the population at time $T$. Every

459: individual in the sample carries one $L$ and one $R$-locus. Of all

460: $L$- and $R$-loci present in the sample we want to trace back a number

461: $\ell$ of $L$- and $r$ of $R$-loci. These loci are represented by sets

462: $\dickm\ell$ for the $L$- and $\dickm r$ for the $R$-loci. So, $\ell

463: := |\dickm \ell|, r:= |\dickm r|$. To define the state space of the

464: structured ancestral recombination graph denote by $\mathcal P_A$ the

465: set of partitions of $A$ for a finite set $A$ and define

466: $$ \mathcal P'_{\dickm \ell\,\cup\,\dickm r} := \{\xi=(\xi^B, \xi^b), \xi^B\cup\xi^b \in\mathcal

467: P_{\dickm\ell \cup \dickm r}, \xi^B\cap\xi^b=\varnothing\}.$$ The

468: coordinates $\xi^B$ and $\xi^b$ contain partition elements located in

469: the beneficial and the wild-type background, respectively. For

470: $\xi\in\mathcal P'_{\dickm \ell \cup \dickm r}$ we write $\xi_{(j)}$

471: for the partition element containing $j\in\dickm \ell\cup \dickm r$.

472:

473: The ancestral process is started at the time $\beta=0$ of fixation of

474: the beneficial allele. So, the sample of $L$- and $R$-loci is linked

475: to the beneficial allele. Therefore, we start the process in

476: $\xi^{\mathcal X}_0=(\pi, \varnothing)$ for some $\pi\in \mathcal

477: P_{\dickm \ell\,\cup\,\dickm r}$. Assume the state at time $\beta$ is

478: $\xi^{\mathcal X}_\beta=(\xi^B, \xi^b)\in\mathcal P'_{\dickm

479:   \ell\,\cup\,\dickm r}$. For $j\in\dickm \ell \cup \dickm r$ the

480: partition element which contains $j$, i.e.,

481: $(\xi^{\mathcal X}_\beta)_{(j)}$, encodes the set of $L$- and

482: $R$-loci, taken from the population at time $T$, which have the same

483: ancestor as $j$ at time $T-\beta$.  Usually we will study the

484: genealogy of $n$ pairs of $L$- and $R$-loci. In this case set $\dickm

485: \ell:=\{1,\ldots, n\}$ and $\dickm r:=\{n+1,\ldots, 2n\}$ and start

486: the process with $\pi = \{\{1,n+1\}, \ldots, \{n,2n\}\}$.

487:

488: The dynamics of the process is given as follows: Coalescence events

489: occur for lines in the beneficial and the wild-type background with

490: pair coalescence rate $1/X_{T-\beta}$ and $1/(1-X_{T-\beta})$ at time

491: $\beta$, respectively. So, given $\xi^{\mathcal

492:   X}_\beta=(\xi^B,\xi^b)$ with $\xi^B=\{\xi^B_1, \ldots,

493: \xi^B_{|\xi^B|}\}$ and $\xi^b=\{\xi^b_1, \ldots, \xi^b_{|\xi^b|}\}$

494: transitions occur for $1\leq j\neq k\leq |\xi^B|$ and $1\leq j'\neq

495: k'\leq |\xi^b|$ from $(\xi^B, \xi^b)$ to

496: \begin{equation}\label{eq:coal}

497: \begin{aligned}

498:   &\left((\xi^B\setminus \{\xi^B_j, \xi^B_k\}) \cup \{\xi_j^B\cup

499:     \xi^B_k\}, \xi^b\right) &&

500:   \text{ with rate }\frac{1}{X_{T-\beta}},  & \quad(1)\\

501:   & \left(\xi^B,(\xi^b\setminus \{\xi^b_{j'}, \xi^b_{k'}\}) \cup

502:     \{\xi_{j'}^b\cup \xi^b_{k'}\}\right) && \text{ with rate

503:     }\frac{1}{1-X_{T-\beta}}, & \quad(2)

504: \end{aligned}

505: \end{equation}

506: respectively. For transitions in the process $\xi^{\mathcal X}$ due to

507: recombination we focus on geometry (i) first. A recombination event

508: hits one line between the $S$ and the $L$ locus with rate

509: $\rho_{SL}$ and between the $L$ and the $R$ locus with rate

510: $\rho_{LR}$. If a recombination event occurs between the $S$ and the

511: $L$ locus, it may be that both recombining chromosomes carry the same

512: allele at the $S$ locus. This gives a recombination event which cannot

513: be seen effectively and we ignore it in the process $\xi^{\mathcal

514:   X}$. All other recombination events must be modeled. If

515: $\xi_\beta^{\mathcal X}=(\xi^B,\xi^b)$ with $\xi^B=\{\xi^B_1, \ldots,

516: \xi^B_{|\xi^B|}\}$ and $\xi^b=\{\xi^b_1, \ldots, \xi^b_{|\xi^b|}\}$,

517: transitions occur for $1\leq j\leq |\xi^B|$ and $1\leq k\leq |\xi^b|$

518: from $(\xi^B, \xi^b) $ to

519: \begin{equation}\label{eq:rec1}

520: \begin{aligned}

521:   &  \left(\xi^B\setminus\{\xi^B_j\}, \xi^b\cup \{\xi^B_j\}\right) && \text{ with rate }\rho_{SL}(1-X_{T-\beta}) & \qquad (3_i) \\

522:   &  \left((\xi^B\setminus\{\xi^B_j\}) \cup \{\xi^B_j\cap \dickm \ell\}, \xi^b\cup \{\xi^B_j\cap\dickm r\} \right) && \text{ with rate }\rho_{LR}(1-X_{T-\beta}) & (4_i)\\

523:   &  \left((\xi^B\setminus \{\xi^B_j\}) \cup \{\xi^B_j\cap \dickm \ell, \xi^B_j\cap \dickm r\}, \xi^b\right)  && \text{ with rate }\rho_{LR}X_{T-\beta} & (5_i)\\

524:   &  \left(\xi^B, (\xi^b \setminus\{\xi^b_k\}) \cup \{\xi^b_k\cap \dickm \ell, \xi^b_k\cap\dickm r\}\right) && \text{ with rate }\rho_{LR}(1-X_{T-\beta}) & (6_i)\\

525:   &  \left(\xi^B\cup \{\xi^b_k\}, \xi^b\setminus\{\xi^b_k\}\right) && \text{ with rate }\rho_{SL}X_{T-\beta} & (7_i)\\

526:   & \left(\xi^B\cup \{\xi^b_k\cap\dickm r\}, (\xi^b\setminus\{\xi^b_k\}) \cup

527:   \{\xi^b_k\cap \dickm \ell\}\right) && \text{ with rate

528:   }\rho_{LR}X_{T-\beta}. & (8_i)

529: \end{aligned}

530: \end{equation}

531: Here, $(3_i)$ encodes a recombination event which takes a pair of

532: linked $L$- and $R$-loci from the beneficial to the wild-type

533: background; an event ($4_i$) separates the $R$-locus of a line and

534: takes it to the wild-type background; by $(5_i)$ the $L$ and $R$ loci

535: of a line in the beneficial background are split but remain both in

536: the same background; $(6_i)$ describes the same transition for a line

537: in the wild-type background.  The transitions $(7_i)$ and $(8_i)$

538: describe the back-recombination of loci into the beneficial

539: background.

540:

541: \begin{example}

542:   An example displaying the dynamics of the process $\xi^{\mathcal X}$

543:   for geometry (i) is shown in Figure \ref{ancrecgraph}.  The sets of

544:   $L$- and $R$-loci are $\dickm\ell = \{1,2,3\}$ and $\dickm

545:   r=\{4,5,6\}$, respectively. The starting partition is $\xi^{\mathcal

546:     X}_0=(\pi,\varnothing)$ with $\pi = \{\{1,4\}, \{2,5\},

547:   \{3,6\}\}$. Several kinds of events can happen; coalescences in the

548:   beneficial background, i.e., an event (1), recombinations which

549:   leave the two neutral loci together but change the allele at the

550:   selected site, i.e., an event $(3_i)$ and recombination events which

551:   split the two neutral loci. The last kind of event may either bring

552:   one of the two neutral loci in a different background, $(4_i)$, or

553:   split a line within the beneficial background, $(5_i)$, or split a

554:   line in the wild-type background, $(6_i)$. The final partition is

555:   $\xi^{\mathcal X}_T = (\xi^B_T, \xi^b_T)$ with $\xi^B_T =

556:   \{\{1,2\}\}$, $\xi^b_T= \{\{3\}, \{4\}, \{5\}, \{6\}\}$.

557: \end{example}

558:

559: For geometry (ii) we have (rescaled) recombination rates $\rho_{LS}$

560: and $\rho_{SR}$ between the left neutral and the selective and the

561: right and the selective locus, respectively. Here, transitions occur

562: from $(\xi^B, \xi^b)$ to

563: \begin{equation}\label{eq:rec2}

564: \begin{aligned}

565:   &\left((\xi^B\setminus\{\xi^B_j\})\cup\{\xi_j^B\cap\dickm r\}, \xi^b\cup\{\xi_j^B\cap \dickm \ell\} \right) && \text{ with rate }\rho_{LS}(1-X_{T-\beta}) &  \quad (3_{ii})\\

566:   &\left((\xi^B\setminus\{\xi^B_j\})\cup\{\xi_j^B\cap\dickm \ell\}, \xi^b\cup\{\xi_j^B\cap \dickm r\} \right) && \text{ with rate }\rho_{SR}(1-X_{T-\beta}) & (4_{ii})\\

567:   &\left((\xi^B\setminus\{\xi^B_j\}) \cup \{\xi^B_j\cap \dickm \ell, \xi^B_j\cap \dickm r\}, \xi^b\right) && \text{ with rate }(\rho_{LS} + \rho_{SR})X_{T-\beta} & (5_{ii})\\

568:   &\left(\xi^B, (\xi^b\setminus\{\xi^b_k\}) \cup \{\xi^b_k\cap \dickm \ell, \xi^b_k\cap \dickm r\}\right) && \text{ with rate }(\rho_{LS} + \rho_{SR})(1-X_{T-\beta}) & (6_{ii})\\

569:   &\left(\xi^B\cup\{\xi_k^b\cap \dickm \ell\}, (\xi^b\setminus\{\xi^b_k\})\cup\{\xi_k^b\cap\dickm r\}\right)  && \text{ with rate }\rho_{LS}X_{T-\beta} & (7_{ii})\\

570:   &\left(\xi^B\cup\{\xi_k^b\cap \dickm r\},

571:  ( \xi^b\setminus\{\xi^b_k\})\cup\{\xi_k^b\cap\dickm \ell\}\right) && \text{

572:     with rate }\rho_{SR}X_{T-\beta}. & (8_{ii})

573: \end{aligned}

574: \end{equation}

575: These events refer to a change in background from the beneficial to

576: the wild-type background either for the $L$-locus, $(3_{ii})$, or the

577: $R$-locus, $(4_{ii})$. Splits in the beneficial and wild-type

578: background may happen as in the case of geometry (i); see events

579: $(5_{ii})$ and $(6_{ii})$. Back-recombinations to the beneficial

580: background are denoted by $(7_{ii})$ for the $L$- and $(8_{ii})$ for the

581: $R$-locus. Observe that a transition which takes both loci on one line

582: from the beneficial to the wild-type background cannot occur for

583: geometry (ii); cf. event $(3_i)$.

584:

585: \begin{figure}

586: \begin{center}

587: \includegraphics[width=15.5cm]{ancrecgraph.ps}

588: \end{center}

589: \caption{\label{ancrecgraph}A structured ancestral recombination graph

590:   $\xi^{\mathcal X}$ conditioned on the frequency path $\mathcal X$ of

591:   the beneficial allele. Between times $\beta=0$ and $\beta=T$

592:   coalescences may occur at rates $(1)$ and $(2)$. Recombination

593:   events happen at rates $(3_i)$--$(8_i)$. The dashed lines indicate

594:   ancestry of the $L$-locus while the $R$-locus may be traced along

595:   dotted lines.}

596: \end{figure}

597:

598: \begin{definition}\label{def:1}

599:   Assume $\dickm \ell$ and $\dickm r$ are sets of left and right

600:   neutral loci, respectively, and $\mathcal X=(X_t)_{0\leq t\leq T}$

601:   is a frequency path of the beneficial allele given by

602:   \eqref{eq:SDE}.

603:

604:   Conditioned on $\mathcal X$, consider the jump process

605:   $\xi^{\mathcal X}=(\xi^{\mathcal X}_\beta)_{0\leq \beta \leq T}$,

606:   which starts in $\xi_0^{\mathcal X} = (\pi, \varnothing)$ for

607:   $\pi\in \mathcal P_{\dickm\ell \cup \dickm r}$ and makes transitions

608:   by coalescence events (1), (2), given by \eqref{eq:coal} and

609:   recombination events ($3_i$)-($8_i$) or ($3_{ii}$)-($8_{ii}$) from

610:   \eqref{eq:rec1} and \eqref{eq:rec2}, respectively. This process

611:   $\xi^{\mathcal X}$ is called the \emph{structured ancestral recombination graph for

612:     the $L$ and $R$ locus} conditioned on $\mathcal X$ for geometry

613:   (i) or (ii), respectively.

614:

615:   The mixture of $\xi^{\mathcal X}_T$ over the distribution of

616:   frequency paths given by \eqref{eq:SDE} defines the random partition

617:   $\Gamma_\pi = (\Gamma^B_\pi, \Gamma^b_\pi)$, i.e.,

618:     $$\Gamma_\pi :=\int \xi^{\mathcal X}_T \mathbb{P}\left[d\mathcal X\right].$$

619:   \end{definition}

620:

621: \section{Main result}

622: We study selective sweeps in the infinite population limit, i.e., the

623: frequency of the beneficial allele follows the SDE given by

624: \eqref{eq:SDE}. Moreover, selection is most efficient for large

625: selection coefficients. Our goal is to derive a simpler but approximate expression

626: for $\Gamma_\pi$ in the regime of large $\alpha$. It was shown in

627: \cite{EtheridgePfaffelhuberWakolbinger2006} that for the fixation time

628: $T$ of the beneficial allele

629: \begin{align}\label{eq:T}

630:   \mathbb E[T] = \frac{2\log\alpha}{\alpha} + \mathcal

631:   O\Big(\frac{1}{\alpha} \Big), \qquad \mathbb V[T] = \mathcal

632:   O\Big(\frac{1}{\alpha^2} \Big)

633: \end{align}

634: for large $\alpha$. This suggests that only under the scaling $\rho =

635: \mathcal O(\alpha/ \log\alpha)$ for the recombination rate a

636: non-trivial number of recombination events occurs during the sweep for large $\alpha$. This is true for all possible kinds of

637: recombination events during the sweep, so the recombination rates

638: $\rho_{SL}, \rho_{LR}$ and $\rho_{LS}, \rho_{SR}$ for geometries (i)

639: and (ii) should be of this order.  Henceforth, we assume

640: \begin{equation*}

641:   \begin{aligned}

642:     \text{Geometry (i):} &\qquad \rho_{SL} =

643:     \gamma_{SL}\frac{\alpha}{\log\alpha}, &\quad \rho_{LR} =

644:     \gamma_{LR}\frac{\alpha}{\log\alpha}, &\qquad 0<\gamma_{SL}, \gamma_{LR}<\infty\\

645:     \text{Geometry (ii):} &\qquad \rho_{LS} =

646:     \gamma_{LS}\frac{\alpha}{\log\alpha},&\quad \rho_{SR} =

647:     \gamma_{SR}\frac{\alpha}{\log\alpha},&\qquad 0<\gamma_{LS},

648:     \gamma_{SR}<\infty.

649:   \end{aligned}

650: \end{equation*}

651:

652: Our approximation of $\Gamma_\pi$ is based on a Yule tree, which

653: serves as an approximation of the genealogy at the selected locus.  A

654: Yule tree is the realization of a Yule process, i.e., a pure birth

655: process which starts with one line and every line splits in two lines

656: after an exponential waiting time.

657:

658: In our approximation the quantity

659: \begin{align}\label{eq:pjk}

660:   p_{i_1}^{i_2}(\gamma) := \exp\Big( -\frac{\gamma}{\log\alpha} \sum_{i=i_1+1}^{i_2}

661:   \frac{1}{i}\Big)

662: \end{align}

663: will play an important role.

664:

665: Assume $\dickm\ell$ and $\dickm r$ are sets of left and right loci and

666: $\pi\in\mathcal P_{\dickm \ell\cup\dickm r}$. Three mechanisms

667: determine the Yule approximation of the partition $\Gamma_\pi$. First,

668: we approximate splits in the beneficial background, i.e., events

669: $(5_i)$ and $(5_{ii})$, by the following procedure:

670: \begin{align}\label{eq:Y2}

671:   \text{\parbox{13cm}{For all partition elements

672:       $\pi_1,\ldots,\pi_{|\pi|}$ realize Bernoulli random variables

673:       $U_1,\ldots, U_{|\pi|}$ which are 1 with success probability

674:       $$ \text{geometry (i):} \quad 1-p_0^{\lfloor

675:         2\alpha\rfloor}(\gamma_{LR})\qquad \text{geometry (ii):}\quad

676:       1-p_0^{\lfloor 2\alpha\rfloor}(\gamma_{LS} + \gamma_{SR}).$$ If

677:       $U_i=1$, split the $i$th partition element in its left and right

678:       locus. Altogether, this defines a partition

679: $$ \pi' = \big\{ \{\pi_i \cap \dickm\ell\}, \{\pi_i\cap\dickm r\}: U_i=1\big\} \cup \big\{ \{\pi_i\}: U_i=0\big\}.$$

680: }}

681: \end{align}

682: Next, realize a Yule process with branching rate $\alpha$, i.e., each

683: line splits in two lines at rate $\alpha$. Stop this process when it

684: has $\lfloor {2}\alpha\rfloor$ lines. Call this tree $\mathcal Y$. To

685: obtain the genealogy of a sample of size $|\pi'|$ from this tree with

686: $\lfloor {2}\alpha\rfloor$ extant leaves, we use the following

687: construction:

688: \begin{align}\label{eq:Y1}

689:   \text{\parbox{13cm}{Start with $|\pi'|$ lines from the full Yule

690:       tree $\mathcal Y$ with $\lfloor 2\alpha \rfloor$ lines. When

691:       there are $k$ lines left at the time the full tree has $i$

692:       lines, the probability that a coalescence event occurs among the

693:       $k$ lines at the time the full tree goes from $i$ to $i-1$ lines

694:       is

695:       $$\frac{\binom{k}{2}}{\binom{i}{2}}.$$ By this construction we

696:       build a tree $\mathcal Y_{|\pi'|}$ with the partition elements

697:       of $\pi'$ as leaves and nodes which record the number of lines

698:       in the full Yule tree.  }}

699: \end{align}

700:

701: \begin{remark}

702:   To construct the sample tree $\mathcal Y_{|\pi'|}$ from $\mathcal Y$

703:   is a task equivalent to describing an exchangeable sample from a

704:   tree which arises by exchangeable binary coalescence dynamics. This

705:   has been studied by \cite{Saundersetal1984} and was recalled in

706:   \cite[Lemma 4.8]{EtheridgePfaffelhuberWakolbinger2006}.  If $I_t=i$

707:   is the number of lines in the Yule tree $\mathcal Y$ at time $t$,

708:   denote by $K_i$ the number of lines in $\mathcal Y_{|\pi'|}$ while

709:   $I_t=i$. The process $(K_i)_{\lfloor 2\alpha \rfloor\geq i\geq 1}$

710:   is a time-inhomogeneous Markov chain with transition probabilities

711: $$ \mathbb P[K_{i-1}=k-1 | K_i=k] = \frac{\binom{k}{2}}{\binom{i}{2}},

712: \qquad \qquad i=2,\ldots, \lfloor 2\alpha\rfloor, k=2,\ldots, |\pi'|.

713: $$

714: Moreover, the sample tree can be described forward in time by noting

715: that

716: $$ \mathbb P[K_{i}=k|K_{i-1}=k-1 ] = \frac{|\pi'| - k+1}{|\pi'|+i-1}. $$

717: \qed

718: \end{remark}

719:

720: The sample tree which is pruned out of the full tree in this way

721: represents the genealogy at the selected site. To describe the

722: genealogies at the partially linked neutral sites we mark the sample

723: Yule tree to determine further recombination events. A mark stands for

724: one (or two) recombination events that may occur. This works in the following way:

725: \begin{align}\label{eq:Y3}

726:   \text{\parbox{13cm}{Let a branch in the tree $\mathcal Y_{|\pi'|}$

727:       be given which starts when the full tree has $i_1$ lines and

728:       ends when the full genealogy has $i_2$ lines. For geometry (i),

729:       every branch can be hit by at most one of three different kinds

730:       of marks indicating recombination events.  These are $SL$-,

731:       $LR$-, and $SLR$-marks. Their probabilities are given in Table

732:       \ref{tab:marks}. For geometry (ii) the branch is hit

733:       independently by $LS$- and $SR$-marks with probabilities

734:       $(1-p_{i_1}^{i_2}(\gamma_{LS}))$ and $(1-p_{i_1}^{i_2}(\gamma_{SR}))$.\\[0.5ex]

735:       Here, $SL$-marks separate the $S$- from the $L$-locus on each

736:       branch of the tree, etc. For geometry (i), $SLR$-marks separate

737:       the $S$- from the $L$- and the $L$- from the $R$-locus.}}

738: \end{align}

739: %Observe the terms $p_0^k$ for the $SL$ and $SLR$ events for geometry

740: %(i).

741:

742: \begin{table}

743: \begin{center}

744: %\hspace{2cm} geometry (i) \hspace{5cm} geometry (ii)\hspace{2cm}

745: \vspace{1ex}

746:

747: \begin{tabular}{|c|c|}\hline

748:   \rule[-4mm]{0cm}{1cm}mark & probability \\\hline

749:   \rule[-4mm]{0cm}{1cm}$SL$ & $\big(1-p_{i_1}^{i_2}(\gamma_{SL})\big)p_0^{i_2}(\gamma_{LR})$ \\

750:   \rule[-4mm]{0cm}{1cm}$LR$ & $p_{i_1}^{i_2}(\gamma_{SL})\big(1-p_{i_1}^{i_2}(\gamma_{LR})\big)$ \\

751:   \rule[-4mm]{0cm}{1cm}$SLR$ & $\big(1-p_{i_1}^{i_2}(\gamma_{SL})\big)\big(1-p_0^{i_2}(\gamma_{LR})\big)$\\

752:   \rule[-4mm]{0cm}{1cm}no &  $p_{i_1}^{i_2}(\gamma_{SL})p_{i_1}^{i_2}(\gamma_{LR})$\\\hline

753: \end{tabular}

754: %\hspace{2ex}

755: %\begin{tabular}{|c|c|}\hline

756: %  \rule[-4mm]{0cm}{1cm}mark & probability \\\hline

757: %  \rule[-4mm]{0cm}{1cm}$LS$ & $\big(1-p_j^k(\gamma_{LS})\big)p_j^k(\gamma_{SR})$ \\

758: %  \rule[-4mm]{0cm}{1cm}$SR$ & $p_j^k(\gamma_{LS})\big(1-p_j^k(\gamma_{SR})\big)$ \\

759: %  \rule[-4mm]{0cm}{1cm}$LSR$& $\big(1-p_j^k(\gamma_{LS})\big)\big(1-p_j^k(\gamma_{SR})\big)$\\

760: %  \rule[-4mm]{0cm}{1cm}no   & $p_j^k(\gamma_{LS})p_j^k(\gamma_{SR})$\\\hline

761: %\end{tabular}

762: \end{center}

763: \caption{\label{tab:marks}For geometry (i), we mark every branch in

764:   the Yule tree by at most one of three different kinds of events.

765:   If a branch starts when the full Yule tree has $i_1$ and ends when

766:   it has $i_2$ lines, the probabilities for all marks are given in the

767:   table. }

768: \end{table}

769:

770: \begin{example}

771:   The above construction is illustrated in Figure \ref{yuleTree}. We

772:   consider geometry (i) here. A set $\dickm\ell=\{1,2,3,4\}$ of

773:   $L$-loci and $\dickm r=\{5,6,7,8\}$ of $R$-loci is given. Starting with

774:   $\pi=\{\{1,5\}, \{2,6\},\{3,7\},\{4,8\}\}$, every partition element

775:   is split with probability $1-p_0^{\lfloor 2\alpha\rfloor}(\gamma_{LR})$ according

776:   to \eqref{eq:Y2}. This results in the finer partition $\pi'$. The

777:   partition elements of $\pi'$ are used to construct a sample tree

778:   from a full Yule tree which has $\lfloor 2\alpha\rfloor$ lines. The

779:   coalescence probabilities for the sample are given by \eqref{eq:Y1}.

780:   On the sample tree, branches are marked by $SL$-, $LR$-, or

781:   $SLR$-marks according to Table \ref{tab:marks}. The resulting

782:   partition $\pi''$ is constructed as given in Definition \ref{def:2}.

783:   % Poisson processes with rates $\rho_{SL}$ and $\rho_{LR}$ are used

784:   % to construct $SL$, $LR$ and $SLR$ marks.  The leftmost mark is a

785:   % $SLR$ mark because the line is first hit by the Poisson process

786:   % with rate $\rho_{SL}$ and the recombined line is hit by the

787:   % Poisson process with rate $\rho_{LR}$.

788: \end{example}

789:

790: \begin{figure}

791: \begin{center}

792: \includegraphics[width=15.5cm]{yuleFigure.ps}

793: \end{center}

794: \caption{\label{yuleTree}The Yule process approximation for two linked

795:   neutral loci under a selective sweep. Here, we consider geometry

796:   (i). The $L$-locus may be traced back along dashed lines while

797:   dotted lines indicate ancestry of the $R$-locus. See text for

798:   explanation. }

799: \end{figure}

800:

801: We are now in a position to define our approximation based on the Yule

802: process.

803:

804: \begin{definition}\label{def:2}

805:   Assume $\dickm\ell$ and $\dickm r$ are sets of left and right

806:   neutral loci, respectively, and $\pi\in\mathcal P_{\dickm

807:     \ell\cup\dickm r}$. By \eqref{eq:Y2} construct the partition

808:   $\pi'$ and by \eqref{eq:Y1} and \eqref{eq:Y3} a Yule tree $\mathcal

809:   Y_{|\pi'|}$ with marks. For geometry (i) define the equivalence

810:   relation:

811:   \begin{equation}\label{eq:equivGeoi}

812:     j\sim k :\iff \begin{cases} \text{no $SL$-, $SLR$-mark on } \Yup & \text{ if }j,k\in\dickm \ell,\\

813:       \text{no $SL$-, $LR$-, $SLR$-mark on } \Yup, & \text{ if }j,k\in\dickm r\\[2ex]

814:       \text{no $SL$-mark on } \Yup, \\[1ex]

815:       \text{no $LR$-mark on } \Yri, & \text{ if }j\in\dickm \ell,k\in\dickm r\\

816:       \text{no $SLR$-mark on } \Yall

817:   \end{cases}

818:   \end{equation}

819:   where the bold lines indicate for which part of the tree $\mathcal

820:   Y_{|\pi'|}$ relating two lines with the root of the tree, the

821:   constraint on marks applies. For geometry (ii) set

822:   \begin{equation}\label{eq:equivGeoii}

823:     j\sim k :\iff \begin{cases} \text{no $LS$-mark on } \Yup, & \text{ if }j,k\in\dickm \ell,\\

824:       \text{no $SR$-mark on } \Yup, & \text{ if }j,k\in\dickm r,\\[2ex]

825:       \text{\parbox{5.2cm}{no $LS$-mark on \Yle, \\[1ex]

826:         no $SR$-mark on \Yri}} & \text{ if

827:       }j\in\dickm \ell, k\in\dickm r

828:     \end{cases}

829:   \end{equation}

830:   (The equations \eqref{eq:equivGeoi} and \eqref{eq:equivGeoii} indeed

831:   define equivalence relations, as can easily be checked.)  Each of

832:   these equivalence relations on $\dickm\ell\cup\dickm r$ defines a

833:   partition $\pi''$.  For geometry (i) there is a unique partition

834:   element

835: \begin{equation}\label{eq:uni}

836: \begin{aligned}

837:   \pi''_f = \Big\{j\in\dickm \ell: & \text{ no $SL$-, $SLR$-mark on

838:     \one{$\pi'_{(j)}$}}\Big\} \\ & \cup \Big\{k\in\dickm r: \text{ no $SL$-, $LR$-,

839:     $SLR$-mark on \one{$\pi'_{(k)}$}}\Big\}

840: \end{aligned}

841: \end{equation}

842: and for geometry (ii) a unique partition element

843: \begin{equation}

844: \begin{aligned}\label{eq:unii}

845:   \pi''_f = \Big\{j\in\dickm \ell: & \text{ no $LS$-mark on

846:     \one{$\pi'_{(j)}$}}\Big\} \cup \Big\{k\in\dickm r: \text{ no $SR$-mark on

847:     \one{$\pi'_{(k)}$}}\Big\}.

848: \end{aligned}

849: \end{equation}

850: Then the random partition

851: $$ \Upsilon_\pi:= ( \{\pi''_f\}, \pi''\setminus \{\pi_f''\})$$

852: is called the \emph{Yule approximation of } $\Gamma_\pi$.

853: \end{definition}

854:

855: \begin{example}

856:   For the example in Figure \ref{yuleTree} the $SL$-, $LR$- and

857:   $SLR$-marks on the sample tree lead to the realization

858: $$ \Upsilon_\pi = (\{\{3,4\}\}, \{\{1,2\}, \{5,6\},\{7\},\{8\}\}).$$

859: \end{example}

860:

861: \begin{theorem}\label{T}

862:   Let $\pi\in\mathcal P_{\dickm \ell\cup\dickm r}$ and $\Gamma_\pi$

863:   and $\Upsilon_\pi$ be as in Definitions \ref{def:1} and \ref{def:2}.

864:   Then,

865:   $$ \sup_{\xi\in\mathcal P'_{\dickm \ell\cup\dickm r}}

866:   \big|\mathbb P[\Gamma_\pi = \xi] - \mathbb P[\Upsilon_\pi=\xi] \big| =

867:   \mathcal O\Big( \frac{1}{(\log\alpha)^2}\Big). $$

868: \end{theorem}

869:

870: \noindent

871: %A few remarks are in order.

872:

873: \begin{remark}

874: \begin{enumerate}

875: \item The Theorem states that, for large $\alpha$, the random

876:   partitions $\Gamma_\pi$ and $\Upsilon_\pi$ are close in variation

877:   distance. Here, variation distance refers to the maximal difference

878:   in the probabilities to obtain any partition $\xi\in\mathcal

879:   P'_{\dickm\ell \cup\dickm r}$. The order of accuracy, given by the

880:   Landau symbol, still depends on several parameters. These are the

881:   cardinalities $\ell$ and $r$ and recombination constants

882:   $\gamma_{SL}, \gamma_{LR}$ for geometry (i) and $\gamma_{LS}$ and

883:   $\gamma_{SR}$ for geometry (ii). The proof of Theorem \ref{T} will

884:   be given in Section \ref{proof}.

885: \item At first sight, comparing Definitions \ref{def:2} and

886:   \ref{def:1}, the Yule approximation does not look any simpler than

887:   the exact model. However, the Yule approximation has advantages both

888:   analytically and computationally. The random partition $\Gamma_\pi$

889:   relies on constructing a frequency path $\mathcal X$, while the Yule

890:   approximation $\Upsilon_\pi$ constructs the ancestral recombination

891:   graph for the sample directly. Analytically, as we will see in

892:   Section \ref{app}, this means that explicit calculations are

893:   possible. Computationally, i.e., for simulations of the ancestral

894:   recombination graph, the direct construction of the ancestry of the

895:   sample allows for fast algorithms; see

896:   \cite{PfaffelhuberHauboldWakolbinger2006} for the case of a single

897:   neutral locus.

898: \item The current paper is a generalisation of results found in

899:   \cite{EtheridgePfaffelhuberWakolbinger2006} for a two-locus system

900:   with only one neutral locus. More precisely, consider the projection of

901:   $\Gamma_\pi$ on only one locus, i.e., on either $\dickm \ell$ or

902:   $\dickm r$. In Propositions 4.2 and 4.7 of that paper it was shown

903:   that the projection of $\Upsilon_\pi$ on $\dickm \ell$ or $\dickm r$

904:   is an approximation to a structured coalescent with an error in

905:   probability of the order $\mathcal O\big( (\log\alpha)^{-2}\big)$.

906: \item In \cite{EtheridgePfaffelhuberWakolbinger2006} an approximate

907:   sampling formula was given in the two-locus case. A similar approach

908:   would be possible here. However, we refrain from its derivation

909:   because it was shown in \cite{PfaffelhuberHauboldWakolbinger2006}

910:   that the sampling formula in the two-locus case only produces

911:   numerically sound results for $n\leq 5$.

912: \item As indicated numerically in

913:   \cite{PfaffelhuberHauboldWakolbinger2006}, the Yule approximation

914:   can be improved. To understand how this works, we need to collect

915:   the errors which contribute to the error of order $\mathcal

916:   O(1/(\log\alpha)^2)$. First, the Yule approximation ignores events

917:   $(2), (6_{ii}), (7)$ and $(8)$. Second, as will be clear in the

918:   proof of Proposition \ref{PropSecond}, the coalescent rate in the

919:   beneficial background is decreased from $1/X dt$ to $(1-X)/X dt$ by

920:   the Yule process. It is the latter error that dominates, at least in

921:   large samples, because the total coalescence rate increases

922:   quadratically with the number of lines. However, increasing the

923:   coalescence probability in \eqref{eq:Y1} to

924: \begin{align*}\label{eq:yuleCoal}

925:   1\wedge

926:   \frac{\binom{k}{2}}{\binom{i}{2}}\frac{1}{1-\tfrac{i-1}{2\alpha}}

927: \end{align*}

928: at the time the Yule tree has $i$ lines corrects for this error.

929: \item For simulations of genealogies it is most important that the

930:   Yule approximation given above is not restricted to the case of two

931:   neutral loci. The take-home-message from the construction of the

932:   Yule approximation is that splits in the beneficial background are

933:   generated first and afterwards marks on a Yule tree determine all

934:   recombination events. Both splits in the beneficial background and

935:   recombination events along the Yule tree can be given along a

936:   continuous chromosome.

937:   %The implementation of the corresponding

938:   %mechanisms would result in software which is comparable to more

939:   %accurate than existing programs (see e.g. the program {\tt ssweep}

940:   %mentioned in \cite{KimStephan2002}).

941: \end{enumerate}

942: \end{remark}

943: \qed

944:

945: \section{Application: {\bf\emph{D}}}

946: \label{app}

947: Lewontin's $D$ is a measure of linkage disequilibrium (non-random

948: association of alleles) and is frequently used as a simple statistic

949: in a multi-locus setting (\cite{Lewontin1964}; see also

950: \cite[(2.89)]{Ewens2004}). Given two loci $L$ and $R$ with alleles 0

951: or 1 at each locus, it is defined as

952: \begin{equation} \label{eq:D}

953: D = p_{LR} - p_L p_R

954: \end{equation}

955: where $p_{LR}$ is the frequency of individuals carrying allele 1 at

956: both loci, $p_L$ is the frequency of 1's at the $L$ locus and $p_R$ is

957: the frequency of 1's at the $R$ locus.

958:

959: To predict patterns of $D$ between pairs of neutral loci at the time

960: $T$ of fixation of a beneficial allele we next approximate $\mathbb

961: E[D(T)]$ using Theorem \ref{T}. It is crucial to observe that $\mathbb

962: E[p_{LR}(T)]$ as well as $\mathbb E[p_{L}(T) p_R(T)]$ may be derived

963: from the distribution of genealogies of linked neutral loci under

964: selection and the expected allele frequencies at the beginning of the

965: sweep. To see this, note that $\mathbb E[p_{LR}(T)]$ equals the

966: probability that the ancestors of the $L$- and $R$-locus of one

967: randomly picked individual from the population at time $T$ carry

968: alleles 1 at both neutral loci. Analogously, $\mathbb E[p_{L}(T)

969: p_R(T)]$ is the probability that the ancestors of the $L$- and

970: $R$-loci of two different individuals at time $T$ both carry allele 1.

971: Denote by $q$ the probability that both loci, $L$ and $R$ from one

972: individual, picked at time $T$, have a common ancestor at the

973: beginning of the sweep. Analogously, $q'$ is the same probability for

974: the $L$- and $R$-loci from two different individuals. Using these

975: definitions we see that

976: \begin{equation}\label{eq:pLRpLpR}

977: \begin{aligned}

978:   \mathbb E\left[p_{LR}(T)\right] & = q \cdot \mathbb E\left[p_{LR}(0)\right] + (1-q)\cdot

979:   \mathbb E\left[p_{L}(0)p_R(0)\right], \\ \mathbb E\left[p_{L}(T)p_R(T)\right] & = q'\cdot

980:   \mathbb E\left[p_{LR}(0)\right] + (1-q')\cdot \mathbb E\left[p_{L}(0)p_R(0)\right].

981: \end{aligned}

982: \end{equation}

983: Combining \eqref{eq:pLRpLpR} with the definition of $D$ from

984: \eqref{eq:D},

985: \begin{equation}\label{eq:D1}

986: \mathbb E[D(T)] = (q - q') \mathbb E[D(0)].

987: \end{equation}

988: Both $q$ and $q'$ may be approximated by Theorem \ref{T}. Formally,

989: setting $\dickm{\ell}=\{1\}, \dickm{r}=\{2\}$,

990: \begin{equation}

991: \begin{aligned}

992:   q & = \mathbb P\left[\Gamma^B_{\{1,2\}} \cup \Gamma^b_{\{1,2\}}= \{\{1,2\}\}\right],\\

993:   q'& = \mathbb P\left[\Gamma^B_{\{1\},\{2\}} \cup \Gamma^b_{\{1\},\{2\}} =

994:   \{\{1,2\}\}\right].

995: %  q & := \mathbb P[\xi^B(0) \cup \xi^b(0) = \{\{1,2\}\}| \xi^B(T) =

996: %  \{\{1,2\}\}],\\

997: %  q' & := \mathbb P[\xi^B(0) \cup \xi^b(0) =

998: %  \{\{1,2\}\}| \xi^B(T) = \{\{1\},\{2\}\}],

999: \end{aligned}

1000: \end{equation}

1001: As $\Gamma_\pi$ may be approximated by $\Upsilon_\pi$ this brings us

1002: in a position to predict patterns of $D$ at the end of a selective

1003: sweep.

1004:

1005: \begin{theorem}\label{T2}

1006: For geometry (i),

1007: \begin{equation}\label{eq:P:D:1}

1008: \begin{aligned}

1009:   \mathbb E[D(T)] & = p_0^{ 2\alpha }(2\gamma_{LR}) \Big(1 -

1010:   \sum_{k=2}^{ 2\alpha} \frac{2}{k(k+1)}

1011:   p_k^{2\alpha}(2\gamma_{SL})\Big)\mathbb E[D(0)] + \mathcal O\Big(

1012:   \frac{1}{(\log\alpha)^2}\Big),

1013: \end{aligned}

1014: \end{equation}

1015: and for geometry (ii),

1016: \begin{equation}\label{eq:P:D:2}

1017:   \mathbb E[D(T)] = \mathbb E[D(0)]\cdot\mathcal O\Big(

1018:   \frac{1}{(\log\alpha)^2}\Big).

1019: \end{equation}

1020: \end{theorem}

1021:

1022: \begin{figure}

1023: \begin{center}

1024: \includegraphics[width=10cm]{sim.ps}

1025: \end{center}

1026: \caption{\label{sim}The effect of a selective sweep on Lewontin's $D$

1027:   may be simulated in a Wright-Fisher model. In this process,

1028:   the frequency path of the beneficial allele is stochastic and the

1029:   ancestral recombination graph may be built conditioned on this

1030:   frequency path. The locations of the $L$ and $R$ locus are fixed.

1031:   The position of the selected site varies along the $x$-axis.  If we

1032:   compare the result from \eqref{eq:P:D:1} to equation (47) of

1033:   \cite{StephanSongLangley2006} we see that the Yule process

1034:   approximation is more accurate. The parameters of the Wright-Fisher

1035:   model are $N=10^5, \alpha=1000, \rho_{LR}=20$ and $D(0) = 0.0242$.}

1036: \end{figure}

1037:

1038: \begin{remark}

1039: \begin{enumerate}

1040: \item Patterns of Lewontin's $D$ can be studied by deterministic

1041:   forward calculations instead of our genealogical approach. This was

1042:   carried out in \cite{StephanSongLangley2006} under the assumption

1043:   that strong selection leads to a deterministic behaviour of allele

1044:   frequencies. Specifically, the frequency of the beneficial allele

1045:   follows the logistic differential equation

1046:   $$ dX = \alpha X(1-X)dt,\qquad\qquad X_0 = \tfrac{1}{N} $$

1047:   instead of the stochastic path given by \eqref{eq:SDE}. Predictions

1048:   of $D$ at all times during the selective sweep were given. In

1049:   particular, their equation (47) approximates values of $D$ at the

1050:   end of the sweep for geometry (i).

1051:   %They found

1052:   %\begin{equation}

1053:   %  \begin{aligned}

1054:   %    D(T) = e^{-\rho_{LR} T }\Big( 1 - \Big(

1055:   %    \frac{p_S(0)}{1-p_S(0)}\Big)^{2\rho_{RS}/\alpha} \Big(

1056:   %    \frac{1-2p_S(0)}{1-p_S(0)}\Big)^{2}\Big) D(0)

1057:   %  \end{aligned}

1058:   %\end{equation}

1059:   %where $p_S(0)$ is the frequency of the beneficial allele at the

1060:   %beginning of the sweep and $$T = \frac{2}{\alpha}\log\Big(

1061:   %\frac{1-p_S(0)}{p_S(0)}\Big) $$ is the (deterministic) duration of

1062:   %the sweep. (Their results differ by a factor of 2 because they take

1063:   %$s$ to be the selective advantage of a diploid individual which is

1064:   %homozygous for the beneficial allele while $s$ in our analysis is

1065:   %the advantage of a heterozygote.)

1066:

1067:   % Almost the same result was found by

1068:   % \cite{LehnertStephanPfaffelhuber2006} who used a genealogical

1069:   % approach and approximated the genealogy at the selected site as a

1070:   % star-like phylogeny rather than a Yule process; see their equation

1071:   % (8).

1072:   In real populations, random effects due to genetic drift are not

1073:   negligible.  This has been pointed out by

1074:   \cite{LehnertStephanPfaffelhuber2006}.

1075: % and may be seen from the

1076: %  divergence of the deterministic analysis, carried out by

1077: %  \cite{StephanSongLangley2006}, from the simulation of a

1078: %  Wright-Fisher model in Figure \ref{sim}.

1079:   The Yule process approximation captures most random effects. Indeed,

1080:   comparison with simulations from

1081:   \cite{LehnertStephanPfaffelhuber2006} shows that the results

1082:   produced by the Yule process approximation are more accurate than

1083:   those of \cite{StephanSongLangley2006}.

1084:

1085:  % These effects are not captured by the analysis in

1086:  % \cite{StephanSongLangley2006} but in the Yule process approximation

1087:  % of Theorem \ref{T2}.  Indeed, Figure \ref{sim} shows that the Yule

1088:  % process approximation produces more accurate results than the

1089:   %results of \cite{StephanSongLangley2006}.

1090:

1091: \item For empirical studies it is most interesting to know which

1092:   patterns of linkage disequilibrium  to look for in real data. The

1093:   pattern genetic hitchhiking can produce was discussed in

1094:   \cite{StephanSongLangley2006} and \cite{ReedTishkoff2006}.

1095:   Surprisingly, hitchhiking reduces levels of linkage disequilibrium

1096:   compared to the neutral expectation. This is evident from Figure

1097:   \ref{sim}. If the selected locus is far from both neutral loci,

1098:   linkage disequilibrium between the neutral loci is not affected by

1099:   hitchhiking. Therefore, values of $D$ for large $\rho_{SL}$ converge

1100:   to the expectation of $D$ under neutrality.  This effect was taken

1101:   up by \cite{ReedTishkoff2006} to argue that genetic hitchhiking

1102:   produces patterns in the association of alleles similar to

1103:   recombination hotspots, which are, e.g., important in genetic

1104:   association studies in humans (\cite{hapmap2005}). However, genetic

1105:   hitchhiking certainly produces patterns different from recombination

1106:   hotspots in general, e.g., a low neutral diversity or a

1107:   distinctive site frequency spectrum (\cite{FayWu2000}).

1108: \item An accurate approximation of $\mathbb E[D(T)]$ does not suffice

1109:   to predict patterns of linkage disequilibrium in general. In

1110:   addition to genetic drift, random effects which affect $D(T)$ were

1111:   found in \cite{StephanSongLangley2006} to be the allelic type of the

1112:   founder of the sweep and its frequency. The resulting variance in

1113:   $D$ can be considerably higher than under neutrality.

1114:   % Additionally, as simulations in

1115:   % \cite{LehnertStephanPfaffelhuber2006} show, the measure $r^2$ for

1116:   % linkage disequilibrium does not vanish near the selected locus. As

1117:   %   $$ \mathbb E[r^2] = \mathbb E\Big[ \frac{D^2}{p_L (1-p_L) p_R(1-p_R)}\Big] \approx

1118:   %   \frac{\mathbb{V}[D]}{\mathbb E[p_L (1-p_L) p_R(1-p_R)]}$$ (see

1119:   %   \cite{McVean2002}) this indicates that $\mathbb{V}[D(T)]$ is not

1120:   %   negligible.

1121: \end{enumerate}

1122: \end{remark}

1123:

1124: \noindent

1125: Now we come to the proof of Theorem \ref{T2}.

1126:

1127: \begin{proof} The key in the proof is to compute the probabilities $q$

1128:   and $q'$. This is achieved by the Yule process approximation

1129:   $\Upsilon_\pi$ of Theorem \ref{T}.

1130:

1131:    We start with geometry (ii). Here, we can see from the Yule

1132:    approximation \eqref{eq:equivGeoii} that $q = q'$ up to a term of

1133:    order $1/(\log\alpha)^2$ since one $L$ and one $R$ locus are

1134:    identical by descent iff there is no $LS$ mark on

1135:    \text{\parbox{1.2cm}{

1136: \beginpicture

1137: \setcoordinatesystem units <0.1cm, 0.1cm>

1138: \setplotarea x from 3 to 17, y from 3 to 17

1139: \plot 10 6 10 10 7 13 10 10 13 13 /

1140: %\multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /

1141: \multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /

1142: \multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /

1143: \put{$\bullet$}[Cc]  at 10 6

1144: \put{\footnotesize$1$} [cC] at 5 14

1145: \put{\footnotesize$2$} [cC] at 15 14

1146: \endpicture}}

1147: and no $SR$ mark on

1148:    \text{\parbox{1.2cm}{

1149: \beginpicture

1150: \setcoordinatesystem units <0.1cm, 0.1cm>

1151: \setplotarea x from 3 to 17, y from 3 to 17

1152: \plot 10 6 10 10 7 13 10 10 13 13 /

1153: \multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /

1154: %\multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /

1155: \multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /

1156: \put{$\bullet$}[Cc]  at 10 6

1157: \put{\footnotesize$1$} [cC] at 5 14

1158: \put{\footnotesize$2$} [cC] at 15 14

1159: \endpicture}}. It

1160:    does not depend on the linkage of the $L$ and the $R$ locus at the

1161:    end of the sweep.  Consequently, \eqref{eq:P:D:2} follows.

1162:

1163:    For geometry (i), we start with the approximation of $q'$. For one

1164:    $L$ and one $R$ locus from two different individuals there is a

1165:    random number $K$ of lines in the full tree of the Yule

1166:    approximation at the time the selected loci which are linked to the

1167:    neutral ones coalesce.  To obtain the distribution of $K$, we

1168:    compute

1169: $$ \mathbb P[K=k] = \prod_{l=k+1}^{ 2\alpha} \left( 1 -

1170: \frac{1}{\binom{l}{2}}\right) \frac{1}{\binom{k}{2}} = \left(

1171: \prod_{l=k+1}^{ 2\alpha} \frac{(l+1)(l-2)}{l(l-1)}\right)

1172: \frac{2}{k(k-1)} = \frac{2}{k(k+1)} + \mathcal O\left(

1173: \frac{1}{\alpha}\right),$$ which is a special case of

1174: \cite[(4.16)]{EtheridgePfaffelhuberWakolbinger2006}. We read from

1175: \eqref{eq:equivGeoi} that the $L$ and $R$ locus are identical by

1176: descent at the beginning of the sweep if and only if (a) no mark or an

1177: $SL$ mark falls on

1178:    \text{\parbox{1.2cm}{

1179: \beginpicture

1180: \setcoordinatesystem units <0.1cm, 0.1cm>

1181: \setplotarea x from 3 to 17, y from 3 to 17

1182: \plot 10 6 10 10 7 13 10 10 13 13 /

1183: %\multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /

1184: %\multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /

1185: \multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /

1186: \put{$\bullet$}[Cc]  at 10 6

1187: \put{\footnotesize$1$} [cC] at 5 14

1188: \put{\footnotesize$2$} [cC] at 15 14

1189: \endpicture}},

1190: (b) no mark hits

1191:    \text{\parbox{1.2cm}{

1192: \beginpicture

1193: \setcoordinatesystem units <0.1cm, 0.1cm>

1194: \setplotarea x from 3 to 17, y from 3 to 17

1195: \plot 10 6 10 10 7 13 10 10 13 13 /

1196: \multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /

1197: %\multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /

1198: %\multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /

1199: \put{$\bullet$}[Cc]  at 10 6

1200: \put{\footnotesize$1$} [cC] at 5 14

1201: \put{\footnotesize$2$} [cC] at 15 14

1202: \endpicture}}

1203: and (c) no mark or an $LR$ mark falls on

1204:    \text{\parbox{1.2cm}{

1205: \beginpicture

1206: \setcoordinatesystem units <0.1cm, 0.1cm>

1207: \setplotarea x from 3 to 17, y from 3 to 17

1208: \plot 10 6 10 10 7 13 10 10 13 13 /

1209: %\multiput{\tiny $\bullet$} at 10 10 *100  0.03 0.03 /

1210: \multiput{\tiny $\bullet$} at 10 10 *100  -0.03 0.03 /

1211: %\multiput{\tiny $\bullet$} at 10 10 *100  0 -0.04 /

1212: \put{$\bullet$}[Cc]  at 10 6

1213: \put{\footnotesize$1$} [cC] at 5 14

1214: \put{\footnotesize$2$} [cC] at 15 14

1215: \endpicture}}. Hence we compute

1216: \begin{equation}

1217: \begin{aligned}

1218:   q' & = \sum_{k=2}^{ 2\alpha} \frac{2}{k(k+1)}

1219:   p_0^k(\gamma_{LR}) p_k^{ 2\alpha }(\gamma_{SL})p_k^{ 2\alpha

1220:   }(\gamma_{LR})p_k^{ 2\alpha }(\gamma_{SL}) + \mathcal O\left(

1221:   \frac{1}{(\log\alpha)^2}\right)\\ & = p_0^{ 2\alpha }(\gamma_{LR})

1222:   \sum_{k=2}^{ 2\alpha} \frac{2}{k(k+1)} p_k^{2\alpha}(2\gamma_{SL}) +

1223:   \mathcal O\left( \frac{1}{(\log\alpha)^2}\right).

1224: \end{aligned}

1225: \end{equation}

1226: For $q$ we have to distinguish the cases where the $L$- and the $R$-loci

1227:  split or not. If they do not split, the $L$- and $R$-locus have

1228: the same ancestor at the beginning of the sweep if and only if there

1229: is neither an $LR$- nor an $SLR$-mark on \one{$\{1,2\}$}. If they split,

1230: the probability of a common ancestor is $q'$. Therefore,

1231: \begin{equation}

1232: \begin{aligned}

1233: q & = p_0^{ 2\alpha }(\gamma_{LR}) p_0^{ 2\alpha }(\gamma_{LR}) +

1234: \big(1-p_0^{ 2\alpha }(\gamma_{LR})\big) q'+

1235:   \mathcal O\left( \frac{1}{(\log\alpha)^2}\right).

1236: \end{aligned}

1237: \end{equation}

1238: Hence

1239: \begin{equation}

1240: \begin{aligned}

1241:   \mathbb E[D(T)] & = p_0^{ 2\alpha }(\gamma_{LR}) \big(p_0^{2\alpha

1242:   }(\gamma_{LR}) - q'\big) \mathbb E[D(0)]+ \mathcal O\left(

1243:   \frac{1}{(\log\alpha)^2}\right)

1244: \end{aligned}

1245: \end{equation}

1246: and the result follows.

1247: \end{proof}

1248:

1249: \section{Proof of Theorem  \ref{T}}

1250: \label{proof}

1251: The proof deals with geometries (i) and (ii) simultaneously.  We will

1252: write events at rates (3)-(8) whenever we refer to the rates

1253: ($3_i$)-($8_i$) for geometry (i) and ($3_{ii}$)-($8_{ii}$) for

1254: geometry (ii), respectively.

1255:

1256: We will be dealing with several random partitions all of which agree

1257: up to an error of order $\mathcal O\big( (\log(\alpha))^{-2}\big)$.

1258: More precisely, we will prove

1259: $$\Gamma_\pi \quad \stackrel{\text{Prop. \ref{PropFirst}}}{\approx}\quad

1260: \Delta_\pi \quad \stackrel{\text{Prop.

1261:     \ref{PropSecond}}}{\approx}\quad \Xi_\pi \quad

1262: \stackrel{\text{Prop. \ref{PropThird}}}{\approx}\quad \Upsilon_\pi$$ where

1263: $\Gamma_\pi, \Delta_\pi, \Xi_\pi$ and $\Upsilon_\pi$ are given in

1264: Definitions \ref{def:1}, \ref{def:3}, \ref{def:4} and \ref{def:2},

1265: respectively, and `$\approx$' means that the random partitions differ

1266: by $\mathcal O\left( (\log\alpha)^{-2}\right)$ in variation distance.

1267:

1268: While $\Gamma_\pi$ is the random partition which is defined by the

1269: structured ancestral recombination graph, the other random partitions

1270: are approximations. First, $\Delta_\pi$ arises by (i) ignoring events

1271: which occur according to rates $(2), (6_{ii}), (7)$ and $(8)$ and (ii)

1272: realizing all events according to rate $(5)$ first and only

1273: afterwards constructing the process using rates $(1), (3), (4)$ and

1274: $(6_i)$. Second, $\Xi_\pi$ already deals with the Yule process. It is

1275: derived by marking an infinite Yule tree by two constant rate Poisson

1276: processes with rates $\rho_{SL}, \rho_{LR}$ for geometry (i) and

1277: $\rho_{LS}, \rho_{SR}$ for geometry (ii). Finally, the Yule

1278: approximation $\Upsilon_\pi$ of $\Gamma_\pi$ arises by considering

1279: only the number of lines in an infinite Yule tree at times of

1280: coalescence in a sample.

1281:

1282: In the whole proof we rely on a probability measure $\mathbb P$ on a

1283: probability space on which the solution of \eqref{eq:SDE} as well as

1284: arbitrarily many independent Poisson processes and other random

1285: variables are realized.

1286:

1287: \begin{definition}

1288: \label{def:3}

1289: Define a $\mathcal P'_{\dickm\ell\,\cup\,\dickm r}$-valued random

1290: variable $\Delta_\pi$ as follows: starting in $\pi\in\mathcal

1291: P_{\dickm \ell\cup\dickm r}$ split all partition elements $\xi\in\pi$

1292: independently into $\xi \cap \dickm \ell, \xi \cap \dickm r$ with

1293: probability

1294: \begin{equation} \label{Split} 1-\mathbb E\left[\exp\left( - \rho\cdot

1295:     \int_0^T X_s\,ds\right)\right]

1296: \end{equation}

1297: where $\rho= \rho_{LR}$ for geometry (i) and $\rho= \rho_{LS}+

1298: \rho_{SR}$ for geometry (ii).  The resulting partition $\pi'$ is used

1299: for the starting point $(\pi',\varnothing)$ of a process

1300: $\eta^{\mathcal X} = (\eta^{\mathcal X}_\beta)_{0\leq\beta\leq T}$,

1301: conditioned on a frequency path $\mathcal X = (X_t)_{0\leq t\leq T}$

1302: with transitions according to events ($1$),($3_i$), ($4_i$), ($6_i$),

1303: given by \eqref{eq:rec1}, for geometry (i) and to events ($1$),

1304: ($3_{ii}$) and ($4_{ii}$), given by \eqref{eq:rec2}, for geometry

1305: (ii), respectively. Given $\eta^{\mathcal X}$, define $$\Delta_\pi :=

1306: \int \eta^{\mathcal X}_T \mathbb P[d\mathcal X].$$

1307: \end{definition}

1308:

1309: \begin{proposition}\label{PropFirst}

1310:   Let $\pi\in\mathcal P_{\dickm \ell\cup\dickm r}$ and $\Gamma_\pi$

1311:   and $\Delta_\pi$ be as in Definitions \ref{def:1} and \ref{def:3}.

1312:   Then,

1313:   $$ \sup_{\xi\in\mathcal P'_{\dickm \ell\cup\dickm r}}

1314:   \big|\mathbb P[\Gamma_\pi = \xi] - \mathbb P[\Delta_\pi=\xi] \big|

1315:   = \mathcal O\left( \frac{1}{(\log\alpha)^2}\right). $$

1316: \end{proposition}

1317:

1318: \begin{proof}

1319:   We proceed in several steps. Our arguments in Step 1 show that we

1320:   may discard events which occur at rates (2), ($6_{ii}$), (7) and

1321:   (8). In Step 2 we use a fixed number of Poisson processes to

1322:   generate the random partition we want to approximate.  Our goal is

1323:   to separate events ($5$) from the rest by verifying a certain order

1324:   of the possible events and establishing an approximate independence

1325:   of the events (5). Particularly, we show in Step 3 that splits in

1326:   the beneficial background (i.e., events (5)) take place before all

1327:   other events with high probability. The approximate independence

1328:   will be proved in Steps 5 and 6 by an application of a general

1329:   result on mixed Poisson processes we establish in Step 4.

1330:

1331:   \begin{step}

1332:     (Small probability of events (2), ($6_{ii}$), (7) and (8))\\

1333:     First, note that by Proposition 3.4 of

1334:     \cite{EtheridgePfaffelhuberWakolbinger2006} events ($2$), i.e.,

1335:     coalescences in the wild-type background, have a probability of

1336:     order $\mathcal O\big((\log\alpha)^{-2}\big)$.  Furthermore,

1337:     events ($7$) and ($8$) are back-recombinations into the beneficial

1338:     background and hence have a probability of order $\mathcal

1339:     O\big((\log\alpha)^{-2}\big)$ as well.  Additionally, for geometry

1340:     (ii), events ($6_{ii}$), i.e., splits in the wild-type background,

1341:     can only occur if a coalescence event (2) has happened before.

1342:     % by Proposition 3.4 of

1343:     % \cite{EtheridgePfaffelhuberWakolbinger2006}.

1344:     As a consequence, we can discard events which occur at rates (2),

1345:     ($6_{ii}$), (7) and (8) producing only an error in variation

1346:     distance of at most $\mathcal O\big((\log\alpha)^{-2}\big)$.

1347:

1348:     So we are left with a $\mathcal P'_{\dickm\ell\cup\dickm

1349:       r}$-valued stochastic process conditioned on $\mathcal X$,

1350:     $\zeta^{\mathcal X} = (\zeta^{\mathcal X}_\beta)_{0\leq \beta\leq

1351:       T}$, which arises by events $(1)$, $(3)$,$(4)$,$(5)$ and

1352:     $(6_i)$, started in $\zeta_0^{\mathcal X} = (\pi,\varnothing)$.

1353:   \end{step}

1354:

1355:   \begin{step} (Construction of $\zeta^{\mathcal X}$ by Poisson processes)\\

1356:     Recall that $\ell:=|\dickm {\ell}|$ and $r:=|\dickm{r}|$ are the

1357:     number of $L$ and $R$ loci under consideration. Take Poisson

1358:     processes which are all conditionally independent given the random

1359:     frequency path $\mathcal X$ of the beneficial allele.  For

1360:     coalescence, take a Poisson process $\mathcal T_{\mathfrak 1}$ with

1361:   \begin{equation}

1362:     \begin{aligned}

1363:       \text{ rate }\binom{\ell + r}{2} \frac{1}{X_{T-\beta}} &&

1364:       && \qquad

1365:       (\text{coalescence in the beneficial background}) && \qquad \mathfrak{(1)},

1366:     \end{aligned}

1367:   \end{equation}

1368:   at time $\beta$; for recombination events take Poisson processes

1369:   $\mathcal T_{\mathfrak {3_i}}$, $\mathcal T_{\mathfrak {4_i}}$,

1370:   $\mathcal T_{\mathfrak {5_i}}$ with

1371:   \begin{equation}

1372:     \begin{aligned}

1373:       &\text{ rate }\ell\rho_{SL}(1-X_{T-\beta})& &\qquad(\text{rec. to the wild-type background})&& \qquad \mathfrak{(3_i)},\\

1374:       & \text{ rate }  r \rho_{LR}(1-X_{T-\beta})& &\qquad(\text{rec. to or split in the wild-type background})&&\qquad \mathfrak{(4_i)},\\

1375:       &\text{ rate } r \rho_{LR}X_{T-\beta} & &\qquad(\text{split in the beneficial background})&&\qquad \mathfrak{(5_i)},\\

1376: %      \mathfrak{(6_i)} && \text{ rate }  (\ell\wedge r) \rho_{LR}(1-X) && \qquad(\text{split in $b$})\\

1377:     \end{aligned}

1378:   \end{equation}

1379:   at time $\beta$ for geometry (i) and Poisson processes $\mathcal

1380:   T_{\mathfrak {3_{ii}}}$, $\mathcal T_{\mathfrak {4_{ii}}}$, $\mathcal

1381:   T_{\mathfrak {5_{ii}}}$ with

1382:   \begin{equation}

1383:     \begin{aligned}

1384:       &\text{ rate } \ell \rho_{LS}(1-X_{T-\beta})& &\qquad(\text{rec. to the wild-type background})&&\qquad \qquad \quad\: \mathfrak{(3_{ii})},\\

1385:       & \text{ rate } r \rho_{SR}(1-X_{T-\beta})& &\qquad(\text{rec. to the wild-type background})&& \qquad \qquad \quad\: \mathfrak{(4_{ii})},\\

1386:       &\text{ rate } r (\rho_{LS}+\rho_{SR})X_{T-\beta}&

1387:       &\qquad(\text{split in the beneficial background})&& \quad

1388:       \qquad \qquad \:\mathfrak{(5_{ii})},

1389:     \end{aligned}

1390:   \end{equation}

1391:   at time $\beta$ for geometry (ii). We have combined recombinations

1392:   to the wild-type and splits in the wild-type background in case of

1393:   geometry $(i)$ since they happen with the same rates.

1394:

1395:   Additionally, let $W=(W_{{\mathfrak i},m})_{{\mathfrak i} =

1396:     {\mathfrak 1}, {\mathfrak 3}, {\mathfrak 4}, {\mathfrak 5},

1397:     m=1,2,\ldots}$ be a random array such that all $W_{{\mathfrak

1398:       i},m}$'s are independent, $W_{{\mathfrak 1},m}$ is uniformly

1399:   distributed on all pairs of $\dickm{\ell}\cup\dickm{r}$,

1400:   $W_{{\mathfrak 3},m}$ is uniformly distributed on $\dickm{\ell}$,

1401:   and $W_{{\mathfrak 4},m}$ and $W_{{\mathfrak 5},m}$ are uniformly

1402:   distributed on $\dickm{r}$, $m=1,2,\ldots$.

1403:

1404:   The set $\dickm{\ell}\cup\dickm{r}$ can be totally ordered, so we

1405:   may assume that every partition element in $\zeta\in\mathcal

1406:   P'_{\dickm{\ell}\cup \dickm{r}}$ has a smallest element. Recall that

1407:   we write $\zeta_{(j)}$ for the partition element containing

1408:   $j\in\dickm{\ell}\cup\dickm{r}$.

1409:

1410:   We abbreviate by $\mathcal T_{\mathfrak 3}$-$\mathcal T_{\mathfrak

1411:     5}$ the Poisson processes $\mathcal T_{\mathfrak {3_i}}$-$\mathcal

1412:   T_{\mathfrak {5_i}}$ for geometry (i) and the Poisson processes

1413:   $\mathcal T_{\mathfrak {3_{ii}}}$-$\mathcal T_{\mathfrak {5_{ii}}}$ for

1414:   geometry (ii). We next show that the distribution of

1415:   $\zeta^{\mathcal X}_T$ is the image measure of the tuple $(\mathcal

1416:   T_{\mathfrak 1}, \mathcal T_{\mathfrak 3}, \mathcal T_{\mathfrak 4},

1417:   \mathcal T_{\mathfrak 5}, W)$ under a map $\varphi$. Specifically,

1418:   the distribution of $\zeta_T^{\mathcal X}$ is uniquely determined by

1419:   the distribution of $(\mathcal T_{\mathfrak 1}, \mathcal

1420:   T_{\mathfrak 3}, \mathcal T_{\mathfrak 4}, \mathcal T_{\mathfrak 5},

1421:   W)$.

1422:

1423:   To define $\varphi$, consider a discrete set $\mathbf T_{\mathfrak

1424:     1}\subseteq[0,T]$ and finite sets $\mathbf T_{\mathfrak 3},\mathbf

1425:   T_{\mathfrak 4},\mathbf T_{\mathfrak 5}\subseteq [0,T]$ such that

1426:   $\mathbf T_{\mathfrak i_1}\cap \mathbf T_{\mathfrak i_2}=\varnothing$

1427:   for $\mathfrak{i_1} \neq \mathfrak{i_2}$ and set $\mathbf

1428:   T=\bigcup_{\mathfrak i} \mathbf T_{\mathfrak i}$. Furthermore $w =

1429:   (w_{{\mathfrak i},m})_{{\mathfrak i} = {\mathfrak 1}, {\mathfrak 3},

1430:     {\mathfrak 4}, {\mathfrak 5}, m=1,2,\ldots}$ such that for all

1431:   $m=1,2,\ldots$, $w_{{\mathfrak 1},m}$ is a pair in

1432:   $\dickm{\ell}\cup\dickm{r}$, $w_{{\mathfrak 3},m} \in \dickm{\ell}$

1433:   and $w_{{\mathfrak 4},m}, w_{{\mathfrak 5},m} \in\dickm{r}$. Given

1434:   $(\mathbf T_{\mathfrak 1},\mathbf T_{\mathfrak 3},\mathbf

1435:   T_{\mathfrak 4},\mathbf T_{\mathfrak 5},w)$ we generate a partition

1436:   by considering the events in $\mathbf T$ in decreasing order. Assume

1437:   $\zeta^{\mathcal X}_0=(\pi,\varnothing)$ and after the $(m-1)$st

1438:   event at time $\beta$ we obtain a partition $\zeta^{\mathcal

1439:     X}_\beta = (\zeta^B, \zeta^b)\in\mathcal

1440:   P'_{\dickm{\ell}\cup\dickm{r}}$ and the $m$th event in $\mathbf T$

1441:   to be realized happens at time $\beta'\in \mathbf T$.

1442:

1443:   Consider first the case that $\beta'$, the $m$th event in $\mathbf T$, is the

1444:   $m_{\mathfrak 1}$st event in $\mathbf T_{\mathfrak 1}$.

1445:   The pair $w_{{\mathfrak 1}, m_{\mathfrak 1}}=(j,k)$ gives a random

1446:   pair of loci.  If $\zeta_{(j)}, \zeta_{(k)}\in\zeta^B$ and if both,

1447:   $j$ and $k$, are the smallest elements of their partition elements,

1448:   coalesce these partition elements, i.e., make the transition

1449:   $$ \left(\zeta^B,\zeta^b\right) \longrightarrow \left((\zeta^B\setminus \{\zeta_{(j)},

1450:   \zeta_{(k)}\}) \cup \{ \zeta_{(j)}\cup \zeta_{(k)}\},\; \zeta^b\right).$$

1451:   Otherwise do nothing.

1452:

1453:   The next case to consider is that $\beta'$ is the $m_{\mathfrak

1454:     3}$rd event in $\mathbf T_{\mathfrak 3}$ and $w_{{\mathfrak 3},

1455:     m_{\mathfrak 3}}=j$ for some $j\in\dickm{\ell}$. If

1456:   $\zeta_{(j)}\in\zeta^B$ and if $j$ is the smallest element of

1457:   $\zeta_{(j)}\cap\dickm{\ell}$, change the partition element from

1458:   $\zeta^B$ to $\zeta^b$, i.e., make the transition

1459:   \begin{equation}\label{eq:trans3}

1460:     \left(\zeta^B,\zeta^b\right) \longrightarrow \left(\zeta^B\setminus \{\zeta_{(j)}\},\; \zeta^b\cup\{\zeta_{(j)} \}\right).

1461:   \end{equation}

1462:   Otherwise do nothing. The case $\beta'\in \mathbf T_{\mathfrak 5}$ is similar and

1463:   is omitted.

1464:

1465:   If $\beta'$ is the $m_{\mathfrak 4}$th event in $\mathbf

1466:   T_{\mathfrak 4}$ and $w_{{\mathfrak 4}, m_{\mathfrak 4}}=j$ for

1467:   $j\in\dickm{r}$ the partition $\zeta$ again only changes if $j =

1468:   \min \zeta_{(j)}\cap\dickm{r}$. We distinguish two cases,

1469:   $\zeta_{(j)}\in\zeta^B$ and $\zeta_{(j)}\in\zeta^b$. In the former

1470:   case, split the $L$- and $R$-loci in the partition element in two

1471:   partition elements and bring all $R$-loci into the wild-type

1472:   background, i.e., make the transition

1473:   \begin{equation}\label{eq:trans4a} \left(\zeta^B,\zeta^b\right) \longrightarrow

1474:     \left((\zeta^B\setminus\{\zeta^B_{(j)}\}) \cup \{\zeta^B_{(j)}\cap \dickm

1475:     \ell\},\; \zeta^b\cup \{\zeta^B_{(j)}\cap\dickm r\}\right).\end{equation}

1476:   This corresponds to an event (4).

1477:   In the latter case split all $L$- and $R$-loci of $\zeta_{(j)}$ and leave them in

1478:   the wild-type background, i.e., make the transition

1479:   \begin{equation}\label{eq:trans4b}

1480:   \left(\zeta^B,\zeta^b\right) \longrightarrow \left(\zeta^B,\;

1481:     (\zeta^b\setminus\{\zeta_{(j)}\}) \cup \{\zeta_{(j)} \cap

1482:     \dickm{\ell}, \zeta_{(j)}\cap\dickm{r}\}\right),

1483:     \end{equation}

1484:   which corresponds to an event $(6_i)$.

1485:   Recall that for geometry (ii) one $L$- and one $R$-locus cannot recombine to the wild-type background

1486:   together.  Hence partition elements in $\zeta^b$ are either subsets of

1487:   $\dickm{\ell}$ or of $\dickm{r}$, so that the last transition cannot occur for this geometry.

1488:

1489:   By generating all events according to this procedure we end with a

1490:   partition $\zeta^{\mathcal X}_T$. Therefore we have defined the map

1491:   $\varphi: (\mathbf T_{\mathfrak 1},\mathbf T_{\mathfrak 3},\mathbf

1492:   T_{\mathfrak 4},\mathbf T_{\mathfrak 5},w) \mapsto \zeta^{\mathcal

1493:     X}_T$.

1494:   \begin{align}\label{eq:claim}

1495:     \text{\parbox{12cm}{\it The distribution of $\zeta^{\mathcal X}_T$ is

1496:         the image measure of $(\mathcal T_{\mathfrak 1}, \mathcal

1497:         T_{\mathfrak 3}, \mathcal T_{\mathfrak 4}, \mathcal

1498:         T_{\mathfrak 5}, W)$ under the map $\varphi$.}}

1499:   \end{align}

1500:   To see this, observe first, that there are only finitely many

1501:   recombination events (3), (4), (5) and ($6_i$). Almost surely, all

1502:   events in the Poisson processes occur at different times, so

1503:   $\varphi$ is defined on a set of probability 1.  By the above

1504:   construction, we obtain that two partition elements in $\zeta^B$

1505:   coalesce by event (1). The Poisson processes $\mathcal T_{\mathfrak

1506:     1}$, $\mathcal T_{\mathfrak 3}$, $\mathcal T_{\mathfrak 4}$,

1507:   $\mathcal T_{\mathfrak 5}$ produce exactly the recombination events

1508:   (3), (4), (5) and ($6_i$). Hence \eqref{eq:claim} is proved.

1509:   \smallskip

1510:

1511:   Given $w$, the random partition $\varphi(\mathbf T_{\mathfrak 1},

1512:   \mathbf T_{\mathfrak 3}, \mathbf T_{\mathfrak 4}, \mathbf

1513:   T_{\mathfrak 5}, w)$ only depends on the order of time points in

1514:   $\mathbf T_{\mathfrak 1}, \mathbf T_{\mathfrak 3}, \mathbf

1515:   T_{\mathfrak 4}, \mathbf T_{\mathfrak 5}$. There is another feature

1516:   we will need:

1517:   \begin{align}\label{eq:claim2}

1518:     \text{\parbox{12cm}{\it Let $\beta',\beta''$ be consecutive time

1519:         points in $\mathbf T$ with $\beta'\in \mathbf T_{\mathfrak 3},

1520:         \beta''\in \mathbf T_{\mathfrak 4}$.  Exchanging $\beta'$ and

1521:         $\beta''$ does not alter the random partition $\varphi(\mathbf

1522:         T_{\mathfrak 1}, \mathbf T_{\mathfrak 3}, \mathbf T_{\mathfrak

1523:           4}, \mathbf T_{\mathfrak 5}, w)$. Formally, if $\mathbf T

1524:         \cap (\beta',\beta'')=\varnothing$, $\mathbf T_{\mathfrak 3}' =

1525:         \mathbf T_{\mathfrak 3}\setminus \{\beta'\} \cup \{\beta''\}$

1526:         and $\mathbf T_{\mathfrak 4}' = \mathbf T_{\mathfrak

1527:           4}\setminus \{\beta''\} \cup \{\beta'\}$. Then $$

1528:         \varphi(\mathbf T_{\mathfrak 1}, \mathbf T_{\mathfrak 3}',

1529:         \mathbf T_{\mathfrak 4}', \mathbf T_{\mathfrak 5}, w) =

1530:         \varphi(\mathbf T_{\mathfrak 1}, \mathbf T_{\mathfrak 3},

1531:         \mathbf T_{\mathfrak 4}, \mathbf T_{\mathfrak 5}, w). $$}}

1532:   \end{align}

1533:   Assume $\beta'$ is the $m_{\mathfrak 3}$rd event in $\mathbf

1534:   T_{\mathfrak 3}$, $w_{{\mathfrak 3},m_{\mathfrak 3}}=j$ and

1535:   $\beta''$ is the $m_{\mathfrak 4}$th event in $\mathbf T_{\mathfrak

1536:     4}$ and $w_{{\mathfrak 4},m_{\mathfrak 4}}=k$. If $j$ and $k$ are

1537:   not in the same partition element for $\beta<\beta'$, the claim is

1538:   trivial as recombination events only make the partition finer.

1539:   Similarly, if $j>\min \zeta_{(j)}\cap\dickm{\ell}$ or $k>\min

1540:   \zeta_{(k)}\cap\dickm{r}$ only one transition occurs and the claim

1541:   follows. In the case $$\zeta_{(j)} = \zeta_{(k)},\quad j = \min

1542:   \zeta_{(j)}\cap\dickm{\ell},\quad k = \min

1543:   \zeta_{(j)}\cap\dickm{r}$$ two transitions occur if and only if

1544:   $\zeta_{(j)}=\zeta_{(k)} \in \zeta^B$. We illustrate this situation

1545:   in Figure \ref{smallFig}.

1546:

1547:   \begin{figure}

1548:     \hspace{3cm} (a) \hspace{7.5cm}(b)

1549:

1550:   \begin{center}

1551:     \includegraphics[width=7cm]{smallAnc1.ps}\hspace{1cm}

1552:     \includegraphics[width=7cm]{smallAnc2.ps}

1553:   \end{center}

1554:   \caption{\label{smallFig}(a) A partition element (a line) is hit by

1555:     an event taking both the $L$- and the $R$-locus to the wild-type background

1556:     at time $\beta'$. Afterwards, at time $\beta''$ the line is split

1557:     in the wild-type background. (b) Here, the $R$-locus is taken to

1558:     the wild-type background at time $\beta'$. Afterwards the

1559:     $L$-locus is taken to the same background at time $\beta''$. The

1560:     outcome is the same. The line moves from the beneficial to the

1561:     wild-type background and is split there.}

1562:   \end{figure}

1563:

1564:   Observe that the two-step transitions for the pair

1565:   $\big($\eqref{eq:trans3}, \eqref{eq:trans4b}$\big)$ (see Figure

1566:   \ref{smallFig}(a)) as well as for the pair

1567:   $\big($\eqref{eq:trans4a}, \eqref{eq:trans3}$\big)$ (see Figure

1568:   \ref{smallFig}(b)) are given by

1569:   \begin{equation*}

1570:   \left(\zeta^B,\zeta^b\right) \longrightarrow \left(\zeta^B \setminus \{\zeta_{(j)}\}, \;

1571:     \zeta^b  \cup \{\zeta_{(j)} \cap

1572:     \dickm{\ell},\zeta_{(j)}\cap\dickm{r}\}\right),

1573:   \end{equation*}

1574:   i.e., the partition element both moves from $\zeta^B$ to $\zeta^b$

1575:   and is split into its $L$- and $R$-loci. This proves

1576:   \eqref{eq:claim2}.

1577: \end{step}

1578:

1579: \begin{step}(Probable order of events)\\

1580: %  Next, we will show that up to a small error the the events given by

1581: %  $\mathcal \mathbf T_{\mathfrak 1}$, $\mathcal{\mathbf T}_{\mathfrak

1582: %    3}$, $\mathcal{\mathbf T}_{\mathfrak 4}$, $\mathcal{\mathbf

1583: %    T}_{\mathfrak 5}$ follow a certain order.

1584:   Define $\varepsilon:=\frac{(\log \alpha)^{2}}{\alpha}$ and

1585:   $T_{\varepsilon}:= \min\{t\geq 0: X_{t}= \varepsilon \}$. We will

1586:   show that (i) no coalescences, i.e., events $(\mathfrak 1)$, occur

1587:   in $[T_\varepsilon, T]$, (ii) no splits in the beneficial

1588:   background, i.e., events $(\mathfrak 5)$, occur during

1589:   $[0,T_\varepsilon]$ and (iii) splits in the beneficial background,

1590:   i.e., events $(\mathfrak 5)$ do not overlap with other recombination

1591:   events $(\mathfrak 3), (\mathfrak 4)$ with high probability. More

1592:   precisely, we claim

1593:   \begin{align}

1594:     \mathbb P[\mathcal{T}_{\mathfrak 1} \cap \left[ T_{\varepsilon}, T\right]

1595:     \neq \varnothing ] = \mathcal O\Big(

1596:     \frac{1}{(\log\alpha)^2}\Big),

1597:     \label{eq:step3a}

1598:     \\

1599:     \mathbb{P}\left[ \mathcal{T}_{\mathfrak 5} \cap \left[0,

1600:         T_{\varepsilon}\right] \neq \varnothing \right] =

1601:     \mathcal{O}\left(\frac{(\log\alpha)^{2}}{ \alpha}\right), \label{eq:step3b}\\

1602:     \mathbb{P} \left[ \min\mathcal{T}_{\mathfrak 5} <

1603:       \max(\mathcal{T}_{\mathfrak 3}\cup\mathcal{T}_{\mathfrak 4})

1604:     \right] =\mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}

1605:     \right). \label{eq:step3c}

1606:   \end{align}

1607:

1608:   First, \eqref{eq:step3a} coincides with the assertion of Lemma 4.3

1609:   in \cite{EtheridgePfaffelhuberWakolbinger2006}. Second, for

1610:   \eqref{eq:step3b}, we have $X_{t}\leq \frac{(\log

1611:     \alpha)^{2}}{\alpha}$ for all $t \leq T_{\varepsilon}$. Hence we

1612:   get

1613:   \begin{eqnarray*}

1614:     \mathbb{P}\left[\mathcal{T}_{\mathfrak 5} \cap \left[0, T_{\varepsilon}\right] =

1615:       \varnothing \right]&=& \mathbb{E}\left[\exp\left(- r

1616:         \rho_{LR}\int_{0}^{T_{\varepsilon}}X_{s}ds \right) \right] \\

1617:     &\geq& \mathbb{E}\left[ \exp\left(- r \rho_{LR}\;\varepsilon\; T_{\varepsilon} \right)\right]

1618:     \geq  \exp\left(- r \rho_{LR}\;\varepsilon\; \mathbb{E}\left[ T\right] \right).

1619:   \end{eqnarray*}

1620:   By \eqref{eq:T} we see that $\mathbb{E}\left[ T\right]=\frac{2\log

1621:     \alpha}{\alpha}+\mathcal{O}\left(\frac{1}{\alpha}\right)$. By the

1622:   choice of $\varepsilon$, this finally gives

1623:      \begin{equation} \nonumber \mathbb{P}\left[\mathcal{T}_{\mathfrak 5} \cap

1624:       \left[0, T_{\varepsilon} \right] = \varnothing \right] \geq

1625:     1-\mathcal{O}\left(\frac{(\log\alpha)^{2}}{ \alpha}\right).

1626: \end{equation}

1627: Third, for \eqref{eq:step3c} we write, using $\rho = \mathcal O\left(

1628:   \frac{\alpha}{\log\alpha}\right)$, which might change from

1629: occurrence to occurrence,

1630: \begin{equation}\label{eq:green1}

1631: \begin{aligned}

1632:   \mathbb{P} \left[ \min\mathcal{T}_{\mathfrak 5}  \right. &<

1633:     \max(\mathcal{T}_{\mathfrak 3}\cup\mathcal{T}_{\mathfrak 4}) \left. \right]

1634:    = %\mathbb{E}   \Big[\mathbb{P}\left[\exists \; t \in \mathcal{T}_{\mathfrak 5}: t <

1635:    %\max(\mathcal{T}_{\mathfrak 3}\cup \mathcal{T}_{\mathfrak 4}) \big| \mathcal X \right]

1636: % \Big]

1637: \\

1638:   & = \mathbb{E}\left[ \int_{0}^{T} \mathbb{P}\left[

1639:       \mathcal{T}_{\mathfrak 5} \cap \left[0, t \right] \neq \varnothing

1640:       \big|\max(\mathcal{T}_{\mathfrak 3} \cup \mathcal{T}_{\mathfrak 4}) \in dt, \mathcal

1641:      X \right] \cdot \mathbb{P}\left[\max(\mathcal{T}_{\mathfrak 3}\cup

1642:       \mathcal{T}_{\mathfrak 4}) \in dt \big| \mathcal X \right] \right] \\&

1643:   \leq \mathbb{E} \left[ \int_{0}^{T} \left(1- \exp

1644:       \left(-\int_{0}^{t} \rho X_{s}ds\right) \right)\cdot \rho

1645:     (1-X_{t}) \exp\left(-\int_{t}^{T} \rho

1646:       (1-X_{s})ds\right) \right]\\

1647:   & \leq \rho^2 \cdot \mathbb{E}\left[\int_{0}^{T} (1-X_{t})

1648:     \int_{0}^{t} X_{s} dsdt\right].

1649: \end{aligned}

1650: \end{equation}

1651: The last term can be estimated using the Green function for the

1652: diffusion \eqref{eq:SDE}. As the right hand side of \eqref{eq:green1}

1653: coincides with the second line of (4.5) in

1654: \cite{EtheridgePfaffelhuberWakolbinger2006} we immediately obtain

1655: \eqref{eq:step3c}.

1656: \end{step}

1657:

1658: \medskip

1659:

1660: In the next three steps we will show that realizing the different

1661: splits independently from a fixed sample path $\mathcal{X}=

1662: (X_{t})_{0\leq t \leq T}$ will cause only a small error. To see this

1663: we will establish a general result on mixed Poisson processes in Step

1664: 4 and apply it to the Poisson processes introduced in Step 2. The

1665: proof of Proposition \ref{PropFirst} will then be concluded by an

1666: application of these two steps.

1667:

1668: \begin{step}(General approximations of mixed Poisson processes) \\

1669:   Let $\{\Psi(\delta): \delta > 0\}$, $\{\Phi(\delta): \delta > 0 \}$

1670:   be families of random variables taking values in $\mathbb{R}^{+}$.

1671:   Assume that the expectations $\mathbb{E}[\Psi(\delta)]$,

1672:   $\mathbb{E}[\Phi(\delta)]$ are bounded in $\delta$ and

1673: \begin{equation} \label{var}

1674:  \mathbb{V}[\Psi(\delta)],\mathbb{V}[\Phi(\delta)] = \mathcal{O}\left(\delta \right)

1675: \end{equation}

1676: as $\delta \rightarrow 0$. Denote the distribution function of the

1677: Poisson distribution with parameter $\lambda$ by

1678: $\text{Poi}_{\lambda}(\cdot)$. We claim that for $k, l \in

1679: \mathbb{N}_0$

1680: \begin{eqnarray}\label{approx1}

1681:   \mathbb{E}\left[\text{Poi}_{\Psi(\delta)}(k) \right]&= &

1682:   \text{Poi}_{\mathbb{E}[\Psi(\delta)]}(k)+ \mathcal{O}\left(\delta \right)

1683:   \\ \label{approx2}

1684:   \mathbb{E}\left[\text{Poi}_{\Psi(\delta)}(k) \cdot

1685:     \text{Poi}_{\Phi(\delta)}(l) \right]&=&\mathbb{E}

1686:   \left[\text{Poi}_{\Psi(\delta)}(k)\right]\cdot

1687:   \mathbb{E}\left[\text{Poi}_{\Phi(\delta)}(l)\right] +

1688:   \mathcal{O}\left(\delta  \right)\end{eqnarray}

1689:

1690: Note that by a Taylor series approximation, for a random variable

1691: $\Psi$ in $\mathbb R_+$ with second moments and some $\tilde\Psi$

1692: satisfying $\left|\tilde \Psi - \mathbb{E}[\Psi]\right|\leq \left|\Psi -

1693: \mathbb{E}[\Psi]\right|$,

1694: \begin{eqnarray} \nonumber\left| \mathbb{E} \left[ e^{- \Psi}

1695:   \frac{\Psi^{k}}{k!} \right] - e^{- \mathbb{E}[\Psi

1696:     ]}\frac{\mathbb{E}[\Psi ]^{k}}{k!} \right| &=& \left|

1697:   \left[\frac{d^2}{d\Psi^2} \left(e^{- \Psi}

1698:   \frac{\Psi^{k}}{k!}\right)\right]_{\Psi=\mathbb{E}[\Psi]}\right|\cdot

1699:   \mathbb{E} \left[(\tilde \Psi - \mathbb{E}[\Psi])^{2} \right] \\

1700:   \nonumber &\leq&

1701:   e^{- \mathbb{E}\left[\Psi \right]} \left|\left\{ \frac{\mathbb{E}\left[\Psi \right]^{k-2}}{(k-2)!}- 2\frac{\mathbb{E}\left[\Psi \right]^{k-1}}{(k-1)!} + \frac{\mathbb{E}\left[\Psi \right]^{k}}{k!}\right\}\right|\cdot \mathbb{V}\left[\Psi\right] \\

1702:     &\leq& 2 \mathbb{V}\left[\Psi

1703:     \right] \label{Taylor}

1704: \end{eqnarray}

1705: where the terms in $\{ \ldots \}$ only show up if the denominators are

1706: non-zero and the last step follows from the fact that the Poisson

1707: weights in $\{ \ldots \}$ lie in $[0,1]$. As this holds for every

1708: $\Psi(\delta)$, (\ref{approx1}) follows immediately from (\ref{var}).

1709: Moreover, by a calculation similar to (\ref{Taylor}),

1710: \begin{eqnarray*}

1711:   \mathbb{V} \left[\text{Poi}_{\Psi(\delta)}(k) \right]

1712:   = \mathbb{E}\left[ e^{-2\Psi(\delta)} \frac{\Psi(\delta)^{2k}}{(k!)^{2}}\right] - \mathbb{E}\left[e^{-\Psi(\delta)} \frac{\Psi(\delta)^{k}}{k!} \right]^{2}

1713:   =\mathcal O \big(\mathbb{V}\left[ \Psi(\delta) \right]\big)

1714:   = \mathcal{O}\left(\delta \right).

1715: \end{eqnarray*}

1716: Additionally, (\ref{approx2}) follows easily from the fact that

1717: \begin{eqnarray*}

1718:   &&\big| \mathbb{E}\left[\text{Poi}_{\Psi(\delta)}(k) \cdot \text{Poi}_{\Phi(\delta)}(l) \right]-\mathbb{E}\left[\text{Poi}_{\Psi(\delta)}(k)\right]\cdot \mathbb{E}\left[\text{Poi}_{\Phi(\delta)}(l)\right] \big| \\

1719:   &&\qquad \qquad  =  \big| \text{Cov}\left[\text{Poi}_{\Psi(\delta)}(k),\, \text{Poi}_{\Phi(\delta)}(l) \right] \big|

1720:   \leq \sqrt{

1721:     \mathbb{V}\left[ \text{Poi}_{\Psi(\delta)}(k)\right] \cdot \mathbb{V}\left[\text{Poi}_{\Phi(\delta)}(l) \right]} = \mathcal{O}\left( \delta \right)

1722: \end{eqnarray*}

1723: by the Cauchy-Schwarz inequality.

1724: \end{step}

1725:

1726: \begin{step}(Green function estimates)\\

1727:   Set $\rho=\gamma \frac{\alpha}{\log \alpha}$ where $\gamma =

1728:   \gamma_{LR}$ for geometry (i) and $\gamma= \gamma_{LS}+ \gamma_{SR}$

1729:   for geometry (ii). Using our approximations from Step 4 we will show

1730:   next

1731: \begin{eqnarray} \label{Poi1}

1732: \mathbb{P}\left[|\mathcal{T}_{\mathfrak 5}|= k \right] &=&\text{Poi}_{\mathbb{E}[r\rho\int_{0}^{T}X_{s}ds]}(k)+ \mathcal{O}\left( \frac{1}{(\log \alpha)^{2}}\right) \\  \nonumber

1733: \mathbb{P} \left[\big|(\mathcal{T}_{\mathfrak 3} \cup \mathcal{T}_{\mathfrak 4}) \cap [T_{\varepsilon}, T]\big|= k,  |\mathcal{T}_{\mathfrak 5}|= l \right]

1734: &=&\mathbb{P}\left[\big|(\mathcal{T}_{\mathfrak 3} \cup \mathcal{T}_{\mathfrak 4})\cap [T_{\varepsilon}, T]\big|= k \right] \cdot \mathbb{P}\left[|\mathcal{T}_{\mathfrak 5}|= l\ \right] \\ \label{Poi2}

1735: &&  \qquad \qquad \qquad \qquad + \;\mathcal{O}\left(\frac{1}{(\log \alpha)^{2}} \right)

1736: \end{eqnarray}

1737: as $\alpha \rightarrow \infty$.

1738: To see this, set $\delta= \frac{1}{(\log \alpha)^{2}}$ and define

1739: \[

1740:  \Psi(\delta)= r\rho\int_{0}^{T} X_{s}ds, \qquad\qquad \Phi(\delta)= (\ell+r)\rho \int_{T_{\varepsilon}}^{T}(1-X_{s})ds.

1741: \]

1742: Observe that for $k= 0,1,2, \ldots$

1743: \begin{align}

1744: \mathbb{P}\left[|\mathcal{T}_{\mathfrak 5}|= k\right]&= \mathbb{E}\left[ \text{Poi}_{\Psi(\delta)}(k)\right]\label{Poi3}\\

1745: \mathbb{P}\left[\big|(\mathcal{T}_{\mathfrak 3} \cup \mathcal{T}_{\mathfrak 4})\cap [T_{\varepsilon}, T]\big|= k \right]& = \mathbb{E}\left[\text{Poi}_{\Phi(\delta)}(k) \right]\nonumber

1746: \end{align}

1747: because $\mathcal{T}_{\mathfrak 3}$, $\mathcal{T}_{\mathfrak 4}$, $\mathcal{T}_{\mathfrak 5}$ are randomly time-changed Poisson processes. By (\ref{approx1}) and (\ref{approx2}), (\ref{Poi1}) and (\ref{Poi2}) follow once we have shown

1748: \begin{eqnarray}

1749: \mathbb{E}\left[\rho \int_{T_{\varepsilon}}^{T}(1-X_{s})ds \right] &\leq& \mathbb{E}\left[\rho \int_{0}^{T}X_{s}ds \right] \leq 2\gamma + \mathcal{O}\left(\frac{1}{\alpha} \right) \label{key1} \\

1750: \mathbb{V}\left[\rho \int_{T_{\varepsilon}}^{T}(1-X_{s})ds \right] &\leq& \mathbb{V}\left[\rho \int_{0}^{T}X_{s}ds \right] = \mathcal{O}\left(\frac{1}{(\log\alpha)^{2}} \right) \label{key2}

1751: \end{eqnarray}

1752: as $\alpha \rightarrow \infty$.\\

1753: First observe that $\left(X_{t} \right)_{0 \leq t \leq T}$ has the

1754: same distribution as $(1-X_{T-t})_{0 \leq t\leq T}$ by

1755: time-reversibility (see e.g. \cite{KarlinTaylor1981,

1756:   Griffiths2003}). Hence the inequalities on the left hand side of

1757: (\ref{key1}) and (\ref{key2}) follow. Second, we verify the

1758: expressions on the right hand side of (\ref{key1}) and (\ref{key2}) by

1759: an application of the Green function $G(.,.)$ of the diffusion

1760: $(X_{t})_{0 \leq t\leq T}$.  This function satisfies

1761: \[

1762: \mathbb E_x\left[ \int_0^T g(X_t) dt\right] = \int_0^1 G(x,y) g(y) dy

1763: \]

1764: where $\mathbb E_x[.]$ refers to the path $(X_t)_{0\leq t\leq T}$ with

1765: $X_0=x$ and $\mathbb E[.] := \mathbb E_0[.]$. The Green function is

1766: given by

1767: \[

1768: G(x, y)=

1769: \begin{cases}

1770:   \frac{\left(1-e^{-\alpha(1-y)}\right) \left(1-e^{-\alpha y} \right)}{\alpha y \left(1-y \right)\left(1-e^{-\alpha} \right)} \qquad &\text{ if } x \leq y \\

1771:   \frac{\left(e^{-\alpha x} -e^{-\alpha} \right)\left(e^{\alpha y}-1

1772:     \right) \left(1-e^{-\alpha y} \right)} {\alpha y \left(1-y \right)

1773:     \left(1-e^{-\alpha} \right) \left(1-e^{-\alpha x} \right)} \qquad

1774:   &\text{ if } x\geq y,

1775: \end{cases}

1776: \]

1777: see e.g. \cite{KarlinTaylor1981,

1778:   EtheridgePfaffelhuberWakolbinger2006}. More generally, $G(.,.)$

1779: satisfies

1780: \begin{eqnarray*}

1781: &&\mathbb{E}_{x}\left[\int_{0}^{T} \int_{t_{1}}^{T} \ldots \int_{t_{k-1}}^{T} g_{k}(X_{t_{k}})\ldots g_{1}(X_{t_{1}})dt_{k}\ldots dt_{1} \right] \\

1782:   &&\qquad  \qquad \qquad \qquad

1783:   = \int_{0}^{1} \ldots \int_{0}^{1} G(x, x_{1}) \ldots G(x_{k-1}, x_{k}) g_{1}(x_{1}) \ldots g_{k}(x_{k})dx_{k}\ldots dx_{1}

1784: \end{eqnarray*}

1785: for all $k=1, 2, \ldots$ which can be proved by induction. We may thus

1786: write, because $G(x,y) = G(0,y)$ for $y\geq x$,

1787: \begin{align*}

1788:   \mathbb{V}\Big[  \rho\int_{0}^{T}X_{s}ds\Big] &=

1789:    \rho^2 \left( 2 \int_0^1 \int_0^1 G(0,x) G(x,y) xy dy dx - 2\int_0^1 \int_x^1 G(0,x) G(0,y) xy dy dx\right) \\

1790:   & = 2\rho^2 \int_0^1 \int_0^x G(0,x) G(x,y) xy dy dx

1791:   \: \leq \: 2 \rho^2 \int_0^1 \int_0^x G(0,x) G(x,y) dy dx \\

1792:   & = \rho^2 \mathbb V[T] = \mathcal{O}\left(

1793:     \frac{1}{(\log\alpha)^{2}}\right)

1794:   % = \rho^{2}\mathbb{V}\left[ \int_{0}^{T}X_{s}ds\right]  = 2\gamma^{2} \int_{0}^{1} \int_{0}^{\xi} G(0, \xi)G(\xi, \eta)\xi \; \eta \; d\eta d\xi \\

1795:   % = \frac{2\gamma^{2}} {\alpha^{2}(1-e^{-\alpha})^{2}} \int_{0}^{1}

1796:   % \int_{0}^{\xi} \frac{\left(

1797:   %     1-e^{-\alpha(1-\xi)}\right)\left(1-e^{-\alpha\xi}

1798:   %   \right)}{(1-\xi)} \; \frac{\left(e^{-\alpha\xi}- e^{-\alpha}

1799:   %   \right)\left(e^{\alpha\eta}-1 \right) \left(1-e^{-\alpha\eta}

1800:   %   \right)}{(1-\eta)\left(1-e^{-\alpha\xi}\right)}

1801:   % d\eta d\xi \\

1802:   % \leq

1803:   % \frac{2\gamma^{2}} {\alpha^{2}(1-e^{-\alpha})^{2}} \int_{0}^{1} \int_{0}^{\xi} \frac{e^{-\alpha\xi}\left(e^{\alpha\xi}-1\right)}{(1-\xi)} \frac{1}{(1-\eta)}d\eta d\xi \\

1804:   % =

1805:   % \frac{2\gamma^{2}} {\alpha^{2}(1-e^{-\alpha})^{2}} \int_{0}^{1}\frac{e^{-\alpha\xi}\ln(1-\xi)}{(1-\xi)}d\xi \\

1806:   % = \frac{2\gamma^{2}} {\alpha^{2}(1-e^{-\alpha})^{2}} \int_{0}^{1}

1807:   % e^{-\alpha\xi}d\xi \leq

1808: \end{align*}

1809: by \eqref{eq:T} which gives (\ref{key2}).

1810: \end{step}

1811:

1812: \begin{step}(Approximate independence)\\

1813:   % We are going to use equations (\ref{approx1}) and (\ref{approx2})

1814:   % derived in Step 4 to conclude the proof of the proposition.

1815:   As we have seen in (\ref{eq:claim}) the distribution of

1816:   $\zeta^{\mathcal X}_T$ is determined by the distribution of the

1817:   order of events in the Poisson processes $\mathcal T_{\mathfrak 1}$,

1818:   $\mathcal T_{\mathfrak 3}$, $\mathcal T_{\mathfrak 4}$ and $\mathcal

1819:   T_{\mathfrak 5}$.  The calculations in Step 3 allow us to make the

1820:   assumptions

1821:   \[

1822:   \mathcal{T}_{\mathfrak 1}\cap \left[T_{\varepsilon}, T \right] =

1823:   \varnothing, \qquad \mathcal{T}_{\mathfrak 5}\cap \left[0,

1824:     T_{\varepsilon} \right] = \varnothing,\qquad \max

1825:   (\mathcal{T}_{\mathfrak 3} \cup \mathcal{T}_{\mathfrak 4}) < \min

1826:   \mathcal{T}_{\mathfrak 5}

1827:   \]

1828:   on the ordering of events in these Poisson processes as these events

1829:   have probability $1-\mathcal O\big( (\log\alpha)^{-2}\big)$.

1830:   Furthermore, we know from \eqref{eq:claim2} that events in $\mathcal

1831:   T_{\mathfrak 3}$ and $\mathcal T_{\mathfrak 4}$ may be exchanged

1832:   without changing the distribution of $\zeta^{\mathcal X}_T$. Hence,

1833:   the distribution of $\zeta^{\mathcal X}_T$ is determined once the

1834:   joint distribution of

1835:   $$\mathcal{T}_{\mathfrak 1} \cap \left[ 0, T_{\varepsilon}\right], \qquad

1836:   \mathcal{T}_{\mathfrak 3}\cap \left[ 0, T_{\varepsilon}\right],

1837:   \qquad \mathcal{T}_{\mathfrak 4}\cap [0,T_\varepsilon], \qquad

1838:   \left|(\mathcal{T}_{\mathfrak 3}\cup \mathcal{T}_{\mathfrak 4}) \cap

1839:     \left[ T_{\varepsilon}, T\right] \right|,\qquad

1840:   \left|\mathcal{T}_{\mathfrak 5}\right| $$ is known. To approximate

1841:   the joint distribution of these objects, define

1842: \[

1843: \mathcal{T}_{\mathfrak i}^{\varepsilon}:= \mathcal{T}_{\mathfrak i}

1844: \cap \left[0, T_{\varepsilon} \right]\text{, } \mathfrak i=

1845: \mathfrak{1,3,4} \quad \text{and} \quad K_{\mathfrak {3,4}}:=

1846: \big|\left(\mathcal{T}_{\mathfrak 3} \cup \mathcal{T}_{\mathfrak 4}

1847: \right) \cap \left[T_{\varepsilon}, T\right] \big| \text{,} \quad K_{

1848:   \mathfrak 5}:=\big| \mathcal{T}_{\mathfrak 5} \big|.

1849: \]

1850: %The distribution conditioned on a frequency path $\mathcal{X}$ is

1851: %denoted by $\mathbb{P}_{\mathcal{X}}$.

1852: We will prove

1853: %for the joint distribution under $\mathbb{P}$

1854: \begin{equation} \label{independence} \mathbb P \circ

1855:   \left(\mathcal{T}_{\mathfrak 1}^{\varepsilon},

1856:     \mathcal{T}_{\mathfrak 3}^{\varepsilon}, \mathcal{T}_{\mathfrak

1857:       4}^{\varepsilon}, K_{\mathfrak {3,4}}, K_{\mathfrak 5}

1858:   \right)^{-1} = \mathbb P\circ\left(\mathcal{T}_{\mathfrak

1859:       1}^{\varepsilon}, \mathcal{T}_{\mathfrak 3}^{\varepsilon},

1860:     \mathcal{T}_{\mathfrak 4}^{\varepsilon}, K_{\mathfrak{3,4}}

1861:   \right)^{-1} \otimes \;

1862:   \text{Poi}_{\mathbb{E}\left[r\rho\int_{0}^{T}X_{s}ds \right]} +

1863:   \mathcal{O}\left(\frac{1}{\left(\log \alpha \right)^{2}} \right)

1864: \end{equation}

1865: where $\mathbb P\circ X^{-1}$ is the image measure of the random

1866: variable $X$ under $\mathbb P$ and the Landau symbol in this context

1867: gives the order in variation distance of the distributions.

1868:

1869: Once \eqref{independence} is shown we conclude that $K_{\mathfrak 5}$

1870: is approximately independent of all other events. Furthermore, its

1871: distribution may be interpreted as that of a sum of $r$ independent Poisson

1872: random variables with parameter $\mathbb{E}\left[\rho\int_{0}^{T}X_{s}ds

1873: \right]$. These determine the number of split events on all partition

1874: elements $\xi\in\pi$ with $\xi\cap\dickm r\neq \varnothing$. A

1875: partition element splits, if it is hit by at least one split event.

1876: The probability for a split of a partition element is thus given,

1877: using \eqref{Poi1} and \eqref{Poi3} for $k=0$, by

1878: \[

1879: 1-\exp\Big( - \rho\cdot \mathbb{E}\left[\int_{0}^{T}X_{s}ds

1880: \right]\Big) = 1 - \mathbb E\Big[ \exp\Big( - \rho\int_0^T X_s

1881: ds\Big)\Big] + \mathcal O\Big( \frac{1}{(\log\alpha)^2}\Big).

1882: \]

1883: with $\rho=\rho_{LR}$ for geometry (i) and $\rho=\rho_{LS} +

1884: \rho_{SR}$ for geometry (ii). Observe that $\Gamma_\pi$ is determined

1885: by the distribution of $\left(\mathcal{T}_{\mathfrak 1}^{\varepsilon},

1886:   \mathcal{T}_{\mathfrak 3}^{\varepsilon}, \mathcal{T}_{\mathfrak

1887:     4}^{\varepsilon}, K_{\mathfrak {3,4}}\right)$ if $K_{\mathfrak 5}$

1888: is known. The random partition $\Delta_\pi$ is determined by the

1889: distribution of $\left(\mathcal{T}_{\mathfrak 1}^{\varepsilon},

1890:   \mathcal{T}_{\mathfrak 3}^{\varepsilon}, \mathcal{T}_{\mathfrak

1891:     4}^{\varepsilon}, K_{\mathfrak {3,4}}\right)$ independently of

1892: $K_{\mathfrak 5}$. So, Proposition \ref{PropFirst} is a consequence of

1893: the approximate independence of $\left(\mathcal{T}_{\mathfrak

1894:     1}^{\varepsilon}, \mathcal{T}_{\mathfrak 3}^{\varepsilon},

1895:   \mathcal{T}_{\mathfrak 4}^{\varepsilon}, K_{\mathfrak {3,4}}\right)$

1896: and $K_{\mathfrak 5}$ given by \eqref{independence}.

1897:

1898: \smallskip

1899:

1900: We write

1901: \begin{align*}

1902:   \mathbb P \circ \big(\mathcal{T}_{\mathfrak 1}^{\varepsilon}, &

1903:   \mathcal{T}_{\mathfrak 3}^{\varepsilon}, \mathcal{T}_{\mathfrak

1904:     4}^{\varepsilon}, K_{\mathfrak{3,4}}, K_{\mathfrak{5}} \big)^{-1}

1905:   = \int \mathbb{P}_{\mathcal X} \circ \left(\mathcal{T}_{\mathfrak

1906:       1}^{\varepsilon}, \mathcal{T}_{\mathfrak 3}^{\varepsilon},

1907:     \mathcal{T}_{\mathfrak 4}^{\varepsilon}, K_{\mathfrak {3,4}},

1908:     K_{\mathfrak{5}} \right)^{-1}\;\mathbb{P}\left[ d\mathcal{X}

1909:   \right] \\ & = \int \mathbb{P}_{(X_{t})_{0\leq t \leq T^{\varepsilon}}}

1910:   \circ \left(\mathcal{T}_{\mathfrak 1}^{\varepsilon},

1911:     \mathcal{T}_{\mathfrak 3}^{\varepsilon}, \mathcal{T}_{\mathfrak

1912:       4}^{\varepsilon} \right)^{-1} \mathbb{P}\left[d(X_{t})_{0\leq t

1913:       \leq T^{\varepsilon}}\right]\\ & \qquad\qquad \otimes \int

1914:   \mathbb{P}_{(X_{t})_{T^{\varepsilon}\leq t \leq T}}\circ \left(K_{\mathfrak

1915:       {3,4}}, K_{\mathfrak 5} \right)^{-1}

1916:   \mathbb{P}\left[d(X_{t})_{T^{\varepsilon}\leq t \leq T}\right] +

1917:   \mathcal{O}\left(\frac{(\log \alpha)^{2}}{\alpha} \right)

1918: \end{align*}

1919: where we have used the fact that $T_{\varepsilon}$ is a stopping time and

1920: the strong Markov property of the process $\mathcal X$. Note that by

1921: \eqref{eq:step3b} we may assume $K_{ \mathfrak

1922:   5}=\big|\mathcal{T}_{\mathfrak 5} \cap \left[T_{\varepsilon}, T \right]

1923: \big|$ which gives an error of $\mathcal{O}\left(\frac{(\log

1924:     \alpha)^{2}}{\alpha} \right)$ in probability. From Steps 4 and 5

1925: we get

1926: \begin{multline*}

1927:   \int \mathbb{P}_{(X_{t})_{T^{\varepsilon}\leq t \leq T}}\circ

1928:   \left(K_{\mathfrak{3,4}}, K_{\mathfrak{5}} \right)^{-1}

1929:   \mathbb{P}\left[d(X_{t})_{T^{\varepsilon}\leq t \leq T}\right]

1930:   \\

1931:   =

1932:   \text{Poi}_{\mathbb{E}\left[(\ell+r)\rho\int_{T_{\varepsilon}}^{T}\left(1-X_{s}

1933:       \right)ds \right]}\; \otimes\;

1934:   \text{Poi}_{\mathbb{E}\left[r\rho\int_{T_{\varepsilon}}^{T}X_{s}ds

1935:     \right]} + \mathcal{O}\left(\frac{1}{(\log\alpha)^{2}}\right)

1936: \end{multline*}

1937: Rewriting

1938: $$\text{Poi}_{\mathbb{E}\left[(\ell+r)\rho\int_{T_{\varepsilon}}^{T}\left(1-X_{s}

1939:     \right)ds \right]} = \int \mathbb{P}_{(X_{t})_{T^{\varepsilon}\leq t

1940:   \leq T}}\circ\big(K_{

1941:   \mathfrak{3,4}}\big)^{-1}\mathbb{P}\left[d(X_{t})_{T^{\varepsilon}\leq

1942:     t \leq T}\right] ,$$ and using the strong Markov property of

1943: $\mathcal X$ a second time we get

1944: \begin{align*}

1945:   \mathbb P\circ \left(\mathcal{T}_{\mathfrak 1}^{\varepsilon},

1946:     \mathcal{T}_{\mathfrak 3}^{\varepsilon}, \mathcal{T}_{\mathfrak

1947:       4}^{\varepsilon}, K_{\mathfrak{3,4}}, K_{\mathfrak{5}}

1948:   \right)^{-1} &= \int \mathbb{P}_{(X_{t})_{0\leq t \leq

1949:       T^{\varepsilon}}}\circ \left(\mathcal{T}_{\mathfrak

1950:       1}^{\varepsilon}, \mathcal{T}_{\mathfrak 3}^{\varepsilon},

1951:     \mathcal{T}_{\mathfrak 4}^{\varepsilon}

1952:   \right)^{-1}\mathbb{P}\left[d(X_{t})_{0\leq t \leq

1953:       T^{\varepsilon}}\right]

1954:   \\

1955:   & \qquad \otimes \int \mathbb{P}_{(X_{t})_{T^{\varepsilon}\leq t \leq

1956:       T}} \circ \left(K_{\mathfrak{3,4}}\right)^{-1}

1957:   \mathbb{P}\left[d(X_{t})_{T^{\varepsilon}\leq t

1958:       \leq T}\right] \\

1959:   & \qquad \qquad \qquad \otimes \quad

1960:   \text{Poi}_{\mathbb{E}\left[r\rho\int_{0}^{T}X_{s}ds \right]} +

1961:   \mathcal{O}\left(\frac{1}{(\log\alpha)^{2}}\right)\\ &= \mathbb P

1962:   \circ \left(\mathcal{T}_{\mathfrak 1}^{\varepsilon},

1963:     \mathcal{T}_{\mathfrak 3}^{\varepsilon}, \mathcal{T}_{\mathfrak

1964:       4}^{\varepsilon}, K_{\mathfrak {3,4}} \right)^{-1} \otimes

1965:   \text{Poi}_{\mathbb{E}\left[r\rho\int_{0}^{T}X_{s}ds \right]} +

1966:   \mathcal{O}\left(\frac{1}{\left(\log \alpha \right)^{2}} \right)

1967: \end{align*}

1968: and we are done.

1969: \end{step}

1970: \end{proof}

1971:

1972: By Proposition \ref{PropFirst}, events $(5)$ can be generated

1973: independently of the frequency path and of all other events.  The

1974: rates of the recombination events $(3), (4), (6_i)$ at time $\beta$

1975: are all proportional to $(1-X_{T-\beta})$.  This is reminiscent of the

1976: case of only one neutral locus, studied in

1977: \cite{EtheridgePfaffelhuberWakolbinger2006}, where a line carrying one

1978: neutral locus in recombination distance $\rho$ recombines to the

1979: wild-type background with rate $\rho(1-X_{T-\beta})$. As a consequence we can use

1980: the same techniques used there, in particular their Proposition 3.6,

1981: which states that a marked Yule tree approximately gives the same

1982: partition as the structured coalescent.

1983:

1984: \begin{definition}\label{def:4}

1985: %  For $\pi\in \mathcal P_{\dickm \ell\,\cup\,\dickm r}$ d

1986:   Define a $\mathcal P'_{\dickm \ell\,\cup\,\dickm r}$-valued random

1987:   variable $\Xi_\pi$ as follows: For all partition elements

1988:   $\xi\in\pi$ which $\xi\cap\dickm\ell \neq \varnothing, \xi\cap\dickm

1989:   r\neq\varnothing$, i.e., $\xi$ carries both left and right loci,

1990:   split the partition element in its left and right loci,

1991:   $\xi\cap\dickm\ell, \xi\cap\dickm r$ according to \eqref{Split}.

1992:   Denote

1993:   %the random number of split lines by $S$ and

1994:   the resulting partition by $\pi'$.

1995:

1996:   Let $\mathbf Y$ be an infinite Yule tree with branching rate

1997:   $\alpha$. Moreover, consider the random tree $\mathbf Y_{|\pi'|}$

1998:   which arises by sampling $|\pi'|$ lines from $\mathbf Y$ at

1999:   infinity. Identify each of the $|\pi'|$ partition elements of $\pi'$

2000:   with one sampled line. Between the time the Yule tree $\mathbf Y$

2001:   starts and the time it has $\lfloor 2\alpha \rfloor$ lines, mark all

2002:   lines by the following procedure:

2003:

2004:   For geometry (i), the tree is marked by Poisson processes with rates

2005:   $\rho_{SL}$ and $\rho_{LR}$. These marks are relabelled such that

2006:   each branch is hit by at most one mark. Call the corresponding marks

2007:   $SL$-, $LR$- and $SLR$-marks. The following rules are applied:

2008:   \begin{enumerate}

2009:   \item[(a)] If the Poisson process with rate $\rho_{SL}$ puts the first

2010:     (backward in time) mark at time $t$ from the root, start a Poisson

2011:     process with rate $\rho_{LR}$ and run it for time $t$. If an event

2012:     occurs during this time, the branch is marked by an $SLR$-mark,

2013:     otherwise by an $SL$-mark.

2014:   \item[(b)] If the Poisson process with rate $\rho_{LR}$ puts the

2015:     first (backward in time) mark distinguish the following two cases:

2016:     if the Poisson process with rate $\rho_{SL}$ hits the branch as

2017:     well, it obtains an $SLR$-mark. Otherwise, it obtains an

2018:     $LR$-mark.

2019:   \end{enumerate}

2020:

2021:   For geometry (ii), mark the tree by two independent Poisson

2022:   processes with rates $\rho_{LS}$ and $\rho_{SR}$. If a branch is hit

2023:   by one or more events of the Poisson process with rate $\rho_{LS}$,

2024:   it gets an $LS$-mark. If it is hit by one or more events with rate

2025:   $\rho_{SR}$, it additionally gets an $SR$-mark.

2026:

2027:   The result of this procedure is a marked Yule tree $\mathbf

2028:   Y_{|\pi'|}$. Given $\pi'$ and the marked Yule tree $\mathbf

2029:   Y_{|\pi'|}$ we use the same equivalence relation as given in

2030:   \eqref{eq:equivGeoi} and \eqref{eq:equivGeoii} to define

2031:   $\pi''\in\mathcal P'_{\dickm\ell \cup \dickm r}$. Furthermore, we

2032:   use \eqref{eq:uni} and \eqref{eq:unii} to define the random

2033:   partition

2034:   $$ \Xi_\pi:= ( \{\pi_f''\}, \pi''\setminus \{\pi_f''\}).$$

2035: \end{definition}

2036:

2037: \begin{example} The two cases in which an $SLR$-mark occurs for

2038:   geometry (i) are illustrated in Figure \ref{smallYule}. Consider the

2039:   line in the sample Yule tree which can be identified with the

2040:   partition element $\{j,k\}$ where $j\in\dickm \ell$ and $k\in\dickm

2041:   r$. Consider case (a) first, shown on the left side of Figure

2042:   \ref{smallYule}: The $SL$-mark hitting a branch in $\mathbf

2043:   Y_{|\pi'|}$ leads to a jump of the partition element into the

2044:   wild-type background. We now have to consider the additional Poisson

2045:   process at rate $\rho_{LR}$ to determine whether or not the line

2046:   will split within the wild-type background. If an event with rate

2047:   $\rho_{LR}$ occurs, the $L$- is separated from the $R$-locus on this

2048:   line. Case (b) is illustrated on the right side of Figure

2049:   \ref{smallYule}. Here, the line which refers to the partition

2050:   element $\{j,k\}$ is first (backward in time) hit by an $LR$-mark,

2051:   bringing the $R$-locus into the wild-type background, and after that

2052:   an additional $SL$-mark hits the same branch, which additionally

2053:   brings the $L$-locus into the wild-type background.

2054: %  Now looking backwards in time again this means that first the

2055: %  $R$-locus, i.e.,  $\{j\}$, is carried into the wild-type background

2056: %  by a recombination event between $L$ and $R$.  Before the partition

2057: %  element $\{k\}$ carrying the $L$-locus which remains behind may

2058: %  coalesce with any other partition element in the beneficial

2059: %  background, a recombination event between $S$ and $L$ leads to the

2060: %  jump of $\{k\}$ into the wild-type background.

2061:   In both cases the loci $j$ and $k$ end up separated in the wild-type

2062:   background. This is summarized in Definition \ref{def:4} by an $SLR$-mark.

2063: \end{example}

2064:

2065: \begin{figure}

2066: \hspace{3cm} (a) \hspace{7cm}(b)

2067:

2068: \begin{center}

2069: \includegraphics[width=7cm]{smallYule1.ps} \hspace{0.5cm}

2070: \includegraphics[width=7cm]{smallYule2.ps}

2071: %\includegraphics[width=5cm]{smallYule3.ps}

2072: \end{center}

2073: \caption{\label{smallYule}

2074: There are two possibilities how an $SLR$-mark may occur. Here, $SL$

2075: and $LR$ refer to points in the Poisson processes with rates

2076: $\rho_{SL}$ and $\rho_{LR}$.  See text for further explanation.

2077: %(a) The line is

2078: %  first hit by a Poisson process with rate $\rho_{SL}$. The

2079: %  recombining line is split by a Poisson process with rate $\rho_{LR}$

2080: %  (b) The line is hit by the Poisson process with rate $\rho_{LR}$

2081: %  first and only afterwards is hit by the Poisson process with rate

2082: %  $\rho_{SL}$.

2083: }

2084: \end{figure}

2085:

2086:

2087: \noindent

2088: As a next step in the proof of Theorem \ref{T} we now show that

2089: $\Delta_\pi \approx \Xi_\pi$.

2090:

2091: \begin{proposition}\label{PropSecond}

2092:   Let $\pi\in\mathcal P'_{\dickm \ell\cup\dickm r}$ and $\Delta_\pi$

2093:   and $\Xi_\pi$ be as in Definitions \ref{def:3} and \ref{def:4}.

2094:   Then,

2095:   $$ \sup_{\xi\in\mathcal P'_{\dickm \ell\cup\dickm r}}

2096:   \big|\mathbb P[\Delta_\pi = \xi] - \mathbb P[\Xi_\pi=\xi] \big|

2097:   = \mathcal O\Big( \frac{1}{(\log\alpha)^2}\Big). $$

2098: \end{proposition}

2099:

2100: \begin{proof}

2101:   As the mechanism to generate splits in the beneficial background is

2102:   the same for both random partitions, $\Delta_\pi$ and $\Xi_\pi$,

2103:   we concentrate on all other events.

2104:

2105:   The proof follows along the lines of the Yule approximation in the

2106:   case of only one neutral locus, given in \cite[Definition 3.3. and

2107:   Section 4.3.]{EtheridgePfaffelhuberWakolbinger2006}. The crucial

2108:   observation is that by a random time change $t\mapsto\tau$ given by

2109:   $d\tau = (1-X_t)dt$ the frequency path $\mathcal X$, given by

2110:   \eqref{eq:SDE}, is taken to the solution $\mathcal Z = (Z_t)_{t\geq

2111:     0}$ of

2112:   \begin{align} \label{eq:timechange} dZ = \alpha Z \coth(\alpha Z)dt

2113:     + \sqrt{Z} dW

2114:   \end{align}

2115:   with a standard Brownian motion $W$ and $Z_0=0$. This is an

2116:   $\alpha$-supercritical Feller branching process conditioned on

2117:   non-extinction.  It was shown in \cite{EvansOConnell1994} and

2118:   \cite{OConnell1993} that the genealogy of the $\alpha$-supercritical

2119:   branching process is a Yule process with branching rate $\alpha$.

2120:   Observe that the time-transformation $t\mapsto\tau$ only works until

2121:   the supercritical branching process has reached frequency 1. From

2122:   4.5(b) in \cite{EtheridgePfaffelhuberWakolbinger2006} we see that at

2123:   this time the number of lines in the Yule process is Poisson

2124:   distributed with mean $2\alpha$. (The additional factor of 2 arises

2125:   because we made the assumption that the individual offspring

2126:   variance in the underlying Cannings model is 1 rather than 2. See

2127:   also \cite{PfaffelhuberHauboldWakolbinger2006}.) However, as typical

2128:   deviations in this Poisson distribution are of the order

2129:   $\sqrt\alpha\ll\alpha$ we may instead assume that the Yule process

2130:   has $\lfloor 2\alpha \rfloor$ lines.  This was made precise in the

2131:   proof of Proposition 4.7. in

2132:   \cite{EtheridgePfaffelhuberWakolbinger2006}.

2133:

2134:   Moreover, for geometries (i) and (ii) the rates in the process $\xi$

2135:   change at time $\beta$ from $\rho_{SL}(1-X_{T-\beta})$,

2136:   $\rho_{LR}(1-X_{T-\beta})$ to $\rho_{SL}$, $\rho_{LR}$ and from

2137:   $\rho_{LS}(1-X_{T-\beta})$, $\rho_{SR}(1-X_{T-\beta})$ to

2138:   $\rho_{LS}$, $\rho_{SR}$, respectively. In particular, the time-changed

2139:   rates are constant.  Under the random time change the coalescence

2140:   rate (1) changes at time $\beta$ from $1/X_{T-\beta}$ to

2141:   $1/(X_{T-\beta}(1-X_{T-\beta}))$. However, it was shown in

2142:   \cite[Proposition 4.2.]{EtheridgePfaffelhuberWakolbinger2006} that

2143:   the change of these rates can only produce an error in probability

2144:   of order $\mathcal O\big((\log\alpha)^{-2}\big)$. This fact was used

2145:   in \cite[Lemma 4.5., Proposition

2146:   4.7.]{EtheridgePfaffelhuberWakolbinger2006} to prove that the marked

2147:   Yule process gives an accurate approximation in the case for one

2148:   neutral locus. However, this result carries over to the present

2149:   situation because all Poisson processes along the Yule process have

2150:   constant rates.

2151:

2152:   It remains to check whether the equivalence relation $\Xi_\pi$

2153:   coincides with $\Delta_\pi$ given that the change in the coalescence rate

2154:   has no effect. First of all, realize the splits in the beneficial

2155:   background according to Definition \ref{def:3}. Then, take $j,k\in

2156:   \dickm\ell \cup \dickm r$ and trace their partition elements

2157:   backwards up to time $t=0, \beta=T$. We only consider geometry (i)

2158:   and $j\in\dickm\ell, k\in\dickm r$, since the other cases

2159:   $j,k\in\dickm\ell$ and $j,k\in\dickm r$ and all cases for geometry

2160:   (ii) are similar. If we consider the process $\eta^{\mathcal X}$

2161:   from Definition \ref{def:3} without any recombination events we

2162:   would obtain a tree $\Y$ for the genealogy relating $j$ and $k$.

2163:   However, recombination events may cause the $L$-locus $j$ and the

2164:   $R$-locus $k$ to end up in different partition elements of the random

2165:   partition $\Delta_\pi$. This will be the case if and only if one of

2166:   the following events occurs in the process $\eta^{\mathcal X}$:

2167: \begin{itemize}

2168: \item[(a)] a recombination event $(3_i)$ with rate

2169:   $\rho_{SL}\left(1-X_{}\right)$ on

2170:   $\!\!\!\!\!\!\!\!\Yup\!\!\!\!\!\!\!\!$, which takes either $j$ or

2171:   $k$ to the wild-type background before coalescence,

2172: \item[(b)] a recombination event $(4_i)$ with rate

2173:   $\rho_{LR}\left(1-X_{}\right)$ on

2174:   $\!\!\!\!\!\!\!\!\Yupri\!\!\!\!\!\!\!\!$, which takes $k$ to the

2175:   wild-type background before coalescence with $j$,

2176: \item[(c)] an event $(4_i)$ with rate $\rho_{LR}\left(1-X_{}\right)$ on

2177:   $\!\!\!\!\!\!\!\!\Ybottom\!\!\!\!\!\!\!\!$ before (backward in time)

2178:   an event with rate $\rho_{SL}\left(1-X_{}\right)$ happens on that

2179:   branch; in this case $j$ and $k$ have coalesced, but a recombination

2180:   event brings $k$ to the wild-type background without $j$,

2181: \item[(d)] an event $(3_i)$ with rate $\rho_{SL}\left(1-X_{}\right)$

2182:   on $\!\!\!\!\!\!\!\!\Ybottom\!\!\!\!\!\!\!\!$ before (backward in

2183:   time) an event with rate $\rho_{LR}\left(1-X_{}\right)$ happens on

2184:   that branch, which brings both $j$ and $k$ to the wild-type

2185:   background.  Here, an event $(6_i)$ at rate $\rho_{LR}(1-X_{})$ happens

2186:   which splits $j$ and $k$ in the wild-type background.

2187: \end{itemize}

2188: The trees in events (a)-(d) refer to trees generated by

2189: $\eta^{\mathcal X}$. By the random time change and our assumption that

2190: the change in coalescence rate does not alter random partitions we can

2191: as well take trees generated by the Yule process and change the rates

2192: $\rho_{SL}(1-X)$ and $\rho_{LR}(1-X)$ to $\rho_{SL}$ and $\rho_{LR}$.

2193: Hence we are dealing with a Yule tree with branching rates $\alpha$

2194: marked by Poisson processes with rates $\rho_{SL}$ and $\rho_{LR}$

2195: which is the exact situation of Definition \ref{def:4}. Using the

2196: definition of the $SL$-, $LR$- and $SLR$-marks, we note that

2197: \begin{itemize}

2198: \item (a) produces either an $SL$- or an $SLR$-mark on $\Yup$,

2199: \item (b) produces an $LR$-mark on $\Yupri$,

2200: \item (c) and (d) produce either an $LR$- or an $SLR$-mark on

2201:   $\Ybottom$.

2202: \end{itemize}

2203: If none of these marks occur, $j$ and $k$ are in the same partition

2204: element of $\Xi_\pi$ by \eqref{eq:equivGeoi}. Hence $\Delta_\pi$ and

2205: $\Xi_\pi$ coincide with high probability.

2206:

2207: % Observe that events (c) and (d) produce $SLR$-marks on

2208: % $\!\!\!\!\!\!\!\!\Ybottom\!\!\!\!\!\!\!\!$ and the two casesexactly

2209: % correspond to cases (a) and (b) in the generation of $SLR$-marks on

2210: % the Yule tree according to Definition \ref{def:4}.

2211:

2212: %   As long as we have to trace them back along different lines, no

2213: %   recombination event must take place on either of them since they

2214: %   won't coalesce in the wild-type background.  Explicitly, no event

2215: %   with rate ($3_{i}$) must happen to either of the two partition

2216: %   elements and no event at rate ($4_{i}$) should occur on the

2217: %   $k$-line. As soon as $j$ and $k$ belong to the same partition

2218: %   element, they may recombine into the wild-type background together,

2219: %   but must not split there. In case they do not leave the beneficial

2220: %   background, they must not be separated by an event of rate ($4_{i}$)

2221: %   (split where $k$ would leave the beneficial background). Under the

2222: %   time transformation the corresponding rates are rescaled and running

2223: %   the Poisson processes at these constant (rescaled) rates as stated

2224: %   in Definition $\ref{def:4}$ would lead to the marks shown in Table

2225: %   \ref{tab2}.

2226: % \begin{table}

2227: % \begin{center}

2228: % \vspace{1ex}

2229:

2230: % \begin{tabular}{|c|c|c|c|}\hline

2231: %   \rule[-4mm]{0cm}{1cm}lines & $\Delta_\pi$ & $\Xi_\pi$ & mark \\\hline

2232: %   \rule[-4mm]{0cm}{1cm}$\Yup$ & $\rho_{SL}\left(1-X_{T-\beta}\right)$ & $\rho_{SL}$  & $SL$ or $SLR$\\

2233: %   \rule[-4mm]{0cm}{1cm}$\Yri$ & $\rho_{LR}\left(1-X_{T-\beta}\right)$ &$\rho_{LR}$ & $LR$\\

2234: %   \rule[-4mm]{0cm}{1cm}$\Ybottom$ & $\rho_{LR} X_{T-\beta}$& $\rho_{LR}$ &$SLR$ \\\hline

2235: % \end{tabular}

2236: % \end{center}

2237: % \caption {\label{tab2} Events which would lead to the separation of the two loci $j, k$ with their rates before and after the time transformation as well as the corresponding marks the Poisson processes at the rescaled rates would cause on the Yule sample tree $\mathcal{Y}$ }

2238: % \end{table}

2239: % The equivalence of the partition one gets from the process $\Delta_{\pi}$ to that defined by the equivalence relation \eqref{eq:equivGeoi} is immediately clear from Table \ref{tab2}. \\

2240:

2241:

2242: % \begin{tt}

2243: %  xxx  \begin{itemize} \item Is this what was necessary for the conclusion? Should I add more details? \item

2244: %  The probabilities for splits in B, which are realized at first, are only approximately equal, aren't they? In case of def. 5.3. they are realized in Yule time, this should again cause an error in probability of the right order ... Should this be mentioned?  Or is it clear from the references that they are approximately equal?\item

2245: %   maybe make a  figure?

2246: %   \end{itemize}

2247: %   \end{tt}

2248: \end{proof}

2249:

2250: We conclude the proof of Theorem \ref{T} by showing that $\Xi_\pi$

2251: from Definition \ref{def:4} and $\Upsilon_\pi$ from Definition

2252: \ref{def:2} are close in variation distance.

2253:

2254: \begin{proposition}\label{PropThird}

2255:   Let $\pi\in\mathcal P'_{\dickm \ell\cup\dickm r}$ and $\Xi_\pi$ and

2256:   $\Upsilon_\pi$ be as in Definitions \ref{def:4} and \ref{def:2}.

2257:   Then,

2258:  \[ \sup_{\xi\in\mathcal P'_{\dickm \ell\cup\dickm r}}

2259:   \big|\mathbb P[\Xi_\pi = \xi] - \mathbb P[\Upsilon_\pi=\xi] \big| =

2260:   \mathcal O\left( \frac{1}{(\log\alpha)^2}\right). \]

2261: \end{proposition}

2262:

2263: \begin{proof}

2264:   We will only consider geometry (i). The proof for geometry (ii) is

2265:   analogous.

2266:

2267:   After realizing the splits in the beneficial background first

2268:   according to the probabilities given in \eqref{Split} and

2269:   \eqref{eq:Y2}, respectively, $\Xi_\pi$ and $\Upsilon_\pi$ are

2270:   determined by the same equivalence relations \eqref{eq:equivGeoi}

2271:   using the marks which hit the tree according to Definition

2272:   \ref{def:4} and Table \ref{tab:marks}. Hence our proof consists of

2273:   two steps. First, we show that the probabilities given in

2274:   \eqref{Split} and \eqref{eq:Y2} differ only by

2275:   $\mathcal{O}\left((\log \alpha)^{-2} \right)$. Second, we show that

2276:   the error caused by generating the $SL$-, $LR$- and $SLR$-marks

2277:   using \eqref{eq:Y3} instead of Definition \ref{def:4} is

2278:   $\mathcal{O}\left((\log \alpha)^{-2} \right)$.

2279:

2280:   Both assertions rely on the same calculation. Assume a line in the

2281:   Yule tree starts when the full Yule tree has $i_1$ lines for the

2282:   last time and ends when the full Yule tree has $i_2>i_1$ lines for the

2283:   last time.  Additionally, the line is hit by a Poisson process with

2284:   rate $\rho = \gamma\frac{\alpha}{\log\alpha}$. The probability that

2285:   the line is not hit by the Poisson process during the time the Yule

2286:   process has $i$ lines, $i_1 < i\leq i_2$, is

2287:   \[

2288:   \frac{i \alpha}{i\alpha + \rho}

2289:   \]

2290:   because of competing exponential clocks. Analogously, the

2291:   probability that the whole line is not hit is, by a Taylor

2292:   approximation,

2293:   \begin{equation}\label{eq:rec5}\begin{aligned}

2294:       \prod_{i=i_1+1}^{i_2} \frac{i\alpha}{i\alpha+\rho} &= \exp\left(

2295:         \sum_{i=i_1+1}^{i_2} \log \left( 1 -

2296:           \frac{\rho}{i\alpha+\rho}\right) \right) \\& = \exp\left(

2297:         -\frac{\gamma}{\log\alpha} \sum_{i=i_1+1}^{i_2}

2298:         \frac{1}{i+\rho/\alpha}\right) + \mathcal

2299:       O\left(\frac{1}{(\log\alpha)^2}\right) \\ & = \exp\left(

2300:         -\frac{\gamma}{\log\alpha} \sum_{i=i_1+1}^{i_2}

2301:         \frac{1}{i}\right)+ \mathcal

2302:       O\left(\frac{1}{(\log\alpha)^2}\right)= p_{i_1}^{i_2}(\gamma)+

2303:       \mathcal O\left(\frac{1}{(\log\alpha)^2}\right),\end{aligned}

2304:   \end{equation}

2305:   since the neglected terms in the Taylor

2306:   series are of order $\mathcal O \big( \rho^{2}/\alpha^2\big)=

2307:   \mathcal O\big((\log \alpha)^{-2}\big)$ and higher.

2308:

2309:   To prove that \eqref{Split} and \eqref{eq:Y2} coincide

2310:   approximately, observe that

2311:  \[ \mathbb E\left[ \exp\left( - \rho\cdot \int_0^T X_s ds \right)\right] =

2312:   \mathbb E\left[ \exp\left( - \rho\cdot \int_0^T (1-X_s) ds \right)\right]

2313:   \]

2314:   by the time-reversibility of $\mathcal X$. Additionally, the right

2315:   hand side gives the probability that a Poisson process with rate

2316:   $\rho(1-X)$ does not hit a line by time $T$. By the random time

2317:   change $d\tau = (1-X_t)dt$ this is approximately the same as the

2318:   probability that a Poisson process with rate $\rho$ does not hit one

2319:   line in a Yule tree until it has $\lfloor 2\alpha\rfloor$ lines and

2320:   is hence given by $p_{0}^{\lfloor 2\alpha \rfloor}(\gamma)$.

2321:

2322: %   We start by picking a partition element from the original partition

2323: %   $\pi$. For the moment forget about realizing the split before any of

2324: %   the other events. Then the left and right loci in this partition

2325: %   element would be separated at rate $\rho_{LR}X$.  Because of the

2326: %   time reversibility of the sample path $\mathcal{X}$ this rate is

2327: %   equivalent to $\rho_{LR}(1-X)$.  Thus the random time change

2328: %   \eqref{eq:timechange} would also result in a constant rate

2329: %   $\rho_{LR}$. Now consider a process at this rate. Note that for the

2330: %   moment the marks we are looking at are none of those mentioned in

2331: %   Definition \ref{def:4} or Definition \ref{def:2}, but indicating

2332: %   splits in the beneficial background. Afterwards we are returning to

2333: %   realizing these splits beforehand. Realizing events according to the

2334: %   given rate along the tree in Yule time scale instead of the true

2335: %   time scale of Definition \ref{def:4} produces an error of order

2336: %   $\mathcal{O}\left((\log \alpha)^{-2} \right)$ as was shown in the

2337: %   proof of Prop. 4.7 of \cite{EtheridgePfaffelhuberWakolbinger2006}.

2338: %   We start doing so by considering a branch in the Yule tree

2339: %   $\mathcal{Y}$ between the time when there are $i_{1}$ branches on

2340: %   the whole just until there are $i_{1}+1$ branches (see Figure

2341: %   \ref{Sample} for an example of counting in Yule time).

2342: % \begin{figure}

2343: % \begin{center}

2344: % \includegraphics[width=3cm]{YuleSmall.ps}

2345: % \end{center}

2346: % \caption{\label{Sample} Example for Yule times}

2347: % \end{figure}

2348: % Two events may occur on the branch we have picked. It may be marked or one of the branches may be divided where these events are determined by independent Poisson processes at rate $(i_{1}+1) \alpha$ and $\rho_{LR}$. No mark will fall on the chosen branch if the Poisson process at rate $(i_{1}+1) \alpha$ jumps first. The probability for this to happen is equal to

2349: % \[

2350: % \frac{(i_{1}+1) \alpha}{(i_{1}+1)\alpha + \rho_{LR}}

2351: % \]

2352: % Hence the probability that no mark falls on a branch between the times $0$ and $\lfloor 2\alpha \rfloor$ is equal to

2353: % \[

2354: % \prod\limits_{i=2}^{\lfloor 2 \alpha \rfloor} \frac{i \alpha}{i\alpha + \rho_{LR}}

2355: % \]

2356: % By Taylor approximation we further get

2357: % \begin{equation}\label{eq:rec5}\begin{aligned}

2358: % \prod_{i=2}^{\lfloor 2 \alpha \rfloor}

2359: %     \frac{i\alpha}{i\alpha+\rho} &= \exp\left( \sum_{i=2}^{\lfloor 2 \alpha \rfloor} \log

2360: %     \left( 1 - \frac{\rho}{i\alpha+\rho}\right) \right) \\&

2361: %     \approx \exp\left( -\frac \rho\alpha \sum_{i=2}^{\lfloor 2 \alpha \rfloor}

2362: %     \frac{1}{i+\rho/\alpha}\right) \approx \exp\left( -\frac \rho\alpha

2363: %     \sum_{i=2}^{\lfloor 2 \alpha \rfloor} \frac{1}{i}\right),\end{aligned}

2364: %  \end{equation}

2365: %  where the error we are making is of order $\mathcal{O}\left((\log\alpha)^{-2}\right)$ since the neglected terms in the Taylor series are of order $\rho_{LR}^{2}/\alpha= (\log \alpha)^{-2}$ and higher. It follows that the probability for the left and right loci to get separated while they are in the beneficial background according to Definition \ref{def:4} equals

2366: %  \[

2367: %  1-\exp\left(-\rho_{LR}\int_{0}^{T}X_{s}ds \right)=1- p_{1}^{\lfloor 2\alpha \rfloor}(\gamma_{LR}) + \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}} \right)

2368: %  \]

2369: %  where the error term is due to the change from true time to Yule time as well as the approximative calculations above. Since the left hand size equals the probability defined by \eqref{eq:Y2}, we see that by returning to realizing the splits in the beneficial background beforehand to get the partition $\pi'$ according to \eqref{Split} and \eqref{eq:Y2}, we are only producing an error of order $\mathcal{O}\left((\log \alpha)^{-2}\right)$ on the whole.

2370:

2371:   Next, we consider the generation of the $SL$-, $LR$- and

2372:   $SLR$-marks along the Yule tree. The probability that more than one

2373:   event with rate $\rho_{SL}$ and $\rho_{LR}$ hits the Yule tree

2374:   during the time it has $i$ lines is

2375:   \[ \frac{\rho^2}{(i\alpha + \rho)^2} = \mathcal O\left(

2376:     \frac{1}{(\log\alpha)^2}\right). \] Hence we can ignore this

2377:   event. Together with the Markov property of the Poisson process we

2378:   see that the marks on different lines in a sample tree may be

2379:   generated independently once the topology and the total number of

2380:   lines in the full Yule tree is known.

2381:

2382: Consider a branch which starts when the full Yule tree has $i_1$ lines

2383: and ends when it has $i_2$ lines. Using Definition \ref{def:4} this

2384: line is hit by an $SL$-mark iff it is hit by the Poisson process at

2385: rate $\rho_{SL}$ and an independent Poisson process with rate

2386: $\rho_{LR}$ produces no mark between time $0$ and the time the Yule

2387: tree has $i_2$ lines. Hence the probability for an $SL$-mark in

2388: $\Xi_\pi$ is approximately given by

2389: \begin{eqnarray}\begin{aligned} \nonumber

2390:     &\left( 1 - \prod_{i=i_1+1}^{i_2}

2391:       \frac{i\alpha}{i\alpha+\rho_{SL}}\right)\left( \prod_{i=1}^{i_2}

2392:       \frac{i\alpha}{i\alpha+\rho_{LR}}\right) =

2393:     \big(1-p_{i_{1}}^{i_{2}}(\gamma_{SL})\big)

2394:     p_{0}^{i_{2}}(\gamma_{LR}) + \mathcal{O}\left(\frac{1}{(\log

2395:         \alpha)^{2}}\right).

2396:   % & \qquad \qquad \qquad = \left( 1 - \exp\left( -\frac {\rho_{SL}}\alpha

2397:    % \sum_{i=i_1+1}^{i_2} \frac{1}{i}\right)\right) \exp\left( -\frac {\rho_{LR}}\alpha

2398:    % \sum_{i=1}^{i_2} \frac{1}{i}\right) + \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}\right)

2399: \end{aligned}\end{eqnarray}

2400: If a branch is hit by the Poisson process with rate $\rho_{SL}$ but

2401: did not obtain an $SL$-mark, it obtains an $SLR$-mark. Hence the

2402: probability for such a mark is given by

2403: \begin{eqnarray}\begin{aligned} \nonumber

2404:     & \left( 1 - \prod_{i=i_1+1}^{i_2}

2405:       \frac{i\alpha}{i\alpha+\rho_{SL}}\right)\left( 1

2406:       -\prod_{i=1}^{i_2} \frac{i\alpha}{i\alpha+\rho_{LR}}\right) =

2407:     \left(1-p_{i_{1}}^{i_{2}}(\gamma_{SL})\right)\left(1-

2408:       p_{0}^{i_{2}}(\gamma_{LR})\right) +

2409:     \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}\right).

2410: %   & \qquad \qquad    = \left( 1 - \exp\left( -\frac {\rho_{SL}}\alpha

2411:  %   \sum_{i=i_1+1}^{i_2} \frac{1}{i}\right)\right) \left( 1 - \exp\left( -\frac {\rho_{LR}}\alpha

2412:   %  \sum_{i=1}^{i_2} \frac{1}{i}\right)\right)+ \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}\right)

2413: \end{aligned}\end{eqnarray}

2414: The branch is hit by an $LR$-mark if it is hit by the Poisson process

2415: at rate $\rho_{LR}$ but not by the Poisson process with rate

2416: $\rho_{SL}$. Hence the probability for an $LR$-mark is

2417: \begin{eqnarray}\begin{aligned} \nonumber

2418:     &\prod_{i=i_1+1}^{i_2}

2419:     \frac{i\alpha}{i\alpha+\rho_{SL}}\left( 1 -\prod_{i=i_1+1}^{i_2}

2420:     \frac{i\alpha}{i\alpha+\rho_{LR}}\right)

2421:      = p_{i_{1}}^{i_{2}}(\gamma_{SL})\left(1- p_{i_{1}}^{i_{2}}(\gamma_{LR})\right) + \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}\right).

2422:  %    & \qquad \qquad  = \exp\left( -\frac {\rho_{SL}}\alpha

2423:   %  \sum_{i=i_1+1}^{i_2} \frac{1}{i}\right)\left( 1 - \exp\left( -\frac {\rho_{LR}}\alpha

2424:   %  \sum_{i=i_1+1}^{i_2} \frac{1}{i}\right)\right)+ \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}\right)

2425: \end{aligned}\end{eqnarray}

2426: % \item no mark

2427: % \begin{eqnarray}\begin{aligned} \nonumber

2428: % &\prod_{i=i_1+1}^{i_2} \frac{i\alpha}{i\alpha+\rho_{SL}+ \rho_{LR}}

2429: % %\approx

2430: % %\prod_{i=i_1+1}^{i_2} \frac{i\alpha}{i\alpha+\rho_{SL}}

2431: % %\prod_{i=i_1+1}^{i_2} \frac{i\alpha}{i\alpha+\rho_{LR}} \\

2432: %  = p_{i_{1}+1}^{i_{2}}(\gamma_{SL}) \; p_{i_{1}+1}^{i_{2}}(\gamma_{LR}) + \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}\right)

2433: % % = \exp\left( -\frac {\rho_{SL}}\alpha

2434: %  %   \sum_{i=i_1+1}^{i_2} \frac{1}{i}\right)\exp\left( -\frac {\rho_{LR}}\alpha

2435: %  %   \sum_{i=i_1+1}^{i_2} \frac{1}{i}\right) + \mathcal{O}\left(\frac{1}{(\log \alpha)^{2}}\right)

2436: % \end{aligned}\end{eqnarray}

2437: % where we apply \eqref{eq:rec5} with $\rho= \rho_{SL}+ \rho_{LR}$ and after splitting the summation get the product in the last equation.

2438:

2439: As a consequence, the marks in $\mathbf Y_{|\pi'|}$ and

2440: $\mathcal{Y}_{|\pi'|}$ coincide approximately (cf. Table

2441: \ref{tab:marks}) and we are done.

2442:  \end{proof}

2443:

2444: \subsubsection*{Acknowledgement}

2445: We thank Bernhard Haubold and Joachim Hermisson for comments on the

2446: manuscript and Anton Wakolbinger and Franz Merkl for fruitful

2447: discussion. We are grateful to Andy Lehnert, not only for help with

2448: Figure \ref{sim}.

2449:

2450: \newcommand{\etalchar}[1]{$^{#1}$}

2451: \begin{thebibliography}{NWK{\etalchar{+}}05}

2452:

2453: \bibitem[Bar98]{Barton1998}

2454: N.~Barton.

2455: \newblock The effect of hitch-hiking on neutral genealogies.

2456: \newblock {\em Gen. Res.}, 72:123--133, 1998.

2457:

2458: \bibitem[Con05]{hapmap2005}

2459: International~HapMap Consortium.

2460: \newblock {{A} haplotype map of the human genome}.

2461: \newblock {\em Nature}, 437(7063):1299--1320, 2005.

2462:

2463: \bibitem[EO94]{EvansOConnell1994}

2464: S.N. Evans and N.~O'Connell.

2465: \newblock Weighted occupation time for branching particle systems and a

2466:   representation for the supercritical superprocess.

2467: \newblock {\em Canad. Math. Bull.}, 37(2):187--196, 1994.

2468:

2469: \bibitem[EPW06]{EtheridgePfaffelhuberWakolbinger2006}

2470: A.~Etheridge, P.~Pfaffelhuber, and A.~Wakolbinger.

2471: \newblock An approximate sampling formula under genetic hitchhiking.

2472: \newblock {\em Ann. Appl. Probab.}, 16:685--729, 2006.

2473:

2474: \bibitem[Ewe04]{Ewens2004}

2475: W.J. Ewens.

2476: \newblock {\em Mathematical {P}opulation {G}enetics. I. Theoretical

2477:   introduction. Second edition}.

2478: \newblock Springer, 2004.

2479:

2480: \bibitem[FW00]{FayWu2000}

2481: J.C. Fay and C.-I. Wu.

2482: \newblock Hitchhiking under positive {D}arwinian selection.

2483: \newblock {\em Genetics}, 155:1405--1413, 2000.

2484:

2485: \bibitem[GM97]{GriffithsMarjoram1997}

2486: R.C. Griffiths and P.~Marjoram.

2487: \newblock An ancestral recombination graph.

2488: \newblock In {\em Progress in Population Genetics and Human Evolution, IMA

2489:   volumes in Mathematics and its Applications, 87. Springer Verlag, Berlin},

2490:   pages 257--270, 1997.

2491:

2492: \bibitem[Gri03]{Griffiths2003}

2493: R.C. Griffiths.

2494: \newblock The frequency spectrum of a mutation and its age, in a general

2495:   diffusion model.

2496: \newblock {\em Theo. Pop. Biol.}, 64(2):241--251, 2003.

2497:

2498: \bibitem[Hud83]{Hudson1983}

2499: R.R. Hudson.

2500: \newblock Properties of a neutral allele model with intragenic recombination.

2501: \newblock {\em Theo. Pop. Biol.}, 23:183--201, 1983.

2502:

2503: \bibitem[KDH88]{KaplanDardenHudson1988}

2504: N.L. Kaplan, T.~Darden, and R.R. Hudson.

2505: \newblock The {C}oalescent {P}rocess in {M}odels with {S}election.

2506: \newblock {\em Genetics}, 120:819--829, 1988.

2507:

2508: \bibitem[KHL89]{KaplanHudsonLangley1989}

2509: N.L. Kaplan, R.R. Hudson, and C.H. Langley.

2510: \newblock The `{H}itchhiking effect' revisited.

2511: \newblock {\em Genetics}, 123:887--899, 1989.

2512:

2513: \bibitem[KS02]{KimStephan2002}

2514: Y.~Kim and W.~Stephan.

2515: \newblock Detecting a local signature of genetic hitchhiking along a

2516:   recombining chromosome.

2517: \newblock {\em Genetics}, 160:765--777, 2002.

2518:

2519: \bibitem[KT81]{KarlinTaylor1981}

2520: S.~Karlin and H.M. Taylor.

2521: \newblock {\em A second course in stochastic processes}.

2522: \newblock Academic Press London, 1981.

2523:

2524: \bibitem[Lew64]{Lewontin1964}

2525: R.C. Lewontin.

2526: \newblock {The interaction of selection and linkage. I. General considerations;

2527:   Heterotic models}.

2528: \newblock {\em Genetics}, 49:49--67, 1964.

2529:

2530: \bibitem[LS05]{LiStephan2005}

2531: H.~Li and W.~Stephan.

2532: \newblock Maximum-likelihood methods for detecting recent positive selection

2533:   and localizing the selected site in a genome.

2534: \newblock {\em Genetics}, 171:377--384, 2005.

2535:

2536: \bibitem[LSP06]{LehnertStephanPfaffelhuber2006}

2537: A.~Lehnert, W.~Stephan, and P.~Pfaffelhuber.

2538: \newblock A stochastic analysis of linkage disequilibrium under selective

2539:   sweeps.

2540: \newblock {\em submitted}, 2006.

2541:

2542: \bibitem[MSH74]{MaynardSmithHaigh1974}

2543: J.~Maynard~Smith and J.~Haigh.

2544: \newblock The hitch-hiking effect of a favorable gene.

2545: \newblock {\em Gen. Res.}, 23:23--35, 1974.

2546:

2547: \bibitem[Nur05]{Nurminsky2005}

2548: D.~Nurminsky.

2549: \newblock {\em Selective Sweep}.

2550: \newblock Kluwer, 2005.

2551:

2552: \bibitem[NWK{\etalchar{+}}05]{NielsenEtAl2005}

2553: R.~Nielsen, S.~Williamson, Y.~Kim, M.J. Hubisz, A.G. Clark, and C.~Bustamante.

2554: \newblock {{G}enomic scans for selective sweeps using {S}{N}{P} data}.

2555: \newblock {\em Genome Res.}, 15(11):1566--1575, 2005.

2556:

2557: \bibitem[O'C93]{OConnell1993}

2558: N.~O'Connell.

2559: \newblock Yule {P}rocess {A}pproximation for the {S}keleton of a {B}ranching

2560:   {P}rocess.

2561: \newblock {\em J. Appl. Prob.}, 30:725--729, 1993.

2562:

2563: \bibitem[PHW06]{PfaffelhuberHauboldWakolbinger2006}

2564: P.~Pfaffelhuber, B.~Haubold, and A.~Wakolbinger.

2565: \newblock Approximate genealogies under genetic hitchhiking.

2566: \newblock {\em Genetics}, to appear, 2006.

2567:

2568: \bibitem[RT06]{ReedTishkoff2006}

2569: F.A. Reed and S.A. Tishkoff.

2570: \newblock {{P}ositive selection can create false hotspots of recombination}.

2571: \newblock {\em Genetics}, 172(3):2011--2014, 2006.

2572:

2573: \bibitem[SD05]{SchweinsbergDurrett2005}

2574: J.~Schweinsberg and R.~Durrett.

2575: \newblock Random partitions approximating the coalescence of lineages during a

2576:   selective sweep.

2577: \newblock {\em Ann. Appl. Probab.}, 15:1591--1651, 2005.

2578:

2579: \bibitem[SSL06]{StephanSongLangley2006}

2580: W.~Stephan, Y.~Song, and C.~Langley.

2581: \newblock The hitchhiking effect on linkage disequilibrium between linked

2582:   neutral loci.

2583: \newblock {\em Genetics}, 172:2647--2663, 2006.

2584:

2585: \bibitem[STW84]{Saundersetal1984}

2586: I.W. Saunders, S.~Tavar\'e, and G.A. Watterson.

2587: \newblock On the genealogy of nested subsamples from a haploid population.

2588: \newblock {\em Adv. Appl. Probab.}, 16:471--491, 1984.

2589:

2590: \bibitem[SWL92]{StephanWieheLenz1992}

2591: W.~Stephan, T.~Wiehe, and M.~Lenz.

2592: \newblock The effect of strongly selected substitutions on neutral

2593:   polymorphism: analytical results based on diffusion theory.

2594: \newblock {\em Theo. Pop. Biol.}, 41:237--254, 1992.

2595:

2596: \end{thebibliography}

2597:

2598: %\bibliography{PS}

2599: %\bibliographystyle{alpha}

2600:

2601:

2602: \end{document}

2603: