0401:cs0401003/cs0401003

1: \documentclass[12pt]{article}

2: % All margin dimensions are measured from a point one inch from top

3: % and left side of page.

4: \setlength{\oddsidemargin}{0in}   % Left margin on odd-numbered pages (beyond the 1in origin).

5: \setlength{\evensidemargin}{0in}  % Same, for even-numbered pages.

6: \setlength{\textheight}{8.9in}    % Height of the text body, head and foot excluded.

7: \setlength{\textwidth}{6.35in}    % Width of the text body.

8: \setlength{\topmargin}{-0.5in}    % Vertical offset added at the top of the page.

9: % 1pc = 0.421751 cm, 1 cm = 2.37106 pc, 1 pt = 0.0351459 cm

10: \title{Randomized selection with tripartitioning}

11: \author{Krzysztof C. Kiwiel\thanks{Systems Research Institute,

12: %       Polish Academy of Sciences,

13:         Newelska 6, 01--447 Warsaw, Poland

14:         ({\tt kiwiel@ibspan.waw.pl})}}

15: \date{January 4, 2004}

16:

17: % ersatz blackboard characters: a roman `I' followed by a -.18em kern so that it overlaps the F or R; \eqref prints \ref in parentheses in \normalfont

18: \newcommand{\BbbF}{{\rm\normalcolor I\kern-.18em F}}

19: \newcommand{\BbbR}{{\rm\normalcolor I\kern-.18em R}}

20: \newcommand{\eqref}[1]{{\normalfont\normalcolor(\ref{#1})}}

21: \makeatletter

22: % proof environment: \begin{proof} sets a bold `Proof.' heading, \begin{proof}[of X] sets `Proof of X.'

23: \def\proof{%

24:    \def\@proofhead##1{\begin{trivlist}\item[]{\bf\ignorespaces{##1}.}%

25:     \enspace\ignorespaces}% private @-names: the former local \a and \b hid LaTeX's accent commands \a and \b inside every proof

26:    \def\@proofopt[##1]{\@proofhead{Proof\ \ignorespaces{##1}}}%

27:    \@ifnextchar[{\@proofopt}{\@proofhead{Proof}}}

28: \def\endproof{\end{trivlist}}

29: % end-of-proof symbol: a hollow box drawn with .4pt rules (.5em of clear width, as tall as a \normalsize `A'); a \quad is put in front when used in non-inner math mode

30: \def\qed{\relax\protect\ifmmode\ifinner\else\quad\fi\fi

31:     \hbox{\vbox{\hrule height.4pt\hbox{\vbox{\hrule height.4pt

32:     \hbox{\vrule width.4pt\vphantom{\normalsize A}\kern.5em

33:     \vrule width.4pt}\hrule height.4pt}}}}}

34: % subequations: equations inside the environment are numbered <parent number><a, b, c, ...>; \endsubequations restores the saved counter value and the saved \theequation

35: \newtoks\@stequation

36: \def\subequations{\refstepcounter{equation}%

37: \edef\@savedequation{\the\c@equation}%

38: \@stequation=\expandafter{\theequation}%   %only want \theequation

39: \edef\@savedtheequation{\the\@stequation}% %expanded once

40: \edef\oldtheequation{\theequation}%

41: \setcounter{equation}{0}%

42: \def\theequation{\oldtheequation\alph{equation}}}%

43: \def\endsubequations{%

44: \setcounter{equation}{\@savedequation}%

45: \@stequation=\expandafter{\@savedtheequation}%

46: \edef\theequation{\the\@stequation}\global\@ignoretrue}

47: % modified theorem environment: bold heading `#1 #2.' (or `#1 #2 (#3).' when the three-argument form is used), followed by an italic body

48: \def\@begintheorem#1#2{\trivlist

49:     \item[\hskip \labelsep{\bfseries #1\ #2.}]\itshape}

50: \def\@opargbegintheorem#1#2#3{\trivlist

51:     \item[\hskip \labelsep{\bfseries #1\ #2\ (#3).}]\itshape}

52: % equations, figures and tables are numbered <section>.<n> and restart in every section

53: \@addtoreset{equation}{section}% \section now resets the `equation' counter

54: \@addtoreset{figure}{section}

55: \@addtoreset{table}{section}

56: \renewcommand*{\theequation}{\thesection.\arabic{equation}}

57: \renewcommand*{\thefigure}{\thesection.\arabic{figure}}

58: \renewcommand*{\thetable}{\thesection.\arabic{table}}

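59: % fix-up of the eqnarray environment: \@tempa and \@tempb are reference copies of the fleqn and default definitions; the \ifx test below redefines whichever one matches the current \eqnarray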

60: \let\@@eqnsel=\relax

61: \def\@tempa{%

62:     \stepcounter{equation}%

63:     \def\@currentlabel{\p@equation\theequation}%

64:     \global\@eqnswtrue\m@th

65:     \global\@eqcnt\z@

66:     \tabskip\mathindent

67:     \let\\=\@eqncr

68:     \setlength\abovedisplayskip{\topsep}%

69:     \ifvmode

70:       \addtolength\abovedisplayskip{\partopsep}%

71:     \fi

72:     \addtolength\abovedisplayskip{\parskip}%

73:     \setlength\belowdisplayskip{\abovedisplayskip}%

74:     \setlength\belowdisplayshortskip{\abovedisplayskip}%

75:     \setlength\abovedisplayshortskip{\abovedisplayskip}%

76:     $$\everycr{}\halign to\linewidth% $$

77:     \bgroup

78:       \hskip\@centering

79:       $\displaystyle\tabskip\z@skip{##}$\@eqnsel&%

80:       \global\@eqcnt\@ne \hskip \tw@\arraycolsep \hfil${##}$\hfil&%

81:       \global\@eqcnt\tw@ \hskip \tw@\arraycolsep

82:         $\displaystyle{##}$\hfil \tabskip\@centering&%

83:       \global\@eqcnt\thr@@

84:         \hb@xt@\z@\bgroup\hss##\egroup\tabskip\z@skip\cr}%

85: \def\@tempb{%

86:    \stepcounter{equation}%

87:    \def\@currentlabel{\p@equation\theequation}%

88:    \global\@eqnswtrue

89:    \m@th

90:    \global\@eqcnt\z@

91:    \tabskip\@centering

92:    \let\\\@eqncr

93:    $$\everycr{}\halign to\displaywidth\bgroup

94:        \hskip\@centering$\displaystyle\tabskip\z@skip{##}$\@eqnsel

95:       &\global\@eqcnt\@ne\hskip \tw@\arraycolsep \hfil${##}$\hfil

96:       &\global\@eqcnt\tw@ \hskip \tw@\arraycolsep

97:          $\displaystyle{##}$\hfil\tabskip\@centering

98:       &\global\@eqcnt\thr@@ \hb@xt@\z@\bgroup\hss##\egroup

99:          \tabskip\z@skip

100:       \cr

101: }

102: %

103: \ifx\eqnarray\@tempa%     If the fleqn document-class option is in effect

104:     \def\eqnarray{%

105:     \stepcounter{equation}%

106:     \def\@currentlabel{\p@equation\theequation}%

107:     \global\@eqnswtrue\m@th

108:     \global\@eqcnt\z@

109:     \tabskip\mathindent

110:     \let\\=\@eqncr

111:     \setlength\abovedisplayskip{\topsep}%

112:     \ifvmode

113:       \addtolength\abovedisplayskip{\partopsep}%

114:     \fi

115:     \addtolength\abovedisplayskip{\parskip}%

116:     \setlength\belowdisplayskip{\abovedisplayskip}%

117:     \setlength\belowdisplayshortskip{\abovedisplayskip}%

118:     \setlength\abovedisplayshortskip{\abovedisplayskip}%

119:     $$\everycr{}\halign to\linewidth% $$

120:     \bgroup

121:       \hskip\@centering

122:       $\displaystyle\tabskip\z@skip{##}$\@eqnsel&%

123:       \global\@eqcnt\@ne

124:       \@@eqnsel%            \@@eqnsel has replaced \hskip \tw@\arraycolsep!!!

125:       \hfil${{}##{}}$\hfil&%              as in fixup.sty but textstyle!!!

126:       \global\@eqcnt\tw@

127:       \@@eqnsel%           \@@eqnsel has replaced \hskip \tw@\arraycolsep!!!

128:         $\displaystyle{##}$\hfil \tabskip\@centering&%

129:       \global\@eqcnt\thr@@

130:         \hb@xt@\z@\bgroup\hss##\egroup\tabskip\z@skip\cr}%

131: \else\ifx\eqnarray\@tempb%       Else try the default eqnarray environment.

132:    \def\eqnarray{%

133:    \stepcounter{equation}%

134:    \def\@currentlabel{\p@equation\theequation}%

135:    \global\@eqnswtrue

136:    \m@th

137:    \global\@eqcnt\z@

138:    \tabskip\@centering

139:    \let\\\@eqncr

140:    $$\everycr{}\halign to\displaywidth\bgroup

141:        \hskip\@centering$\displaystyle\tabskip\z@skip{##}$\@eqnsel

142:       &\global\@eqcnt\@ne

143:       \@@eqnsel%           \@@eqnsel has replaced \hskip \tw@\arraycolsep!!!

144:       \hfil${{}##{}}$\hfil%              as in fixup.sty but textstyle!!!

145:       &\global\@eqcnt\tw@

146:       \@@eqnsel%           \@@eqnsel has replaced \hskip \tw@\arraycolsep!!!

147:          $\displaystyle{##}$\hfil\tabskip\@centering

148:       &\global\@eqcnt\thr@@ \hb@xt@\z@\bgroup\hss##\egroup

149:          \tabskip\z@skip

150:       \cr}

151: \else \typeout{Warning: Unable to fix unknown version of \string\eqnarray.}

152: \fi\fi

153: \def\@tempa{}			% Free up TeX's memory

154: \def\@tempb{}

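155: % closed-format bibliography: \itemsep is set to zero, so entries follow one another without extra vertical space; the heading is \refname if \chapter is undefined, \bibname otherwise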

156: \@ifundefined{chapter}{%

157:   \renewenvironment{thebibliography}[1]

158:      {\section*{\refname

159:         \@mkboth{\MakeUppercase\refname}{\MakeUppercase\refname}}%

160:       \list{\@biblabel{\@arabic\c@enumiv}}%

161:            {\settowidth\labelwidth{\@biblabel{#1}}%

162:             \leftmargin\labelwidth

163:             \advance\leftmargin\labelsep

164:             \itemsep \z@                 % Suppresses vertical separation.

165:             \@openbib@code

166:             \usecounter{enumiv}%

167:             \let\p@enumiv\@empty

168:             \renewcommand\theenumiv{\@arabic\c@enumiv}}%

169:       \sloppy

170:       \clubpenalty4000

171:       \@clubpenalty \clubpenalty

172:       \widowpenalty4000%

173:       \sfcode`\.\@m}

174:      {\def\@noitemerr

175:        {\@latex@warning{Empty `thebibliography' environment}}%

176:       \endlist}}%

177: {\renewenvironment{thebibliography}[1]

178:      {\section*{\bibname

179:         \@mkboth{\MakeUppercase\bibname}{\MakeUppercase\bibname}}%

180:       \list{\@biblabel{\@arabic\c@enumiv}}%

181:            {\settowidth\labelwidth{\@biblabel{#1}}%

182:             \leftmargin\labelwidth

183:             \advance\leftmargin\labelsep

184:             \itemsep \z@                 % Suppresses vertical separation.

185:             \@openbib@code

186:             \usecounter{enumiv}%

187:             \let\p@enumiv\@empty

188:             \renewcommand\theenumiv{\@arabic\c@enumiv}}%

189:       \sloppy

190:       \clubpenalty4000

191:       \@clubpenalty \clubpenalty

192:       \widowpenalty4000%

193:       \sfcode`\.\@m}

194:      {\def\@noitemerr

195:        {\@latex@warning{Empty `thebibliography' environment}}%

196:       \endlist}}%

197: % Operator names for math mode, all typeset in \operator@font

198: \newcommand*\Argmax{{\operator@font Arg}\max}

199: \newcommand*\Argmin{{\operator@font Arg}\min}

200: \newcommand*\argmax{{\operator@font arg}\max}

201: \newcommand*\argmin{{\operator@font arg}\min}

202: \newcommand*\Exp{\mathord{\operator@font E}}

203: \newcommand*\med{\mathop{\operator@font med}}

204: \newcommand*\Prob{\mathord{\operator@font P}}

205: \newcommand*\rank{\mathop{\operator@font rank}}

206: \newcommand*\var{\mathop{\operator@font var}}

207: \makeatother

208: % Theorem and definition-like environments are numbered together,

209: % starting from number 1 within each section.

210: \newtheorem{theorem}{Theorem}[section]

211: \newtheorem{algorithm}[theorem]{Algorithm}

212: \newtheorem{assumption}[theorem]{Assumption}

213: \newtheorem{corollary}[theorem]{Corollary}

214: \newtheorem{definition}[theorem]{Definition}

215: \newtheorem{example}[theorem]{Example}

216: \newtheorem{examples}[theorem]{Examples}

217: \newtheorem{fact}[theorem]{Fact}

218: \newtheorem{lemma}[theorem]{Lemma}

219: \newtheorem{procedure}[theorem]{Procedure}

220: \newtheorem{proposition}[theorem]{Proposition}

221: \newtheorem{remark}[theorem]{Remark}

222: \newtheorem{remarks}[theorem]{Remarks}

223: % Schemes are numbered alphabetically throughout.

224: \newtheorem{scheme}{Scheme}

225: \renewcommand{\thescheme}{\Alph{scheme}}

226: %-----------------------------------------------------------------------

227: \hyphenation{quick-sel-ect}

228: %-----------------------------------------------------------------------

229:

230: \begin{document}           % End of preamble and beginning of text.

231:

232: \maketitle                 % Produces the title.

233:

234: \begin{abstract}

235: \noindent

236: We show that several versions of Floyd and Rivest's algorithm

237: {\sc Select} [Comm.\ ACM {\bf 18} (1975) 173] for finding the $k$th

238: smallest of $n$ elements require at most $n+\min\{k,n-k\}+o(n)$

239: comparisons on average, even when equal elements occur.  This parallels

240: our recent analysis of another variant due to Floyd and Rivest

241: [Comm.\ ACM {\bf 18} (1975) 165--172].  Our computational results

242: suggest that both variants perform well in practice, and may compete

243: with other selection methods, such as Hoare's {\sc Find} or

244: quickselect with median-of-3 pivots.

245: \end{abstract}

246:

247: \begin{quotation}

248: \noindent{\bf Key words.} Selection, medians, partitioning,

249: computational complexity.

250: \end{quotation}

251:

252: %\begin{quotation}

253: %\noindent{\bf MSC Subject Classifications.} 68W20, 68W05, 68Q25

254: %\end{quotation}

255:

256: %\begin{quotation}

257: %\noindent{\bf Abbreviated title:} Randomized selection.

258: %\end{quotation}

259:

260: %   *** SECTION 1 ***

261: \section{Introduction}

262: \label{s:intro}

263: The {\em selection problem\/} is defined as follows: Given a set

264: $X:=\{x_j\}_{j=1}^n$ of $n$ elements, a total order $<$ on $X$,

265: and an integer $1\le k\le n$, find the {\em $k$th smallest\/}

266: element of $X$, i.e., an element $x$ of $X$ for which there are at

267: most $k-1$ elements $x_j<x$ and at least $k$ elements $x_j\le x$.

268: The {\em median\/} of $X$ is the $\lceil n/2\rceil$th smallest

269: element of $X$.

270:

271: Selection is one of the fundamental problems in computer science;

272: see, e.g., the references in \cite{dohaulzw:lbs,dozw:sm,dozw:msr} and

273: \cite[\S5.3.3]{knu:acpIII2}.  Most references concentrate on the

274: number of comparisons between pairs of elements made in selection

275: algorithms.  In the worst case, selection needs at least

276: $(2+\epsilon)n$ comparisons \cite{dozw:msr}, whereas the algorithm of

277: \cite{blflprrita:tbs} makes at most $5.43n$, that of \cite{scpapi:fm}

278: needs $3n+o(n)$, and that in \cite{dozw:sm} takes $2.95n+o(n)$.  In the

279: average case, for $k\le\lceil n/2\rceil$, at least $n+k-O(1)$

280: comparisons are necessary \cite{cumu:acs}, whereas the best upper bound

281: is $n+k+O(n^{1/2}\ln^{1/2}n)$ \cite[Eq.\ (5.3.3.16)]{knu:acpIII2}.  The

282: classical algorithm {\sc Find} of \cite{hoa:a65}, also known as

283: quickselect, has an upper bound of $3.39n+o(n)$ for $k=\lceil n/2\rceil$

284: in the average case \cite[Ex.\ 5.2.2--32]{knu:acpIII2}, which improves

285: to $2.75n+o(n)$ for median-of-3 pivots \cite{gru:mvh,kimapr:ahf}.

286:

287: In practice {\sc Find} is most popular.  One reason is that the

288: algorithms of \cite{blflprrita:tbs,scpapi:fm} are much slower on the

289: average \cite{mus:iss,val:iss}, whereas \cite{kimapr:ahf} adds that

290: other methods proposed so far, although better than {\sc Find} in

291: theory, are not practical because they are difficult to implement,

292: their constant factors and hidden lower order terms are too large,

293: etc.  It is quite surprising that these references

294: \cite{kimapr:ahf,mus:iss,val:iss} ignore the algorithm {\sc Select}

295: of \cite{flri:etb}, since most textbooks mention that {\sc Select} is

296: asymptotically faster than {\sc Find}.  In contrast, this paper shows

297: that {\sc Select} can compete with {\sc Find} in both theory and

298: practice, even for fairly small values of the input size $n$.

299:

300: We now outline our contributions in more detail.  The initial two

301: versions of {\sc Select} \cite{flri:etb} had gaps in their analysis

302: (cf.\ \cite{bro:ra489,poriti:eds}, \cite[Ex.\ 5.3.3--24]{knu:acpIII2});

303: the first version was validated in \cite{kiw:rsq}, and the second one

304: will be addressed elsewhere.  This paper deals with the third version

305: of {\sc Select} from \cite{flri:asf}, which operates as follows.  Using

306: a small random sample, it finds an element $v$ almost sure to be just

307: above the $k$th if $k<n/2$, or below the $k$th if $k\ge n/2$.

308: Partitioning $X$ about $v$ leaves $\min\{k,n-k\}+o(n)$ elements on

309: average for the next recursive call, in which $k$ is near $1$ or $n$

310: with high probability, so this second call eliminates almost all the

311: remaining elements.

312:

313: Apparently this version of {\sc Select} has not been analyzed in the

314: literature, even in the case of distinct elements.  We first revise it

315: slightly to simplify our analysis.  Then, without assuming that the

316: elements are distinct, we show that {\sc Select} needs at most

317: $n+\min\{k,n-k\}+O(n^{2/3}\ln^{1/3}n)$ comparisons on average, with

318: $\ln^{1/3}n$ replaced by $\ln^{1/2}n$ for the original samples of

319: \cite{flri:asf}.  Thus the average cost of {\sc Select} reaches the

320: lower bounds of $1.5n+o(n)$ for median selection and $1.25n+o(n)$

321: for selecting an element of random rank.  For the latter task,

322: {\sc Find} has the bound $2n+o(n)$ when its pivot is set to the

323: median of a random sample of $s$ elements, with $s\to\infty$,

324: $s/n\to0$ as $n\to\infty$ \cite{maro:oss}; thus {\sc Select}

325: improves upon {\sc Find} mostly by using $k$, the rank of the element

326: to be found, for selecting the pivot $v$ in each recursive call.

327:

328: {\sc Select} can be implemented by using the tripartitioning schemes

329: of \cite[\S5]{kiw:psq}, which include a modified scheme of

330: \cite{bemc:esf}; more traditional bipartitioning schemes

331: \cite[\S2]{kiw:psq} can perform quite poorly in {\sc Select} when

332: equal elements occur.  We add that the implementation of \cite{flri:asf}

333: avoids random number generation by assuming that the input file is in

334: random order, but this results in poor performance on some inputs of

335: \cite{val:iss}; hence our implementation of {\sc Select} employs

336: random sampling.

337:

338: Our computational experience shows that {\sc Select} outperforms even

339: quite sophisticated implementations of {\sc Find} in both comparison

340: counts and computing times.  To save space, only selected results are

341: reported for the version of \cite{val:iss}, but our experience with

342: other versions on many different inputs was similar.  {\sc Select}

343: turned out to be more stable than {\sc Find}, having much smaller

344: variations of solution times and numbers of comparisons.  Quite

345: surprisingly, contrary to the folklore saying that {\sc Select} is only

346: asymptotically faster than {\sc Find}, {\sc Select} makes significantly

347: fewer comparisons even for small inputs

348: (cf.\ Tab.\ \ref{tab:comp_small}).

349:

350: To relate our results with those of \cite{kiw:rsq}, let's call

351: {\sc qSelect} the quintary method of \cite{kiw:rsq} stemming from

352: \cite[\S2.1]{flri:etb}.  {\sc qSelect} eliminates almost all

353: elements on its first call by using two pivots, almost sure to be

354: just below and above the $k$th element, in a quintary partitioning

355: scheme.  Thus most work occurs on the first call of {\sc qSelect},

356: which corresponds to the first two calls of {\sc Select}.  Hence

357: {\sc Select} and {\sc qSelect} share the same efficiency estimates,

358: and in practice make similarly many comparisons. However, {\sc qSelect}

359: tends to be slightly faster on median finding: although its quintary

360: scheme is more complex, most of its work is spent on the first pass

361: through $X$, whereas {\sc Select} first partitions $X$ and then the

362: remaining part (about half) of $X$ on its second call to achieve a

363: similar problem reduction.  On the other hand, {\sc Select} makes

364: fewer comparisons on small inputs.  Of course, future work should assess

365: more fully the relative merits of {\sc Select} and {\sc qSelect}.  For

366: now, the tests reported in \cite{kiw:psq,kiw:rsq} and in \S\ref{s:exp}

367: suggest that both {\sc Select} and {\sc qSelect} can compete

368: successfully with refined implementations of {\sc Find}.

369:

370: The paper is organized as follows.  A general version of {\sc Select} is

371: introduced in \S\ref{s:alg}, and its basic features are analyzed in

372: \S\ref{s:sample}.  The average performance of {\sc Select} is studied

373: in \S\ref{s:average}.  A modification that improves practical

374: performance is introduced in \S\ref{s:modmed}.

375: Partitioning schemes are discussed in \S\ref{s:ternpart}.

376: Finally, our computational results are reported in \S\ref{s:exp}.

377: %The Appendix contains proofs of certain technical results.

378: %Finally, we have a conclusion section.

379:

380: Our notation is fairly standard.

381: $|A|$ denotes the cardinality of a set $A$.

382: In a given probability space, $\Prob$ is the probability measure,

383: $\Exp$ is the mean-value operator and $\Prob[\cdot|{\cal E}]$ is the

384: probability conditioned on an event ${\cal E}$; the complement of

385: ${\cal E}$ is denoted by ${\cal E}'$.

386: %

387: %   *** SECTION 2 ***

388: \section{The algorithm {\sc Select}}

389: \label{s:alg}

390: In this section we describe a general version of {\sc Select} in terms

391: of two auxiliary functions $s(n)$ and $g(n)$ (the sample size and rank

392: gap), which will be chosen later.  We omit their arguments in general,

393: as no confusion can arise.

394: %

395: %   *** ALGORITHM 2.1 ***

396: \begin{algorithm}

397: \label{alg:sel3}

398: \rm

399: \hfil\newline\noindent{\bf {\sc Select}$(X,k)$}

400: (Selects the $k$th smallest element of $X$, with $1\le k\le n:=|X|$)

401: \medbreak\noindent{\bf Step 1} ({\em Initiation\/}).

402: If $n=1$, return $x_1$.

403: %Choose the sample size $s\in\{1\colon n-1\}$ and gap $g>0$.

404: Choose the sample size $s\le n-1$ and gap $g>0$.

405: \medbreak\noindent{\bf Step 2} ({\em Sample selection\/}).

406: Pick randomly a sample $S:=\{y_1,\ldots,y_s\}$ from $X$.

407: \medbreak\noindent{\bf Step 3} ({\em Pivot selection\/}).

408: Let $v$ be the output of {\sc Select}$(S,i_v)$, where

409: \begin{equation}

410: i_v:=\left\{\begin{array}{ll}

411: \rlap{$\min$}\phantom{\max}\left\{\,\lceil ks/n+g\rceil,s\,\right\}&

412: \mbox{if}\ k<n/2,\\

413: \max\left\{\,\lceil ks/n-g\rceil,1\,\right\}&

414: \mbox{if}\ k\ge n/2.

415: \end{array}\right.

416: \label{iv}

417: \end{equation}

418: \medbreak\noindent{\bf Step 4} ({\em Partitioning\/}).

419: By comparing each element $x$ of $X\setminus S$ to $v$, partition $X$

420: into the three sets $L:=\{x\in X:x<v\}$, $E:=\{x\in X:x=v\}$ and

421: $R:=\{x\in X:v<x\}$.

422: \medbreak\noindent{\bf Step 5} ({\em Stopping test\/}).

423: If $|L|<k\le|L\cup E|$, return $v$.

424: \medbreak\noindent{\bf Step 6} ({\em Reduction\/}).

425: If $k\le|L|$, set $\hat X:=L$, $\hat n:=|\hat X|$ and $\hat k:=k$;

426: else set $\hat X:=R$, $\hat n:=|\hat X|$ and $\hat k:=k-|L\cup E|$.

427: \medbreak\noindent{\bf Step 7} ({\em Recursion\/}).

428: Return {\sc Select}$(\hat X,\hat k)$.

429: \end{algorithm}

430:

431: A few remarks on the algorithm are in order.

432: %

433: %   *** REMARKS 2.2 ***

434: \begin{remarks}

435: \label{r:sel3}

436: \rm

437: (a)

438: The correctness and finiteness of {\sc Select} stem by induction from

439: the following observations.  The returns of Steps 1 and 5 deliver the

440: desired element.  At Step 6, $\hat X$ and $\hat k$ are chosen so that

441: the $k$th smallest element of $X$ is the $\hat k$th smallest element

442: of $\hat X$, and $\hat n<n$ (since $v\not\in\hat X$).  Also $|S|<n$ for

443: the recursive call at Step 3.

444: \par(b)

445: When Step 5 returns $v$, {\sc Select} may also return information about

446: the positions of the elements of $X$ relative to $v$.  For instance, if

447: $X$ is stored as an array, its $k$ smallest elements may be placed first

448: via interchanges at Step 4 (cf.\ \S\ref{s:ternpart}).  Hence Step 4 need

449: only compare $v$ with the elements of $X\setminus S$.

450: \par(c)

451: The following elementary property is needed in \S\ref{s:average}.

452: Let $c_n$ denote the maximum number of comparisons taken by {\sc Select}

453: on any input of size $n$.  Since Step 3 makes at most $c_s$

454: comparisons with $s<n$, Step 4 needs at most $n-s$, and Step 7 takes

455: at most $c_{\hat n}$ with $\hat n<n$, by induction $c_n<\infty$ for

456: all $n$.

457: \end{remarks}

458: %

459: %   *** SECTION 3 ***

460: \section{Sampling deviations}

461: \label{s:sample}

462: In this section we analyze general features of sampling used by

463: {\sc Select}.

464: Our analysis hinges on the following bound on the tail of the

465: hypergeometric distribution established in \cite{hoe:pis} and

466: rederived briefly in \cite{chv:thd}.

467: %

468: %   *** FACT 3.1 ***

469: \begin{fact}

470: \label{f:balls3}

471: Let\/ $s$ balls be chosen uniformly at random from a set of\/ $n$ balls,

472: of which\/ $r$ are red, and\/ $r'$ be the random variable representing

473: the number of red balls drawn.  Let\/ $p:=r/n$.  Then

474: \begin{equation}

475: \Prob\left[\,r'\ge ps+g\,\right]\le e^{-2g^2\!/s}\quad\forall g\ge0.

476: \label{Pexpg}

477: \end{equation}

478: \end{fact}

479:

480: Denote by $x_1^*\le\ldots\le x_n^*$ and $y_1^*\le\ldots\le y_s^*$ the

481: sorted elements of the input set $X$ and the sample set $S$,

482: respectively, so that $v=y_{i_v}^*$.  The following result will give

483: bounds on the position of $v$ in the sorted input sequence.

484: %

485: %   *** LEMMA 3.2 ***

486: \begin{lemma}

487: \label{l:rankgen}

488: Suppose\/ $\bar\imath:=\max\{1,\min(\lceil\kappa s\rceil,s)\}$,

489: $\bar\jmath_l:=\max\{\lceil\kappa n-gn/s\rceil,1\}$, and\/

490: $\bar\jmath_r:=\min\{\lceil\kappa n+gn/s\rceil,n\}$, where\/

491: $-g<\kappa s\le s+g$, $1\le s\le n$ and $g\ge0$.  Then\/{\rm:}

492: %

493: \par\indent\rlap{\rm(a)}\hphantom{\rm(a)}

494: $\Prob[y_{\bar\imath}^*<x_{\bar\jmath_l}^*]\le e^{-2g^2\!/s}$ if\/

495: $\bar\imath\ge\lceil\kappa s\rceil$.

496: %

497: \par\indent\rlap{\rm(b)}\hphantom{\rm(a)}

498: $\Prob[x_{\bar\jmath_r}^*<y_{\bar\imath}^*]\le e^{-2g^2\!/s}$ if\/

499: $\bar\imath\le\lceil\kappa s\rceil$.

500: \end{lemma}

501: \begin{proof}

502: Note that $-g<\kappa s\le s+g$ implies $\bar\jmath_l\le n$ and

503: $\bar\jmath_r\ge1$, so both indices are well-defined.

504:

505: (a) If $y_{\bar\imath}^*<x_{\bar\jmath_l}^*$, at least $\bar\imath$

506: samples satisfy $y_i\le x_r^*$, where

507: $r:=\max_{x_j^*<x_{\bar\jmath_l}^*}j$.

508: In the setting of Fact \ref{f:balls3}, we have $r$ red elements

509: $x_j\le x_r^*$, $ps=rs/n$ and $r'\ge\bar\imath$.  Now,

510: $1\le r\le\bar\jmath_l-1$ implies

511: $2\le\bar\jmath_l=\lceil\kappa n-gn/s\rceil<\kappa n-gn/s+1$,

512: so $-rs/n>-\kappa s+g$.  Hence

513: $\bar\imath-ps-g>\kappa s-\kappa s+g-g=0$, i.e., $r'>ps+g$.

514: Thus $\Prob[y_{\bar\imath}^*<x_{\bar\jmath_l}^*]\le e^{-2g^2\!/s}$

515: by \eqref{Pexpg}.

516:

517: (b) If $x_{\bar\jmath_r}^*<y_{\bar\imath}^*$, $s-\bar\imath+1$ samples

518: are at least $x_{\bar\jmath+1}^*$ with

519: $\bar\jmath:=\max_{x_j^*=x_{\bar\jmath_r}^*}j$.  Thus we have

520: $r:=n-\bar\jmath$ red elements $x_j\ge x_{\bar\jmath+1}^*$,

521: $ps=s-\bar\jmath s/n$ and $r'\ge s-\bar\imath+1$.  Since

522: $\bar\imath<\kappa s+1$ and

523: $n>\bar\jmath\ge\bar\jmath_r\ge\kappa n+gn/s$,

524: we get $s-\bar\imath+1-ps-g>\bar\jmath s/n-\kappa s-g\ge\kappa s+g-

525: \kappa s-g=0$.  Hence $r'>ps+g$ and

526: $\Prob[x_{\bar\jmath_r}^*<y_{\bar\imath}^*]\le

527: \Prob[r'\ge ps+g]\le e^{-2g^2\!/s}$ by \eqref{Pexpg}.

528: \qed

529: \end{proof}

530:

531: We now bound the position of $v$ relative to $x_k^*$, $x_{k_l}^*$ and

532: $x_{k_r}^*$, where

533: \begin{equation}

534: k_l:=\max\left\{\,\lceil k-2gn/s\rceil,1\,\right\}

535: \quad\mbox{and}\quad

536: k_r:=\min\left\{\,\lceil k+2gn/s\rceil,n\,\right\}.

537: \label{klkr3}

538: \end{equation}

539: %

540: %   *** COROLLARY 3.3 ***

541: \begin{corollary}

542: \label{c:rankdir3}

543: {\rm(a)}

544: $\Prob[v<x_k^*]\le e^{-2g^2\!/s}$ if\/ $i_v=\lceil ks/n+g\rceil$

545: and\/ $k<n/2$.

546: %

547: \par\indent\rlap{\rm(b)}\hphantom{\rm(a)}

548: $\Prob[x_{k_r}^*<v]\le e^{-2g^2\!/s}$

549: if\/ $k<n/2$.

550: %

551: \par\indent\rlap{\rm(c)}\hphantom{\rm(a)}

552: $\Prob[x_k^*<v]\le e^{-2g^2\!/s}$ if\/ $i_v=\lceil ks/n-g\rceil$

553: and\/ $k\ge n/2$.

554: %

555: \par\indent\rlap{\rm(d)}\hphantom{\rm(a)}

556: $\Prob[v<x_{k_l}^*]\le e^{-2g^2\!/s}$

557: if\/ $k\ge n/2$.

558: %

559: \par\indent\rlap{\rm(e)}\hphantom{\rm(a)}

560: If\/ $k<n/2$, then\/

561: $i_v\ne\lceil ks/n+g\rceil$ iff\/ $n<k+gn/s${\rm;}

562: similarly, if\/ $k\ge n/2$, then\/

563: $i_v\ne\lceil ks/n-g\rceil$ iff\/ $k\le gn/s$.

564: \end{corollary}

565: \begin{proof}

566: Use Lem.\ \ref{l:rankgen} with $\kappa s=ks/n+g$ for (a,b), and

567: $\kappa s=ks/n-g$ for (c,d).

568: \qed

569: \end{proof}

570: %

571: %   *** SECTION 4 ***

572: \section{Average case performance}

573: \label{s:average}

574: In this section we analyze the average performance of {\sc Select} for

575: various sample sizes.

576: %

577: %   *** SUBSECTION 4.1 ***

578: \subsection{Floyd-Rivest's samples}

579: \label{ss:FRsample}

580: %

581: For positive constants $\alpha$ and $\beta$, consider choosing

582: $s=s(n)$ and $g=g(n)$ as

583: \begin{equation}

584: s:=\min\left\{\lceil\alpha f(n)\rceil,n-1\right\}\ \mbox{and}\

585: g:=(\beta s\ln n)^{1/2}\ \mbox{with}\ f(n):=n^{2/3}\ln^{1/3}n.

586: \label{sgf}

587: \end{equation}

588: This form of $g$ gives a probability bound

589: $e^{-2g^2\!/s}=n^{-2\beta}$ for Cor.\ \ref{c:rankdir3}.

590: To get more feeling, suppose $\alpha=\beta=1$ and $s=f(n)$.

591: Let $\phi(n):=f(n)/n$.  Then $s/n=g/s=\phi(n)$ and it will be seen

592: that the recursive call reduces $n$ at least by the factor $4\phi(n)$

593: on average, i.e., $\phi(n)$ is a contraction factor; note that

594: $\phi(n)\approx2.4\%$ for $n=10^6$ (cf.\ Tab.\ \ref{tab:fnphin}).

595: %

596: %   *** TABLE 4.1 ***

597: \begin{table}

598: \caption{Sample size $f(n):=n^{2/3}\ln^{1/3}n$ and relative sample size

599: $\phi(n):=f(n)/n$.}

600: \label{tab:fnphin}

601: \footnotesize

602: \begin{center}

603: \begin{tabular}{ccccccccc}

604: \hline

605: \vphantom{$1^{2^3}$} % Need more vertical space!

606: $n$     & $10^3$ & $10^4$ & $10^5$ & $10^6$ & $5\cdot10^6$ & $10^7$

607:         & $5\cdot10^7$    & $10^8$ \\

608: \hline

609: $f(n)$  & 190.449& 972.953& 4864.76& 23995.0&       72287.1& 117248

610:         & 353885 & 568986 \\

611: $\phi(n)$

612:         & .190449& .097295& .048648& .023995&       .014557& .011725

613:         & .007078& .005690\\

614: \hline

615: \end{tabular}

616: \end{center}

617: \end{table}

618: %

619: %   *** THEOREM 4.1 ***

620: \begin{theorem}

621: \label{t:selFR}

622: Let\/ $C_{nk}$ denote the expected number of comparisons made by

623: {\sc Select} for $s$ and\/ $g$ chosen as in\/ \eqref{sgf} with\/

624: $\beta\ge1/6$.  There exists a positive constant\/ $\gamma$ such

625: that

626: \begin{equation}

627: C_{nk}\le n+\min\{\,k,n-k\,\}+\gamma f(n)\quad\forall1\le k\le n.

628: \label{CnkFR}

629: \end{equation}

630: \end{theorem}

631: \begin{proof}

632: We need a few preliminary facts.

633: The function $\phi(t):=f(t)/t=(\ln t/t)^{1/3}$ decreases to $0$ on

634: $[e,\infty)$, whereas $f(t)$ grows to infinity on $[2,\infty)$.

635: Let $\delta:=4(\beta/\alpha)^{1/2}$.

636: Pick $\bar n\ge3$ large enough so that

637: $e-1\le\alpha f(\bar n)\le\bar n-1$ and $e\le\delta f(\bar n)$.

638: Let $\bar\alpha:=\alpha+1/f(\bar n)$.

639: Then, by \eqref{sgf} and the monotonicity of $f$ and $\phi$, we have

640: for $n\ge\bar n$

641: \begin{equation}

642: s\le\bar\alpha f(n)\quad\mbox{and}\quad

643: f(s)\le\bar\alpha\phi(\bar\alpha f(\bar n))f(n),

644: \label{sfsFR}

645: \end{equation}

646: \begin{equation}

647: f(\lfloor\delta f(n)\rfloor)\le f(\delta f(n))\le

648: \delta\phi(\delta f(\bar n))f(n).

649: \label{flfloordeltaFR}

650: \end{equation}

651: For instance, the first inequality of \eqref{sfsFR} yields

652: $f(s)\le f(\bar\alpha f(n))$, whereas

653: $$

654: f(\bar\alpha f(n))=\bar\alpha\phi(\bar\alpha f(n))f(n)\le

655: \bar\alpha\phi(\bar\alpha f(\bar n))f(n).

656: $$

657: Also for $n\ge\bar n$,

658: we have $s=\lceil\alpha f(n)\rceil=\alpha f(n)+\epsilon$ with

659: $\epsilon\in[0,1)$ in \eqref{sgf}.  Writing $s=\tilde\alpha f(n)$ with

660: $\tilde\alpha:=\alpha+\epsilon/f(n)\in[\alpha,\bar\alpha)$, we deduce

661: from \eqref{sgf} that

662: \begin{equation}

663: gn/s=(\beta/\tilde\alpha)^{1/2}f(n)\le(\beta/\alpha)^{1/2}f(n).

664: \label{gnsboundFR}

665: \end{equation}

666: In particular, $4gn/s\le\delta f(n)$, since

667: $\delta:=4(\beta/\alpha)^{1/2}$.  Next, \eqref{sgf} implies

668: \begin{equation}

669: ne^{-2g^2\!/s}\le

670: n^{1-2\beta}=f(n)n^{1/3-2\beta}\ln^{-1/3}n.

671: \label{ne2g2sFR}

672: \end{equation}

673: Using the monotonicity of $f$ and $\phi$, increase $\bar n$ if necessary

674: to get for all $n\ge\bar n$

675: \begin{equation}

676: 2\bar\alpha\phi(\bar\alpha f(\bar n))+

677: \delta\phi(\delta f(\bar n))+2n^{-2\beta}+

678: 2\max\left\{\,[\delta f(n)]^{2/3-2\beta}n^{-2/3},

679: n^{-2\beta}\,\right\}\le0.95.

680: \label{0.95FR}

681: \end{equation}

682: By Rem.\ \ref{r:sel3}(c), there is $\gamma$ such that \eqref{CnkFR}

683: holds for all $n\le\bar n$; increasing $\gamma$ if necessary, and

684: using the monotonicity of $f$ and the assumption $\beta\ge1/6$,

685: we have for all $n\ge\bar n$

686: \begin{equation}

687: 2\bar\alpha+2\delta+5n^{1/3-2\beta}\ln^{-1/3}n+

688: 3\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},

689: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}\le0.05\gamma.

690: \label{0.05FR}

691: \end{equation}

692:

693: Let $n'\ge\bar n$.  Assuming \eqref{CnkFR} holds for all $n\le n'$,

694: for induction let $n=n'+1$.

695:

696: We need to consider the following two cases in the first call of

697: {\sc Select}.

698:

699: {\em Left case\/}: $k<n/2$.

700: First, suppose the event

701: ${\cal E}_l:=\{x_k^*\le v\le x_{k_r}^*\}$ occurs.  By the rules of

702: Steps 4--6, we have $\hat X=L$ (from $x_k^*\le v$), $\hat k=k$ and

703: $\hat n:=|\hat X|\le k_r-1$ (from $v\le x_{k_r}^*$); since

704: $k_r<k+2gn/s+1$ by \eqref{klkr3}, we get the two (equivalent) bounds

705: \begin{equation}

706: \hat n<k+2gn/s\quad\mbox{and}\quad \hat n-\hat k<2gn/s.

707: \label{hatnleft}

708: \end{equation}

709: Note that if $i_v=\lceil ks/n+g\rceil$ then,

710: by Cor.\ \ref{c:rankdir3}(a,b), the Boole--Bonferroni inequality and the

711: choice \eqref{sgf}, the complement ${\cal E}_l'$ of ${\cal E}_l$ has

712: $\Prob[{\cal E}_l']\le2e^{-2g^2\!/s}=2n^{-2\beta}$.

713: Second, if $i_v\ne\lceil ks/n+g\rceil$, then $n<k+gn/s$

714: (Cor.\ \ref{c:rankdir3}(e)) combined with $k<n/2$ gives $n<2gn/s$;

715: hence $\hat n-\hat k<\hat n<n<2gn/s$ implies \eqref{hatnleft}.  Since

716: also ${\cal E}_l$ implies \eqref{hatnleft}, we have

717: \begin{equation}

718: \Prob[{\cal A}_l']\le2n^{-2\beta}\quad\mbox{for}\quad

719: {\cal A}_l:=\left\{\,\hat n-\hat k<2gn/s\,\right\}.

720: \label{Al}

721: \end{equation}

722:

723: {\em Right case\/}: $k\ge n/2$.

724: First, suppose the event

725: ${\cal E}_r:=\{x_{k_l}^*\le v\le x_k^*\}$ occurs.  By the rules of

726: Steps 4--6, we have $\hat X=R$ (from $v\le x_k^*$),

727: $\hat n-\hat k=n-k$ and $\hat n:=|\hat X|\le n-k_l$ (from

728: $x_{k_l}^*\le v$); since $k_l\ge k-2gn/s$ by \eqref{klkr3}, we get

729: the two (equivalent) bounds

730: \begin{equation}

731: \hat n\le n-k+2gn/s\quad\mbox{and}\quad\hat k\le2gn/s,

732: \label{hatnright}

733: \end{equation}

734: using $\hat n-\hat k=n-k$.

735: If $i_v=\lceil ks/n-g\rceil$ then, by Cor.\ \ref{c:rankdir3}(c,d),

736: the complement ${\cal E}_r'$ of ${\cal E}_r$ has

737: $\Prob[{\cal E}_r']\le2e^{-2g^2\!/s}=2n^{-2\beta}$.

738: Second, if $i_v\ne\lceil ks/n-g\rceil$, then $k\le gn/s$

739: (Cor.\ \ref{c:rankdir3}(e)) combined with $k\ge n/2$ gives

740: $n\le2gn/s$; hence $\hat k\le\hat n<n\le2gn/s$ implies

741: \eqref{hatnright}.  Thus

742: \begin{equation}

743: \Prob[{\cal A}_r']\le2n^{-2\beta}\quad\mbox{for}\quad

744: {\cal A}_r:=\left\{\,\hat k\le2gn/s\,\right\}.

745: \label{Ar}

746: \end{equation}

747:

748: Since $k<n-k$ if $k<n/2$, $n-k\le k$ if $k\ge n/2$, \eqref{hatnleft} and

749: \eqref{hatnright} yield

750: \begin{equation}

751: \Prob[{\cal B}']\le2n^{-2\beta}\quad\mbox{for}\quad

752: {\cal B}:=\left\{\,\hat n\le\min\{\,k,n-k\,\}+2gn/s\,\right\}.

753: \label{B}

754: \end{equation}

755: Note that $\min\{k,n-k\}\le\lfloor n/2\rfloor\le n/2$; this relation

756: will be used implicitly below.

757:

758: For the recursive call of Step 7, let $\hat s$, $\hat g$ and

759: $\hat\imath_v$ denote the quantities generated as in \eqref{sgf} and

760: \eqref{iv} with $n$ and $k$ replaced by $\hat n$ and $\hat k$, let

761: $\hat v$ be the pivot found at Step 3, and let $\check X$, $\check n$

762: and $\check k$ correspond to $\hat X$, $\hat n$ and $\hat k$ at Step 7,

763: so that $\check n:=|\check X|<\hat n$.

764:

765: The cost of selecting $v$ and $\hat v$ at Step 3 may be estimated as

766: \begin{equation}

767: C_{si_v}+C_{\hat s\hat\imath_v}\le

768: 1.5s+\gamma f(s)+1.5\hat s+\gamma f(\hat s)\le 3s+2\gamma f(s),

769: \label{CsivFR}

770: \end{equation}

771: since $f$ is increasing and \eqref{CnkFR} holds for

772: $\hat s\le s\le n-1=n'$ (cf.\ \eqref{sgf}) from $\hat n<n$.

773:

774: Let $c:=n-s$ and $\hat c:=\hat n-\hat s$ denote the costs of Step 4

775: for the two calls.  Since $0\le\hat c<n$ and

776: $\Exp\hat c=\Exp[\hat c|{\cal B}]\Prob[{\cal B}]+

777: \Exp[\hat c|{\cal B}']\Prob[{\cal B}']\le

778: \Exp[\hat c|{\cal B}]+n\Prob[{\cal B}']$, by \eqref{B} we have

779: \begin{equation}

780: c+\Exp\hat c\le n-s+\min\{\,k,n-k\,\}+2gn/s+2n^{1-2\beta}.

781: \label{cEhatc}

782: \end{equation}

783:

784: Using \eqref{CnkFR} again with $\check n<n$,

785: the cost of finishing up at Step 7 is at most

786: \begin{equation}

787: \Exp C_{\check n\check k}\le

788: \Exp\left[\,1.5\check n+\gamma f(\check n)\,\right]=

789: 1.5\Exp \check n+\gamma\Exp f(\check n).

790: \label{ECcheckn}

791: \end{equation}

792: Thus we need suitable bounds for $\Exp\check n$ and $\Exp f(\check n)$,

793: which may be derived as follows.

794:

795: To generalize \eqref{B} to the recursive call, consider the events

796: \begin{equation}

797: \hat{\cal B}:=\left\{\,\check n\le\min\{\,\hat k,\hat n-\hat k\,\}+

798: 2\hat g\hat n/\hat s\,\right\}

799: \quad\mbox{and}\quad

800: {\cal C}:=\left\{\,\check n\le\lfloor\delta f(n)\rfloor\,\right\}.

801: \label{hatBC}

802: \end{equation}

803: By \eqref{Al} and \eqref{Ar}, $\hat{\cal B}\cap{\cal A}_l$ and

804: $\hat{\cal B}\cap{\cal A}_r$ imply ${\cal C}$, since

805: $2gn/s+2\hat g\hat n/\hat s\le\delta f(n)$ by \eqref{gnsboundFR} with

806: $\hat n<n$ and $\delta:=4(\beta/\alpha)^{1/2}$.  For the recursive

807: call, proceeding as in the derivation of \eqref{B} with $n$ replaced

808: by $\hat n=i$, $k$ by $\hat k$, etc., shows that, due to random

809: sampling,

810: \begin{equation}

811: \Prob[\hat{\cal B}'|{\cal A}_l,\hat n=i]\le2i^{-2\beta}

812: \quad\mbox{and}\quad

813: \Prob[\hat{\cal B}'|{\cal A}_r,\hat n=i]\le2i^{-2\beta}.

814: \label{PB'AlB'Ar}

815: \end{equation}

816:

817: In the left case of $k<n/2$, using $\check n<n$ and

818: $\Prob[{\cal A}_l']\le2n^{-2\beta}$ (cf.\ \eqref{Al}), we get

819: $$

820: \Exp\check n=\Exp[\check n|{\cal A}_l]\Prob[{\cal A}_l]+

821: \Exp[\check n|{\cal A}_l']\Prob[{\cal A}_l']\le

822: \Exp[\check n|{\cal A}_l]+n2n^{-2\beta}.

823: $$

824: Partitioning ${\cal A}_l$ into the events

825: ${\cal D}_i:={\cal A}_l\cap\{\hat n=i\}$, $i=0\colon n-1$

826: ($\hat n<n$ always), we have

827: $$

828: \Exp[\check n|{\cal A}_l]=\sum_{i=0}^{n-1}

829: \Exp[\check n|{\cal D}_i]\Prob[{\cal D}_i|{\cal A}_l]\le

830: \max_{i=0\colon n-1}\Exp[\check n|{\cal D}_i],

831: $$

832: where $\Exp[\check n|{\cal D}_i]\le\lfloor\delta f(n)\rfloor$ if

833: $i\le\lfloor\delta f(n)\rfloor+1$, because $\check n<\hat n$ always.

834: As for the remaining terms,

835: $\hat{\cal B}\cap{\cal A}_l\subset{\cal C}$ implies

836: $\Prob[{\cal C}'|{\cal D}_i]\le\Prob[\hat{\cal B}'|{\cal D}_i]\le

837: 2i^{-2\beta}$ by \eqref{PB'AlB'Ar}, where

838: ${\cal C}:=\{\check n\le\lfloor\delta f(n)\rfloor\}$

839: and $\check n<\hat n=i$ when the event ${\cal D}_i$ occurs, so

840: $\Exp[\check n|{\cal D}_i]\le\lfloor\delta f(n)\rfloor+i2i^{-2\beta}$.

841: Hence

842: $$

843: \max_{i=0\colon n-1}\Exp[\check n|{\cal D}_i]\le

844: \lfloor\delta f(n)\rfloor+

845: \max_{i=\lfloor\delta f(n)\rfloor+2\colon n-1}2i^{1-2\beta},

846: $$

847: where the final term is omitted if $\lfloor\delta f(n)\rfloor>n-3$;

848: otherwise it is at most

849: $$

850: 2\max\left\{\,(\lfloor\delta f(n)\rfloor+1)^{1-2\beta},

851: n^{1-2\beta}\,\right\}\le

852: 2\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},

853: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}f(n),

854: $$

855: since $\max_{i=\lfloor\delta f(n)\rfloor+1\colon n}2i^{1-2\beta}$

856: is bounded as above (consider $\beta\ge1/2$, then $\beta<1/2$ and use

857: $\delta f(n)<\lfloor\delta f(n)\rfloor+1$, the monotonicity of $f$ and

858: \eqref{ne2g2sFR} for the final inequality).

859: Collecting the preceding estimates, we obtain

860: \begin{equation}

861: \Exp\check n\le\lfloor\delta f(n)\rfloor+2n^{1-2\beta}+

862: 2\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},

863: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}f(n).

864: \label{Echeckn}

865: \end{equation}

866: Similarly, replacing $\check n$ by $f(\check n)$ in our derivations

867: and using the monotonicity of $f$ yields

868: \begin{subequations}

869: \label{Efcheckn}

870: \begin{equation}

871: \Exp f(\check n)\le f(\lfloor\delta f(n)\rfloor)+2f(n)n^{-2\beta}+

872: \max_{i=\lfloor\delta f(n)\rfloor+2\colon n-1}2f(i)i^{-2\beta},

873: \label{Efcheckn:a}

874: \end{equation}

875: where the final term is omitted if $\lfloor\delta f(n)\rfloor>n-3$;

876: otherwise it is at most

877: \begin{equation}

878: 2\max\left\{\,

879: \frac{f(\lfloor\delta f(n)\rfloor+1)}

880: {(\lfloor\delta f(n)\rfloor+1)^{2\beta}},

881: \frac{f(n)}{n^{2\beta}}\,\right\}\le

882: 2\max\left\{\,[\delta f(n)]^{2/3-2\beta}n^{-2/3},

883: n^{-2\beta}\,\right\}f(n).

884: \label{Efcheckn:b}

885: \end{equation}

886: \end{subequations}

887: To see this, use the monotonicity of $f$ and the fact that for $i\le n$

888: (cf.\ \eqref{sgf})

889: $$

890: f(i)i^{-2\beta}\!/f(n)=i^{2/3-2\beta}n^{-2/3}(\ln i/\ln n)^{1/3}\le

891: i^{2/3-2\beta}n^{-2/3}.

892: $$

893:

894: For the right case, replace ${\cal A}_l$ by ${\cal A}_r$ in the

895: preceding paragraph to get \eqref{Echeckn}--\eqref{Efcheckn}.

896:

897: Add the costs \eqref{CsivFR}, \eqref{cEhatc} and \eqref{ECcheckn},

898: using \eqref{Echeckn}--\eqref{Efcheckn}, to get

899: \begin{eqnarray*}

900: C_{nk}&\le&3s+2\gamma f(s)+n-s+\min\{\,k,n-k\,\}+2gn/s+2n^{1-2\beta}\\

901: &&{}+1.5\lfloor\delta f(n)\rfloor+3n^{1-2\beta}+

902: 3\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},

903: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}f(n)\\

904: &&{}+\gamma f(\lfloor\delta f(n)\rfloor)+

905: 2\gamma f(n)n^{-2\beta}+

906: 2\gamma\max\left\{\,[\delta f(n)]^{2/3-2\beta}n^{-2/3},

907: n^{-2\beta}\,\right\}f(n).

908: \end{eqnarray*}

909: Now, using the bounds \eqref{sfsFR}--\eqref{flfloordeltaFR},

910: $2gn/s\le\frac12\delta f(n)$ (cf.\ \eqref{gnsboundFR}) and

911: \eqref{ne2g2sFR} gives

912: \begin{eqnarray*}

913: \lefteqn{C_{nk}\le n+\min\{\,k,n-k\,\}}\\

914: &&{}+\Big[2\bar\alpha+2\delta+5n^{1/3-2\beta}\ln^{-1/3}n+

915: 3\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},

916: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}\Big]f(n)\\

917: &&{}+\left[2\bar\alpha\phi(\bar\alpha f(\bar n))+

918: \delta\phi(\delta f(\bar n))+2n^{-2\beta}+

919: 2\max\left\{\,[\delta f(n)]^{2/3-2\beta}n^{-2/3},

920: n^{-2\beta}\,\right\}\right]\gamma f(n).

921: \end{eqnarray*}

922: By \eqref{0.95FR}--\eqref{0.05FR}, the two bracketed terms above are

923: at most $0.05\gamma f(n)$ and $0.95\gamma f(n)$, respectively; thus

924: \eqref{CnkFR} holds as required.

925: \qed

926: \end{proof}

927: %

928: %   *** SUBSECTION 4.2 ***

929: \subsection{Other sampling strategies}

930: \label{ss:othersample}

931: %

932: We now indicate briefly how to adapt the proof of Thm \ref{t:selFR}

933: to several variations on \eqref{sgf}; a choice similar to

934: \eqref{sgfFRsn2/3} below was used in \cite{flri:asf}.

935: %

936: %   *** REMARKS 4.2 ***

937: \begin{remarks}

938: \label{r:selFR}

939: \rm

940: (a)

941: Theorem \ref{t:selFR} remains true for $\beta\ge1/6$ and

942: \eqref{sgf} replaced by

943: \begin{equation}

944: s:=\min\left\{\left\lceil\alpha n^{2/3}\right\rceil,n-1\right\},\

945: g:=(\beta s\ln n)^{1/2}\ \mbox{and}\

946: f(n):=n^{2/3}\ln^{1/2}n.

947: \label{sgfFRsn2/3}

948: \end{equation}

949: Indeed, using $e^{3/2}-1\le\alpha\bar n^{2/3}\le\bar n-1$,

950: $e^{3/2}\le\delta f(\bar n)$, $\bar\alpha:=\alpha+\bar n^{-2/3}$

951: and $s=\tilde\alpha n^{2/3}$ with $\tilde\alpha\in[\alpha,\bar\alpha)$

952: yields \eqref{sfsFR}--\eqref{gnsboundFR} as before, and $\ln^{-1/2}$

953: replaces $\ln^{-1/3}$ in \eqref{ne2g2sFR}, \eqref{0.05FR} and

954: \eqref{Echeckn}.

955: \par(b)

956: Theorem \ref{t:selFR} holds for the following modification of

957: \eqref{sgf} with $\epsilon_l>1$

958: \begin{equation}

959: s:=\min\left\{\lceil\alpha f(n)\rceil,n-1\right\}\ \mbox{and}\

960: g:=(\beta s\ln^{\epsilon_l}n)^{1/2}\ \mbox{with}\

961: f(n):=n^{2/3}\ln^{\epsilon_l/3}n.

962: \label{sgfFRlneps}

963: \end{equation}

964: First, using $e^{\epsilon_l}-1\le\alpha f(\bar n)\le\bar n-1$ and

965: $e^{\epsilon_l}\le\delta f(\bar n)$ gives

966: \eqref{sfsFR}--\eqref{gnsboundFR} as before.  Next, fix

967: $\tilde\beta\ge1/6$.  Let $\beta_n:=\beta\ln^{\epsilon_l-1}n$.

968: Increase $\bar n$ if necessary so that $\beta_i\ge\tilde\beta$ for

969: all $i\ge\min\{\bar n,\lceil\delta f(\bar n)\rceil\}$; then

970: replace $\beta$ by $\tilde\beta$ and $\ln^{-1/3}$ by

971: $\ln^{-\epsilon_l/3}$ in \eqref{ne2g2sFR} and below.

972: \par(c)

973: Several other replacements for \eqref{sgf} may be analyzed as in

974: \cite[\S\S4.1--4.2]{kiw:rsq}.

975: \par(d)

976: None of these choices gives $f(n)$ better than that in \eqref{sgf} for

977: the bound \eqref{CnkFR}.

978: \end{remarks}

979:

980: We now comment briefly on the possible use of sampling with

981: replacement.

982: %

983: %   *** REMARKS 4.3 ***

984: \begin{remarks}

985: \label{r:binsample}

986: \rm

987: (a)

988: Suppose Step 2 of {\sc Select} employs sampling with replacement.

989: Since the tail bound \eqref{Pexpg} remains valid for the binomial

990: distribution \cite{chv:thd,hoe:pis}, Lemma \ref{l:rankgen} is not

991: affected.  However, when Step 4 no longer skips comparisons with

992: the elements of $S$, $-s$ in \eqref{cEhatc} is replaced by $0$; the

993: resulting change in the bound on $C_{nk}$ only needs replacing

994: $2\bar\alpha$ in \eqref{0.05FR} by $3\bar\alpha$.  Hence the

995: preceding results remain valid.

996: \par(b)

997: Of course, sampling with replacement needs additional storage for

998: $S$.  However, the increase in both storage and the number of

999: comparisons may be tolerated because the sample sizes are relatively

1000: small.

1001: \end{remarks}

1002: %

1003: %   *** SUBSECTION 4.3 ***

1004: \subsection{Handling small subfiles}

1005: \label{ss:subfile}

1006: %

1007: Since the sampling efficiency decreases when $X$ shrinks, consider the

1008: following modification.  For a fixed cut-off parameter

1009: $n_{\rm cut}\ge1$, let sSelect$(X,k)$ be a ``small-select'' routine that

1010: finds the $k$th smallest element of $X$ in at most $C_{\rm cut}<\infty$

1011: comparisons when $|X|\le n_{\rm cut}$ (even bubble sort will do).  Then

1012: {\sc Select} is modified to start with the following

1013: \medbreak\noindent{\bf Step 0} ({\em Small file case\/}).

1014: If $n:=|X|\le n_{\rm cut}$, return sSelect$(X,k)$.

1015:

1016: Our preceding results remain valid for this modification.  In fact it

1017: suffices if $C_{\rm cut}$ bounds the {\em expected\/} number of

1018: comparisons of sSelect$(X,k)$ for $n\le n_{\rm cut}$.  For instance,

1019: \eqref{CnkFR} holds for $n\le n_{\rm cut}$ and $\gamma\ge C_{\rm cut}$,

1020: and by induction as in Rem.\ \ref{r:sel3}(c) we have $C_{nk}<\infty$

1021: for all $n$, which suffices for the proof of Thm \ref{t:selFR}.

1022:

1023: Another advantage is that even a small $n_{\rm cut}$ ($1000$, say) nicely

1024: limits the stack space for recursion.  Specifically, the tail

1025: recursion of Step 7 is easily eliminated (set $X:=\hat X$, $k:=\hat k$

1026: and go to Step 0), and the calls of Step 3 deal with subsets whose

1027: sizes quickly reach $n_{\rm cut}$.  For example, for the choice of

1028: \eqref{sgf} with $\alpha=1$ and $n_{\rm cut}=600$, at most four

1029: recursive levels occur for $n\le2^{31}\approx2.15\cdot10^9$.

1030: %

1031: %   *** SECTION 5 ***

1032: \section{A modified version}

1033: \label{s:modmed}

1034: We now consider a modification inspired by a remark of

1035: \cite{bro:ra489}.  For $k$ close to $\lceil n/2\rceil$, by symmetry

1036: it is best to choose $v$ as the sample median with

1037: $i_v=\lceil s/2\rceil$, thus attempting to get $v$ close to $x_k^*$

1038: instead of $x_{\lceil k-gn/s\rceil}^*$ or $x_{\lceil k+gn/s\rceil}^*$;

1039: then more elements are eliminated.  Hence we may let

1040: \begin{equation}

1041: i_v:=\left\{\begin{array}{ll}

1042: \lceil ks/n+g\rceil&\mbox{if}\ k<n/2-gn/s,\\

1043: \lceil s/2\rceil&

1044: \mbox{if}\ n/2-gn/s\le k\le n/2+gn/s,\\

1045: \lceil ks/n-g\rceil&\mbox{if}\ k>n/2+gn/s.

1046: \end{array}\right.

1047: \label{iv3}

1048: \end{equation}

1049: Note that \eqref{iv3} coincides with \eqref{iv} in the {\em left\/} case

1050: of $k<n/2-gn/s$ and the {\em right\/} case of $k>n/2+gn/s$, but the

1051: {\em middle\/} case of $n/2-gn/s\le k\le n/2+gn/s$ fixes $i_v$

1052: at the median position $\lceil s/2\rceil$; in fact $i_v$ is the median

1053: of the three values in \eqref{iv3}:

1054: \begin{equation}

1055: i_v:=\max\left\{\,\min\left(\,\lceil ks/n+g\rceil,

1056: \lceil s/2\rceil\,\right),\lceil ks/n-g\rceil\,\right\}.

1057: \label{iv3med}

1058: \end{equation}

1059: Corollary \ref{c:rankdir3} remains valid for the left and right cases.

1060: For the middle case, letting

1061: \begin{equation}

1062: j_l:=\max\left\{\,\lceil n/2-gn/s\rceil,1\,\right\}

1063: \quad\mbox{and}\quad

1064: j_r:=\min\left\{\,\lceil n/2+gn/s\rceil,n\,\right\},

1065: \label{jljr3}

1066: \end{equation}

1067: we obtain from Lemma \ref{l:rankgen} with $\kappa=1/2$ the following

1068: complement of Corollary \ref{c:rankdir3}.

1069: %

1070: %   *** COROLLARY 5.1 ***

1071: \begin{corollary}

1072: \label{c:iv3}

1073: $\Prob[v<x_{j_l}^*]\le e^{-2g^2\!/s}$ and\/

1074: $\Prob[x_{j_r}^*<v]\le e^{-2g^2\!/s}$

1075: if\/ $n/2-gn/s\le k\le n/2+gn/s$.

1076: \end{corollary}

1077: %

1078: %   *** THEOREM 5.2 ***

1079: \begin{theorem}

1080: \label{t:selFRmed}

1081: Theorem\/ {\rm\ref{t:selFR}}

1082: holds for {\sc Select} with Step\/ $3$ using\/ \eqref{iv3}.

1083: \end{theorem}

1084: \begin{proof}

1085: We only indicate how to adapt the proof of Thm \ref{t:selFR} following

1086: \eqref{0.05FR}.  As noted after \eqref{iv3}, the left case now has

1087: $k<n/2-gn/s$ and the right case has $k>n/2+gn/s$, so we only need to

1088: discuss the middle case.

1089:

1090: {\em Middle case\/}:

1091: $n/2-gn/s\le k\le n/2+gn/s$.  Suppose

1092: the event ${\cal E}_m:=\{x_{j_l}^*\le v\le x_{j_r}^*\}$ occurs

1093: (note that $\Prob[{\cal E}_m']\le 2e^{-2g^2\!/s}=2n^{-2\beta}$

1094: by Cor.\ \ref{c:iv3}).

1095: If $\hat X=L$ then, by the rules of Steps 4--6, we have $\hat k=k$

1096: and $\hat n\le j_r-1$; since $j_r<n/2+gn/s+1$ by \eqref{jljr3}, we

1097: get $\hat n<n/2+gn/s$.  Hence $k\ge n/2-gn/s$ yields

1098: $\hat n<k+2gn/s$ and $\hat n-\hat k<2gn/s$ as in \eqref{hatnleft}.

1099: Next, if $\hat X=R$ then $\hat n-\hat k=n-k$ and $\hat k:=k-|L\cup E|$,

1100: so $L\cup E=\{x\in X:x\le v\}\ni x_{j_l}^*$ gives $\hat k\le k-j_l$.

1101: Since $k\le n/2+gn/s$ and $j_l\ge n/2-gn/s$ by \eqref{jljr3}, we get

1102: $\hat k\le2gn/s$ and $\hat n\le\hat n-\hat k+2gn/s$ as in

1103: \eqref{hatnright}; further, $\hat n\le n-j_l$ yields

1104: $\hat n\le n/2+gn/s$.  Noticing that $n/2-gn/s\le k\le n/2+gn/s$ implies

1105: $n/2\le\min\{k,n-k\}+gn/s$, we have

1106: $\hat n\le\min\{k,n-k\}+2gn/s$ in both cases.

1107:

1108: Thus in the middle case we again have \eqref{B} and hence

1109: \eqref{cEhatc}; further, by \eqref{Al} and \eqref{Ar}, the event

1110: ${\cal E}_m\subset{\cal A}_l\cup{\cal A}_r$ is partitioned into

1111: ${\cal E}_m\cap{\cal A}_l$ and

1112: ${\cal E}_m\cap{\cal A}_l'\cap{\cal A}_r$.

1113:

1114: Next, reasoning as before, we see that \eqref{PB'AlB'Ar} and hence

1115: \eqref{Echeckn}--\eqref{Efcheckn} remain valid in the left and right

1116: cases, whereas in the middle case we have

1117: \begin{equation}

1118: \Prob[\hat{\cal B}'|{\cal E}_m,{\cal A}_l,\hat n=i]\le2i^{-2\beta}

1119: \quad\mbox{and}\quad

1120: \Prob[\hat{\cal B}'|{\cal E}_m,{\cal A}_l',{\cal A}_r,\hat n=i]\le

1121: 2i^{-2\beta}.

1122: \label{PB'EmAl}

1123: \end{equation}

1124:

1125: In the middle case,

1126: $\Exp\check n=\Exp[\check n|{\cal E}_m]\Prob[{\cal E}_m]+

1127: \Exp[\check n|{\cal E}_m']\Prob[{\cal E}_m']$ is bounded by

1128: $\Exp[\check n|{\cal E}_m]+2n^{1-2\beta}$, since

1129: $\Prob[{\cal E}_m']\le2n^{-2\beta}$ and $\check n<n$ always.  Next,

1130: partitioning ${\cal E}_m$ into ${\cal E}_m\cap{\cal A}_l$ and

1131: ${\cal E}_m\cap{\cal A}_l'\cap{\cal A}_r$, we obtain

1132: $\Exp[\check n|{\cal E}_m]\le

1133: \max\{\Exp[\check n|{\cal E}_m,{\cal A}_l],

1134: \Exp[\check n|{\cal E}_m,{\cal A}_l',{\cal A}_r]\}$, where

1135: $\Exp[\check n|{\cal E}_m,{\cal A}_l]$ and

1136: $\Exp[\check n|{\cal E}_m,{\cal A}_l',{\cal A}_r]$ may be bounded like

1137: $\Exp[\check n|{\cal A}_l]$ and $\Exp[\check n|{\cal A}_r]$ in the left

1138: and right cases to get \eqref{Echeckn}.

1139: Then \eqref{Efcheckn} is obtained similarly, and the conclusion follows

1140: as before.

1141: \qed

1142: \end{proof}

1143: %

1144: %   *** SECTION 6 ***

1145: \section{Ternary partitions}

1146: \label{s:ternpart}

1147: In this section we discuss ways of implementing {\sc Select} when

1148: the input set is given as an array $x[1\colon n]$.  We employ the

1149: following notation.

1150:

1151: Each stage works with a segment $x[l\colon r]$ of the input array

1152: $x[1\colon n]$, where $1\le l\le r\le n$ are such that $x_i<x_l$ for

1153: $i=1\colon l-1$, $x_r<x_i$ for $i=r+1\colon n$, and the $k$th smallest

1154: element of $x[1\colon n]$ is the $(k-l+1)$th smallest element of

1155: $x[l\colon r]$.  The task of {\sc Select} is {\em extended\/}: given

1156: $x[l\colon r]$ and $l\le k\le r$,

1157: {\sc Select}$(x,l,r,k,k_-,k_+)$ permutes $x[l\colon r]$ and finds

1158: $l\le k_-\le k\le k_+\le r$

1159: such that $x_i<x_k$ for all $l\le i<k_-$, $x_i=x_k$ for all

1160: $k_-\le i\le k_+$, $x_i>x_k$ for all $k_+<i\le r$.  The initial call

1161: is {\sc Select}$(x,1,n,k,k_-,k_+)$.

1162:

1163: A vector swap denoted by $x[a\colon b]\leftrightarrow x[b+1\colon c]$

1164: means that the first $d:=\min(b+1-a,c-b)$ elements of array

1165: $x[a\colon c]$ are exchanged with its last $d$ elements in arbitrary

1166: order if $d>0$; e.g., we may exchange

1167: $x_{a+i}\leftrightarrow x_{c-i}$ for $0\le i<d$, or

1168: $x_{a+i}\leftrightarrow x_{c-d+1+i}$ for $0\le i<d$.

1169: %

1170: %   *** SUBSECTION 6.1 ***

1171: \subsection{Tripartitioning schemes}

1172: \label{ss:tripart}

1173: For a given pivot $v:=x_l$ from the array $x[l\colon r]$, the following

1174: {\em ternary\/} scheme \cite[\S5.1]{kiw:psq} partitions the array into

1175: three blocks, with $x_m<v$ for $l\le m<a$, $x_m=v$ for $a\le m\le b$,

1176: $x_m>v$ for $b<m\le r$.

1177: After comparing the pivot $v$ to $x_r$ to produce the initial setup

1178: \begin{equation}

1179: \begin{tabular}{llrlrlrr}

1180: \hline

1181: \multicolumn{1}{|c|}{$x=v$} &

1182: \multicolumn{2}{|c|}{$x<v$} &

1183: \multicolumn{2}{|c|}{?} &

1184: \multicolumn{2}{|c|}{$x>v$} &

1185: \multicolumn{1}{|c|}{$x=v$} \\

1186: \hline

1187: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!

1188: $l$ & $p$ & $i$ & & & $j$ & $q$ & $r$\\

1189: \end{tabular}

1190: \label{ternini}

1191: \end{equation}

1192: with $i:=l$ and $j:=r$,

1193: we work with the three inner blocks of the array

1194: \begin{equation}

1195: \begin{tabular}{lllrrr}

1196: \hline

1197: \multicolumn{1}{|c|}{$x=v$} &

1198: \multicolumn{1}{|c|}{$x<v$} &

1199: \multicolumn{2}{|c|}{?} &

1200: \multicolumn{1}{|c|}{$x>v$} &

1201: \multicolumn{1}{|c|}{$x=v$} \\

1202: \hline

1203: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!

1204: $l$ & $p$ & $i$ & $j$ & $q$ & $r$\\

1205: \end{tabular}\ ,

1206: \label{ternbeg}

1207: \end{equation}

1208: until the middle part is empty or just contains an element equal to the

1209: pivot

1210: \begin{equation}

1211: \begin{tabular}{llrclrr}

1212: \hline

1213: \multicolumn{1}{|c|}{$x=v$} &

1214: \multicolumn{2}{|c|}{$x<v$} &

1215: \multicolumn{1}{|c|}{$x=v$} &

1216: \multicolumn{2}{|c|}{$x>v$} &

1217: \multicolumn{1}{|c|}{$x=v$} \\

1218: \hline

1219: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!

1220: $l$ & $p$ & $j$ & & $i$ & $q$ & $r$ \\

1221: \end{tabular}

1222: \label{ternmid}

1223: \end{equation}

1224: (i.e., $j=i-1$ or $j=i-2$),

1225: then swap the ends into the middle for the final arrangement

1226: \begin{equation}

1227: \begin{tabular}{llrr}

1228: \hline

1229: \multicolumn{1}{|c|}{$x<v$} &

1230: \multicolumn{2}{|c|}{$x=v$} &

1231: \multicolumn{1}{|c|}{$x>v$} \\

1232: \hline

1233: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!

1234: $l$ & $a$ & $b$ & $r$\\

1235: \end{tabular}\ .

1236: \label{ternend}

1237: \end{equation}

1238: %

1239: %   *** SCHEME A ***

1240: \begin{scheme}[Safeguarded ternary partition]

1241: \label{sts}

1242: \rm

1243: \begin{description}

1244: \itemsep0pt

1245: \item[]

1246: \item[\ref{sts}1.] [Initialize.]

1247: Set $i:=l$, $p:=i+1$, $j:=r$ and $q:=j-1$.

1248: If $v>x_j$, exchange $x_i\leftrightarrow x_j$ and set $p:=i$;

1249: else if $v<x_j$, set $q:=j$.

1250: \item[\ref{sts}2.] [Increase $i$ until $x_i\ge v$.]

1251: Increase $i$ by $1$; then if $x_i<v$, repeat this step.

1252: \item[\ref{sts}3.] [Decrease $j$ until $x_j\le v$.]

1253: Decrease $j$ by $1$; then if $x_j>v$, repeat this step.

1254: \item[\ref{sts}4.] [Exchange.]

1255: (Here $x_j\le v\le x_i$.)

1256: If $i<j$, exchange $x_i\leftrightarrow x_j$; then

1257: if $x_i=v$, exchange $x_i\leftrightarrow x_p$ and increase $p$ by $1$;

1258: if $x_j=v$, exchange $x_j\leftrightarrow x_q$ and decrease $q$ by $1$;

1259: return to \ref{sts}2.

1260: If $i=j$ (so that $x_i=x_j=v$), increase $i$ by $1$ and

1261: decrease $j$ by $1$.

1262: \item[\ref{sts}5.] [Cleanup.]

1263: Set $a:=l+j-p+1$ and $b:=r-q+i-1$.

1264: Exchange $x[l\colon p-1]\leftrightarrow x[p\colon j]$ and

1265: $x[i\colon q]\leftrightarrow x[q+1\colon r]$.

1266: \end{description}

1267: \end{scheme}

1268:

1269: Step \ref{sts}1 ensures that $x_l\le v\le x_r$, so steps \ref{sts}2 and

1270: \ref{sts}3 don't need to test whether $i\le j$.  This scheme makes two

1271: extraneous comparisons (only one when $i=j$ at \ref{sts}4).  Spurious

1272: comparisons are avoided in the following modification

1273: \cite[\S5.3]{kiw:psq} of the scheme of \cite{bemc:esf}

1274: (cf.\ \cite[Ex.\ 5.2.2--41]{knu:acpIII2}),

1275: for which $i=j+1$ in \eqref{ternmid}.

1276: %

1277: %   *** SCHEME B ***

1278: \begin{scheme}[Double-index controlled ternary partition]

1279: \label{stind2}

1280: \rm

1281: \begin{description}

1282: \itemsep0pt

1283: \item[]

1284: \item[\ref{stind2}1.] [Initialize.]

1285: Set $i:=p:=l+1$ and $j:=q:=r$.

1286: \item[\ref{stind2}2.] [Increase $i$ until $x_i>v$.]

1287: If $i\le j$ and $x_i<v$, increase $i$ by $1$ and repeat this step.

1288: If $i\le j$ and $x_i=v$, exchange $x_p\leftrightarrow x_i$, increase

1289: $p$ and $i$ by $1$, and repeat this step.

1290: \item[\ref{stind2}3.] [Decrease $j$ until $x_j<v$.]

1291: If $i<j$ and $x_j>v$, decrease $j$ by $1$ and repeat this step.

1292: If $i<j$ and $x_j=v$, exchange $x_j\leftrightarrow x_q$, decrease

1293: $j$ and $q$ by $1$, and repeat this step.

1294: If $i\ge j$, set $j:=i-1$ and go to \ref{stind2}5.

1295: \item[\ref{stind2}4.] [Exchange.]

1296: Exchange $x_i\leftrightarrow x_j$, increase $i$ by $1$,

1297: decrease $j$ by $1$, and return to \ref{stind2}2.

1298: \item[\ref{stind2}5.] [Cleanup.]

1299: Set $a:=l+i-p$ and $b:=r-q+j$.

1300: Swap $x[l\colon p-1]\leftrightarrow x[p\colon j]$ and

1301: $x[i\colon q]\leftrightarrow x[q+1\colon r]$.

1302: \end{description}

1303: \end{scheme}

1304: %

1305: %   *** SUBSECTION 6.2 ***

1306: \subsection{Preparing for ternary partitions}

1307: \label{ss:preptern}

1308: At Step 1, $r-l+1$ replaces $n$ in finding $s$ and $g$.

1309: At Step 2, it is convenient to place the sample in the initial part of

1310: $x[l\colon r]$ by exchanging $x_i\leftrightarrow x_{i+{\rm rand}(r-i)}$

1311: for $l\le i\le r_s:=l+s-1$, where ${\rm rand}(r-i)$ denotes a random

1312: integer, uniformly distributed between $0$ and $r-i$.

1313:

1314: Step 3 uses $i:=k-l+1$ and $m:=r-l+1$ instead of $k$ and $n$

1315: to find the pivot position

1316: \begin{equation}

1317: k_v:=\left\{\begin{array}{ll}

1318: \rlap{$\min$}\phantom{\max}

1319: \left\{\,\lceil l-1+is/m+g\rceil,r_s\,\right\}&

1320: \mbox{if}\ i<m/2,\\

1321: \max\left\{\,\lceil l-1+is/m-g\rceil,l\,\right\}&

1322: \mbox{if}\ i\ge m/2,

1323: \end{array}\right.

1324: \label{kv}

1325: \end{equation}

1326: so that the recursive call of {\sc Select}$(x,l,r_s,k_v,k_v^-,k_v^+)$

1327: produces $v:=x_{k_v}$.

1328:

1329: After $v$ has been found, our array looks as follows

1330: \begin{equation}

1331: \begin{tabular}{llrrccr}

1332: \hline

1333: \multicolumn{1}{|c|}{$x<v$} &

1334: \multicolumn{2}{|c|}{$x=v$} &

1335: \multicolumn{1}{|c|}{$x>v$} &

1336: \multicolumn{2}{|c|}{?}\\

1337: \hline

1338: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!

1339: $l$ & $k_v^-$ & $k_v^+$ & $r_s$ & & $r$\\

1340: \end{tabular}\ .

1341: \label{partrec}

1342: \end{equation}

1343: Setting $\bar l:=k_v^-$ and $\bar r:=r-r_s+k_v^+$, we swap

1344: $x[k_v^++1\colon r_s]\leftrightarrow x[r_s+1\colon r]$ in

1345: \eqref{partrec} to get

1346: \begin{equation}

1347: \begin{tabular}{llrlrr}

1348: \hline

1349: \multicolumn{1}{|c|}{$x<v$} &

1350: \multicolumn{2}{|c|}{$x=v$} &

1351: \multicolumn{2}{|c|}{?} &

1352: \multicolumn{1}{|c|}{$x>v$} \\

1353: \hline

1354: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!

1355: $l$ & $\bar l$ & $k_v^+$ & & $\bar r$ & $r$\\

1356: \end{tabular}\ .

1357: \label{partini}

1358: \end{equation}

1359: If $k_v^+=r_s$, we use scheme \ref{sts} with $l$ replaced by $k_v^+$

1360: in \ref{sts}1 (cf.\ \eqref{ternini}) and by $\bar l$ in \ref{sts}5

1361: (cf.\ \eqref{ternmid}); for $k_v^+<r_s$, we set

1362: $i:=k_v^+$, $p:=i+1$, $j:=\bar r+1$, $q:=\bar r$, omit \ref{sts}1

1363: and replace $l$, $r$ by $\bar l$, $\bar r$ in \ref{sts}5.

1364: Similarly, for scheme \ref{stind2}, we replace $l$, $r$ by

1365: $k_v^+$, $\bar r$ in \ref{stind2}1, and by $\bar l$, $\bar r$ in

1366: \ref{stind2}5.

1367:

1368: After partitioning, $l$ and $r$ are updated by setting $l:=b+1$ if

1369: $a\le k$, $r:=a-1$ if $k\le b$.  If $l\ge r$,

1370: {\sc Select} may return $k_-:=k_+:=k$ if $l=r$, $k_-:=r+1$ and

1371: $k_+:=l-1$ if $l>r$.  Otherwise, instead of calling {\sc Select}

1372: recursively, Step 7 may jump back to Step 1, or to Step 0 if sSelect

1373: is used (cf.\ \S\ref{ss:subfile}).

1374:

1375: A simple version of sSelect is obtained if Steps 2 and 3 choose $v:=x_k$

1376: when $r-l+1\le n_{\rm cut}$ (this choice of \cite{flri:asf} works well

1377: in practice, but more sophisticated pivots could be tried); then the

1378: ternary partitioning code can be used by sSelect as well.

1379: %

1380: %   *** SECTION 7 ***

1381: \section{Experimental results}

1382: \label{s:exp}

1383: %

1384: %   *** SUBSECTION 7.1 ***

1385: \subsection{Implemented algorithms}

1386: \label{ss:impl}

1387: %

1388: An implementation of {\sc Select} was programmed in Fortran 77 and

1389: run on a notebook PC (Pentium 4M 2 GHz, 768 MB RAM) under MS

1390: Windows XP.  The input set $X$ was specified as a double precision

1391: array.  For efficiency, the recursion was removed and small arrays with

1392: $n\le n_{\rm cut}$ were handled as if Steps 2 and 3 chose $v:=x_k$;

1393: the resulting version of sSelect (cf.\ \S\S\ref{ss:subfile} and

1394: \ref{ss:preptern}) typically required fewer than $3.5n$ comparisons.

1395: The choice of \eqref{sgfFRsn2/3} was employed, with the parameters

1396: $\alpha=0.5$, $\beta=0.25$ and $n_{\rm cut}=600$ as proposed in

1397: \cite{flri:asf}; future work should test other sample sizes and

1398: parameters.

1399: %

1400: %   *** SUBSECTION 7.2 ***

1401: \subsection{Testing examples}

1402: \label{ss:examp}

1403: %

1404: As in \cite{kiw:rsq}, we used minor modifications of the input sequences

1405: of \cite{val:iss}:

1406: \begin{description}

1407: \itemsep0pt

1408: \item[random]

1409: A random permutation of the integers $1$ through $n$.

1410: \item[onezero]

1411: A random permutation of $\lceil n/2\rceil$ ones and $\lfloor n/2\rfloor$

1412: zeros.

1413: \item[sorted]

1414: The integers $1$ through $n$ in increasing order.

1415: \item[rotated]

1416: A sorted sequence rotated left once; i.e., $(2,3,\ldots,n,1)$.

1417: \item[organpipe]

1418: %The integers $1$ through $n/2$ in increasing order, followed by $n/2$

1419: %through $1$ in decreasing order.

1420: The integers $(1,2,\ldots,n/2,n/2,\ldots,2,1)$.

1421: \item[m3killer]

1422: Musser's ``median-of-3 killer'' sequence with $n=4j$ and $k=n/2$:

1423: $$

1424: \left(\begin{array}{ccccccccccccc}

1425: 1&  2 & 3&  4 & \ldots&  k-2& k-1& k& k+1& \ldots& 2k-2& 2k-1& 2k\\

1426: 1& k+1& 3& k+3& \ldots& 2k-3& k-1& 2&  4 & \ldots& 2k-2& 2k-1& 2k

1427: \end{array}\right).

1428: $$

1429: \item[twofaced]

1430: Obtained by randomly permuting the

1431: elements of an m3killer sequence in positions $4\lfloor\log_2n\rfloor$

1432: through $n/2-1$ and $n/2+4\lfloor\log_2n\rfloor-1$ through $n-2$.

1433: \end{description}

1434: For each input sequence, its (lower) median element was selected

1435: for $k:=\lceil n/2\rceil$.

1436: %

1437: %   *** SUBSECTION 7.3 ***

1438: \subsection{Computational results}

1439: \label{ss:result}

1440: %

1441: We varied the input size $n$ from $50{,}000$ to $16{,}000{,}000$.  For

1442: the random, onezero and twofaced sequences, for each input size,

1443: 20 instances were randomly generated; for the deterministic

1444: sequences, 20 runs were made to measure the solution time.

1445:

1446: The performance of {\sc Select} on randomly generated inputs is

1447: summarized in Table \ref{tab:Selrand},

1448: %

1449: %   *** TABLE 7.1 ***

1450: \begin{table}[t!]

1451: \caption{Performance of {\sc Select} on randomly generated inputs.}

1452: \label{tab:Selrand}

1453: \footnotesize

1454: \begin{center}

1455: \begin{tabular}{lrrrrrrrrrrrrr}

1456: \hline

1457: Sequence &\multicolumn{1}{c}{Size}

1458: &\multicolumn{3}{c}{Time $[{\rm msec}]$%

1459: \vphantom{$1^{2^3}$}} % Need more vertical space!

1460: &\multicolumn{3}{c}{Comparisons $[n]$}

1461: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}

1462: &\multicolumn{1}{c}{$L_{\rm avg}$}

1463: &\multicolumn{1}{c}{$P_{\rm avg}$}

1464: &\multicolumn{1}{c}{$N_{\rm avg}$}

1465: &\multicolumn{1}{c}{$p_{\rm avg}$}

1466: &\multicolumn{1}{c}{$s_{\rm avg}$}\\

1467: &\multicolumn{1}{c}{$n$}

1468: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}

1469: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}

1470: & &\multicolumn{1}{c}{$[n]$}

1471: &\multicolumn{1}{c}{$[\ln n]$}

1472: &\multicolumn{1}{c}{$[\ln n]$} &

1473: &\multicolumn{1}{c}{$[\%n]$}\\

1474: \hline

1475: %dsel20/dsel20x alpha=0.5 beta=0.25 cutoff=600

1476: random     &  50K

1477: &    2&   10&    0& 1.66& 1.77& 1.61& 1.74& 1.65& 0.46& 0.55& 8.33& 2.59\\

1478:            & 100K

1479: &    3&   10&    0& 1.63& 1.71& 1.55& 1.76& 1.63& 0.60& 0.69& 7.58& 2.12\\

1480:            & 500K

1481: &   13&   20&   10& 1.56& 1.61& 1.54& 1.36& 1.56& 0.67& 0.74& 8.05& 1.19\\

1482:            &   1M

1483: &   23&   30&   20& 1.52& 1.58& 1.00& 0.55& 1.52& 0.66& 0.73& 8.32& 0.91\\

1484:            &   2M

1485: &   46&   51&   40& 1.54& 1.56& 1.52& 1.22& 1.54& 0.75& 0.82& 8.38& 0.72\\

1486:            &   4M

1487: &   88&   91&   80& 1.53& 1.55& 1.52& 1.18& 1.53& 0.86& 0.92& 8.22& 0.57\\

1488:            &   8M

1489: &  172&  181&  160& 1.52& 1.53& 1.51& 1.13& 1.52& 0.92& 0.98& 8.54& 0.44\\

1490:            &  16M

1491: &  336&  341&  320& 1.52& 1.53& 1.51& 1.06& 1.52& 0.95& 1.01& 8.41& 0.35\\

1492: onezero    &  50K

1493: &    2&   10&    0& 1.28& 1.51& 1.00& 0.00& 1.28& 0.24& 0.18& 1.26& 1.91\\

1494:            & 100K

1495: &    3&   10&    0& 1.25& 1.51& 1.00& 0.00& 1.25& 0.26& 0.15& 1.20& 1.49\\

1496:            & 500K

1497: &   15&   20&   10& 1.33& 1.50& 1.00& 0.00& 1.33& 0.29& 0.17& 1.34& 0.93\\

1498:            &   1M

1499: &   30&   41&   20& 1.33& 1.50& 1.00& 0.00& 1.33& 0.27& 0.15& 1.20& 0.73\\

1500:            &   2M

1501: &   60&   71&   41& 1.30& 1.50& 1.00& 0.00& 1.30& 0.26& 0.14& 1.29& 0.56\\

1502:            &   4M

1503: &  109&  131&   90& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.18& 0.41\\

1504:            &   8M

1505: &  219&  261&  190& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.31& 0.32\\

1506:            &  16M

1507: &  436&  501&  370& 1.25& 1.50& 1.00& 0.00& 1.25& 0.20& 0.11& 1.21& 0.27\\

1508: twofaced   &  50K

1509: &    1&   10&    0& 1.67& 1.77& 1.59& 1.87& 1.67& 0.47& 0.56& 8.24& 2.63\\

1510:            & 100K

1511: &    3&   11&    0& 1.62& 1.73& 1.56& 1.67& 1.62& 0.60& 0.69& 7.61& 2.11\\

1512:            & 500K

1513: &   12&   20&   10& 1.56& 1.59& 1.53& 1.23& 1.56& 0.63& 0.71& 8.33& 1.18\\

1514:            &   1M

1515: &   24&   31&   20& 1.55& 1.57& 1.53& 1.23& 1.55& 0.69& 0.76& 8.22& 0.92\\

1516:            &   2M

1517: &   45&   51&   40& 1.54& 1.57& 1.52& 1.23& 1.54& 0.78& 0.85& 8.36& 0.73\\

1518:            &   4M

1519: &   88&   91&   80& 1.53& 1.54& 1.52& 1.17& 1.53& 0.88& 0.94& 8.05& 0.57\\

1520:            &   8M

1521: &  170&  180&  160& 1.52& 1.53& 1.51& 1.12& 1.52& 0.90& 0.97& 8.51& 0.44\\

1522:            &  16M

1523: &  332&  341&  320& 1.52& 1.53& 1.51& 1.04& 1.52& 0.96& 1.02& 8.55& 0.35\\

1524: \hline

1525: \end{tabular}

1526: \end{center}

1527: \end{table}

1528: %

1529: where the average, maximum and minimum solution times are in

1530: milliseconds, and the comparison counts are in multiples of $n$; e.g.,

1531: column six gives $C_{\rm avg}/n$, where $C_{\rm avg}$ is the average

1532: number of comparisons made over all instances.  Thus

1533: $\gamma_{\rm avg}:=(C_{\rm avg}-1.5n)_+/f(n)$ estimates the constant

1534: $\gamma$ in the bound \eqref{CnkFR}; moreover, we have

1535: $C_{\rm avg}\approx L_{\rm avg}$, where $L_{\rm avg}$ is the average

1536: sum of sizes of partitioned arrays.  Further,

1537: $P_{\rm avg}$ is the average number of {\sc Select} partitions, whereas

1538: $N_{\rm avg}$ is the average number of calls to sSelect and

1539: $p_{\rm avg}$ is the average number of sSelect partitions per call;

1540: both $P_{\rm avg}$ and $N_{\rm avg}$ grow slowly with $\ln n$

1541: (linearly on the onezero inputs).

1542: Finally, $s_{\rm avg}$ is the average sum of sample sizes;

1543: $s_{\rm avg}/n^{2/3}$ drops from $0.95$ for $n=50{\rm K}$ to $0.88$ for

1544: $n=16{\rm M}$ on the random and twofaced inputs, and oscillates about

1545: $0.7$ on the onezero inputs, whereas the initial

1546: $s/n^{2/3}\approx\alpha=0.5$.

1547: The results for the random and twofaced sequences are very similar:

1548: the average solution times grow linearly with $n$ (except for small

1549: inputs whose solution times could not be measured accurately), and the

1550: differences between maximum and minimum times are quite small (and also

1551: partly due to the operating system).  Except for the smallest inputs,

1552: the maximum and minimum numbers of comparisons are quite close, and

1553: $C_{\rm avg}$ nicely approaches the theoretical lower bound of $1.5n$;

1554: this is reflected in the values of $\gamma_{\rm avg}$.  The results for

1555: the onezero inputs essentially average two cases: the first pass

1556: eliminates either almost all or about half of the elements.

1557:

1558: Table \ref{tab:Seldet} exhibits similar features of {\sc Select} on

1559: the deterministic inputs.

1560: %

1561: %   *** TABLE 7.2 ***

1562: \begin{table}[t!]

1563: \caption{Performance of {\sc Select} on deterministic inputs.}

1564: \label{tab:Seldet}

1565: \footnotesize

1566: \begin{center}

1567: \tabcolsep=0.98\tabcolsep

1568: \begin{tabular}{lrrrrrrrrrrrrr}

1569: \hline

1570: Sequence &\multicolumn{1}{c}{Size}

1571: &\multicolumn{3}{c}{Time $[{\rm msec}]$%

1572: \vphantom{$1^{2^3}$}} % Need more vertical space!

1573: &\multicolumn{3}{c}{Comparisons $[n]$}

1574: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}

1575: &\multicolumn{1}{c}{$L_{\rm avg}$}

1576: &\multicolumn{1}{c}{$P_{\rm avg}$}

1577: &\multicolumn{1}{c}{$N_{\rm avg}$}

1578: &\multicolumn{1}{c}{$p_{\rm avg}$}

1579: &\multicolumn{1}{c}{$s_{\rm avg}$}\\

1580: &\multicolumn{1}{c}{$n$}

1581: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}

1582: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}

1583: & &\multicolumn{1}{c}{$[n]$}

1584: &\multicolumn{1}{c}{$[\ln n]$}

1585: &\multicolumn{1}{c}{$[\ln n]$} &

1586: &\multicolumn{1}{c}{$[\%n]$}\\

1587: \hline

1588: %dsel10o/dsel10ox alpha=0.5 beta=0.25 cutoff=600

1589: sorted     &  50K

1590: &    1&   10&    0& 1.67& 1.76& 1.59& 1.85& 1.66& 0.48& 0.57& 7.24& 2.65\\

1591:            & 100K

1592: &    2&   10&    0& 1.62& 1.69& 1.55& 1.70& 1.62& 0.60& 0.69& 6.76& 2.12\\

1593:            & 500K

1594: &    8&   10&    0& 1.56& 1.62& 1.53& 1.35& 1.56& 0.67& 0.74& 7.52& 1.19\\

1595:            &   1M

1596: &   15&   20&   10& 1.54& 1.58& 1.53& 1.19& 1.54& 0.68& 0.75& 7.87& 0.92\\

1597:            &   2M

1598: &   27&   31&   20& 1.54& 1.56& 1.52& 1.23& 1.54& 0.74& 0.81& 7.61& 0.73\\

1599:            &   4M

1600: &   51&   61&   40& 1.53& 1.55& 1.52& 1.19& 1.53& 0.87& 0.93& 7.34& 0.57\\

1601:            &   8M

1602: &   98&  111&   90& 1.52& 1.53& 1.51& 1.10& 1.52& 0.89& 0.95& 8.03& 0.44\\

1603:            &  16M

1604: &  186&  200&  170& 1.52& 1.52& 1.51& 1.04& 1.52& 0.95& 1.01& 7.99& 0.35\\

1605: rotated    &  50K

1606: &    1&   10&    0& 1.67& 1.78& 1.59& 1.86& 1.66& 0.48& 0.57& 9.45& 2.64\\

1607:            & 100K

1608: &    2&   10&    0& 1.63& 1.73& 1.58& 1.76& 1.63& 0.61& 0.69& 9.12& 2.12\\

1609:            & 500K

1610: &    8&   10&    0& 1.56& 1.62& 1.54& 1.39& 1.56& 0.65& 0.73&10.03& 1.18\\

1611:            &   1M

1612: &   15&   20&   10& 1.55& 1.58& 1.53& 1.29& 1.55& 0.69& 0.76& 9.56& 0.92\\

1613:            &   2M

1614: &   27&   31&   20& 1.54& 1.55& 1.52& 1.19& 1.54& 0.78& 0.84& 8.69& 0.72\\

1615:            &   4M

1616: &   51&   60&   50& 1.53& 1.54& 1.52& 1.18& 1.53& 0.87& 0.94& 8.92& 0.57\\

1617:            &   8M

1618: &   98&  111&   90& 1.52& 1.53& 1.51& 1.12& 1.52& 0.89& 0.96& 9.29& 0.44\\

1619:            &  16M

1620: &  185&  210&  170& 1.52& 1.53& 1.51& 1.04& 1.52& 0.93& 0.99& 8.96& 0.35\\

1621: organpipe  &  50K

1622: &    1&   10&    0& 1.67& 1.78& 1.59& 1.94& 1.67& 0.45& 0.55& 8.21& 2.62\\

1623:            & 100K

1624: &    3&   10&    0& 1.62& 1.69& 1.57& 1.68& 1.62& 0.60& 0.69& 7.61& 2.11\\

1625:            & 500K

1626: &   10&   10&   10& 1.57& 1.60& 1.54& 1.43& 1.56& 0.67& 0.75& 8.18& 1.19\\

1627:            &   1M

1628: &   20&   20&   10& 1.55& 1.58& 1.52& 1.24& 1.55& 0.70& 0.77& 8.21& 0.93\\

1629:            &   2M

1630: &   37&   41&   30& 1.53& 1.55& 1.52& 1.15& 1.53& 0.78& 0.85& 8.48& 0.72\\

1631:            &   4M

1632: &   68&   80&   60& 1.53& 1.54& 1.52& 1.13& 1.53& 0.84& 0.91& 8.21& 0.57\\

1633:            &   8M

1634: &  130&  150&  120& 1.52& 1.54& 1.51& 1.07& 1.52& 0.88& 0.94& 8.64& 0.44\\

1635:            &  16M

1636: &  240&  260&  230& 1.52& 1.53& 1.51& 1.02& 1.52& 0.94& 1.00& 8.44& 0.35\\

1637: m3killer   &  50K

1638: &    1&   10&    0& 1.67& 1.76& 1.60& 1.89& 1.67& 0.47& 0.55& 8.82& 2.62\\

1639:            & 100K

1640: &    4&   10&    0& 1.63& 1.71& 1.57& 1.80& 1.63& 0.60& 0.69& 7.69& 2.13\\

1641:            & 500K

1642: &   11&   20&   10& 1.57& 1.62& 1.53& 1.44& 1.57& 0.66& 0.73& 8.61& 1.19\\

1643:            &   1M

1644: &   20&   20&   20& 1.55& 1.59& 1.52& 1.40& 1.55& 0.72& 0.79& 8.33& 0.93\\

1645:            &   2M

1646: &   38&   41&   30& 1.54& 1.56& 1.52& 1.25& 1.54& 0.78& 0.85& 8.30& 0.73\\

1647:            &   4M

1648: &   73&   81&   70& 1.53& 1.54& 1.52& 1.28& 1.53& 0.87& 0.94& 8.22& 0.57\\

1649:            &   8M

1650: &  137&  150&  130& 1.52& 1.53& 1.51& 1.05& 1.52& 0.91& 0.97& 8.37& 0.44\\

1651:            &  16M

1652: &  248&  260&  230& 1.52& 1.52& 1.51& 0.96& 1.52& 0.92& 0.97& 8.42& 0.35\\

1653: \hline

1654: \end{tabular}

1655: \end{center}

1656: \end{table}

1657: %

1658: The results for the sorted and rotated sequences are very similar,

1659: whereas the solution times on the organpipe and m3killer sequences

1660: are between those for the sorted and random sequences.

1661:

1662: The results of Tabs.\ \ref{tab:Selrand}--\ref{tab:Seldet} were obtained

1663: with scheme \ref{sts} of \S\ref{ss:preptern}; to save space,

1664: Table \ref{tab:SelpartB} gives only selected results for scheme

1665: \ref{stind2},

1666: %

1667: %   *** TABLE 7.3 ***

1668: \begin{table}[t!]

1669: \caption{Performance of {\sc Select} with ternary scheme \ref{stind2}.}

1670: \label{tab:SelpartB}

1671: \footnotesize

1672: \begin{center}

1673: \begin{tabular}{lrrrrrrrrrrrrr}

1674: \hline

1675: Sequence &\multicolumn{1}{c}{Size}

1676: &\multicolumn{3}{c}{Time $[{\rm msec}]$%

1677: \vphantom{$1^{2^3}$}} % Need more vertical space!

1678: &\multicolumn{3}{c}{Comparisons $[n]$}

1679: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}

1680: &\multicolumn{1}{c}{$L_{\rm avg}$}

1681: &\multicolumn{1}{c}{$P_{\rm avg}$}

1682: &\multicolumn{1}{c}{$N_{\rm avg}$}

1683: &\multicolumn{1}{c}{$p_{\rm avg}$}

1684: &\multicolumn{1}{c}{$s_{\rm avg}$}\\

1685: &\multicolumn{1}{c}{$n$}

1686: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}

1687: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}

1688: & &\multicolumn{1}{c}{$[n]$}

1689: &\multicolumn{1}{c}{$[\ln n]$}

1690: &\multicolumn{1}{c}{$[\ln n]$} &

1691: &\multicolumn{1}{c}{$[\%n]$}\\

1692: \hline

1693: %dsel20b/dsel20bx alpha=0.5 beta=0.25 cutoff=600

1694: random     &  2M

1695: &   43&   51&   40& 1.53& 1.54& 1.52& 1.02& 1.53& 0.76& 0.83& 8.31& 0.72\\

1696:            &   4M

1697: &   93&  101&   90& 1.53& 1.55& 1.52& 1.09& 1.53& 0.85& 0.92& 8.42& 0.57\\

1698:            &   8M

1699: &  177&  190&  170& 1.52& 1.54& 1.51& 1.03& 1.52& 0.87& 0.93& 8.15& 0.44\\

1700:            &  16M

1701: &  343&  350&  340& 1.51& 1.53& 1.51& 0.88& 1.51& 0.91& 0.97& 8.50& 0.35\\

1702: onezero    &  2M

1703: &   82&   91&   70& 1.30& 1.50& 1.00& 0.00& 1.30& 0.26& 0.14& 1.29& 0.56\\

1704:            &   4M

1705: &  149&  180&  130& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.18& 0.41\\

1706:            &   8M

1707: &  304&  351&  270& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.31& 0.32\\

1708:            &  16M

1709: &  621&  711&  531& 1.25& 1.50& 1.00& 0.00& 1.25& 0.20& 0.11& 1.21& 0.27\\

1710: sorted     &  2M

1711: &   23&   30&   20& 1.54& 1.55& 1.52& 1.18& 1.54& 0.78& 0.85& 7.61& 0.72\\

1712:            &   4M

1713: &   43&   50&   40& 1.53& 1.54& 1.51& 1.18& 1.53& 0.86& 0.92& 7.76& 0.57\\

1714:            &   8M

1715: &   82&   90&   80& 1.52& 1.53& 1.51& 1.10& 1.52& 0.89& 0.95& 8.01& 0.44\\

1716:            &  16M

1717: &  156&  160&  150& 1.52& 1.53& 1.51& 1.04& 1.52& 0.97& 1.03& 8.12& 0.35\\

1718: \hline

1719: \end{tabular}

1720: \end{center}

1721: \end{table}

1722: %

1723: whereas Table \ref{tab:SelpartI}

1724: %

1725: %   *** TABLE 7.4 ***

1726: \begin{table}%[t!]

1727: \caption{Performance of {\sc Select} with the hybrid scheme of

1728: \cite[\S5.6]{kiw:psq}.}

1729: \label{tab:SelpartI}

1730: \footnotesize

1731: \begin{center}

1732: \begin{tabular}{lrrrrrrrrrrrrr}

1733: \hline

1734: Sequence &\multicolumn{1}{c}{Size}

1735: &\multicolumn{3}{c}{Time $[{\rm msec}]$%

1736: \vphantom{$1^{2^3}$}} % Need more vertical space!

1737: &\multicolumn{3}{c}{Comparisons $[n]$}

1738: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}

1739: &\multicolumn{1}{c}{$L_{\rm avg}$}

1740: &\multicolumn{1}{c}{$P_{\rm avg}$}

1741: &\multicolumn{1}{c}{$N_{\rm avg}$}

1742: &\multicolumn{1}{c}{$p_{\rm avg}$}

1743: &\multicolumn{1}{c}{$s_{\rm avg}$}\\

1744: &\multicolumn{1}{c}{$n$}

1745: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}

1746: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}

1747: & &\multicolumn{1}{c}{$[n]$}

1748: &\multicolumn{1}{c}{$[\ln n]$}

1749: &\multicolumn{1}{c}{$[\ln n]$} &

1750: &\multicolumn{1}{c}{$[\%n]$}\\

1751: \hline

1752: %dsel20d/dsel20dx alpha=0.5 beta=0.25 cutoff=600

1753: random     &  2M

1754: &   44&   50&   40& 1.53& 1.54& 1.52& 1.03& 1.53& 0.76& 0.83& 8.31& 0.72\\

1755:            &   4M

1756: &   86&  100&   80& 1.53& 1.55& 1.52& 1.10& 1.53& 0.85& 0.92& 8.42& 0.57\\

1757:            &   8M

1758: &  163&  171&  160& 1.52& 1.54& 1.51& 1.03& 1.52& 0.87& 0.93& 8.15& 0.44\\

1759:            &  16M

1760: &  317&  321&  310& 1.51& 1.53& 1.51& 0.88& 1.51& 0.91& 0.97& 8.50& 0.35\\

1761: onezero    &  2M

1762: &   74&   80&   70& 1.30& 1.50& 1.00& 0.00& 1.30& 0.26& 0.14& 1.29& 0.56\\

1763:            &   4M

1764: &  141&  151&  130& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.18& 0.41\\

1765:            &   8M

1766: &  285&  301&  270& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.31& 0.32\\

1767:            &  16M

1768: &  578&  621&  541& 1.25& 1.50& 1.00& 0.00& 1.25& 0.20& 0.11& 1.21& 0.27\\

1769: sorted     &  2M

1770: &   23&   30&   20& 1.54& 1.55& 1.52& 1.18& 1.54& 0.78& 0.85& 7.61& 0.72\\

1771:            &   4M

1772: &   42&   50&   40& 1.53& 1.54& 1.51& 1.19& 1.53& 0.86& 0.92& 7.76& 0.57\\

1773:            &   8M

1774: &   80&   80&   80& 1.52& 1.53& 1.51& 1.11& 1.52& 0.89& 0.95& 8.01& 0.44\\

1775:            &  16M

1776: &  153&  170&  150& 1.52& 1.53& 1.51& 1.04& 1.52& 0.97& 1.03& 8.12& 0.35\\

1777: \hline

1778: \end{tabular}

1779: \end{center}

1780: \end{table}

1781: %

1782: presents results for the hybrid scheme I of \cite[\S5.6]{kiw:psq},

1783: which combines some features of schemes \ref{sts} and \ref{stind2}.

1784: The hybrid scheme is quite competitive, although slower than scheme

1785: \ref{sts} on the onezero inputs.

1786:

1787: The preceding results were obtained with the modified choice \eqref{iv3}

1788: of $i_v$.  For brevity, Table \ref{tab:Seliv} gives results for

1789: {\sc Select} with scheme \ref{sts} and the standard choice \eqref{iv}

1790: of $i_v$ on the random inputs only, since these inputs are most

1791: frequently used in theory and practice for evaluating sorting and

1792: selection methods.

1793: %

1794: %   *** TABLE 7.5 ***

1795: \begin{table}%[t!]

1796: \caption{Performance of {\sc Select} with the standard choice of $i_v$.}

1797: \label{tab:Seliv}

1798: \footnotesize

1799: \begin{center}

1800: \begin{tabular}{lrrrrrrrrrrrrr}

1801: \hline

1802: Sequence &\multicolumn{1}{c}{Size}

1803: &\multicolumn{3}{c}{Time $[{\rm msec}]$%

1804: \vphantom{$1^{2^3}$}} % Need more vertical space!

1805: &\multicolumn{3}{c}{Comparisons $[n]$}

1806: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}

1807: &\multicolumn{1}{c}{$L_{\rm avg}$}

1808: &\multicolumn{1}{c}{$P_{\rm avg}$}

1809: &\multicolumn{1}{c}{$N_{\rm avg}$}

1810: &\multicolumn{1}{c}{$p_{\rm avg}$}

1811: &\multicolumn{1}{c}{$s_{\rm avg}$}\\

1812: &\multicolumn{1}{c}{$n$}

1813: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}

1814: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}

1815: & &\multicolumn{1}{c}{$[n]$}

1816: &\multicolumn{1}{c}{$[\ln n]$}

1817: &\multicolumn{1}{c}{$[\ln n]$} &

1818: &\multicolumn{1}{c}{$[\%n]$}\\

1819: \hline

1820: %dsel20/dsel20x alpha=0.5 beta=0.25 cutoff=600

1821: random     &  50K

1822: &    4&   10&    0& 1.83& 1.97& 1.74& 3.73& 1.83& 0.57& 0.67& 8.49& 2.96\\

1823:            & 100K

1824: &    4&   10&    0& 1.73& 1.83& 1.61& 3.13& 1.73& 0.73& 0.82& 7.80& 2.32\\

1825:            & 500K

1826: &   14&   20&   10& 1.65& 1.69& 1.61& 3.25& 1.65& 0.82& 0.90& 8.40& 1.30\\

1827:            &   1M

1828: &   25&   30&   20& 1.61& 1.65& 1.58& 2.83& 1.60& 0.89& 0.97& 8.28& 0.99\\

1829:            &   2M

1830: &   46&   50&   40& 1.59& 1.61& 1.56& 2.92& 1.59& 0.99& 1.06& 8.01& 0.77\\

1831:            &   4M

1832: &   90&  100&   80& 1.56& 1.58& 1.54& 2.61& 1.56& 1.15& 1.22& 8.34& 0.60\\

1833:            &   8M

1834: &  174&  181&  170& 1.55& 1.57& 1.54& 2.70& 1.55& 1.21& 1.27& 8.09& 0.47\\

1835:            &  16M

1836: &  341&  351&  330& 1.54& 1.56& 1.53& 2.68& 1.54& 1.21& 1.28& 8.33& 0.36\\

1837: \hline

1838: \end{tabular}

1839: \end{center}

1840: \end{table}

1841: %

1842: The modified choice typically requires fewer comparisons for small

1843: inputs, but its advantages are less pronounced for larger inputs.

1844: A similar behavior was observed for {\sc Select} with scheme

1845: \ref{stind2}. % and for {\sc bSelect}.

1846:

1847: For comparison, Table \ref{tab:qSel} extracts from \cite{kiw:rsq}

1848: some results of {\sc qSelect} for the samples \eqref{sgf}.

1849: %

1850: %   *** TABLE 7.6 ***

1851: \begin{table}

1852: \caption{Performance of quintary {\sc qSelect} on random inputs.}

1853: \label{tab:qSel}

1854: \footnotesize

1855: \begin{center}

1856: \begin{tabular}{lrrrrrrrrrrrrr}

1857: \hline

1858: Sequence &\multicolumn{1}{c}{Size}

1859: &\multicolumn{3}{c}{Time $[{\rm msec}]$%

1860: \vphantom{$1^{2^3}$}} % Need more vertical space!

1861: &\multicolumn{3}{c}{Comparisons $[n]$}

1862: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}

1863: &\multicolumn{1}{c}{$L_{\rm avg}$}

1864: &\multicolumn{1}{c}{$P_{\rm avg}$}

1865: &\multicolumn{1}{c}{$N_{\rm avg}$}

1866: &\multicolumn{1}{c}{$p_{\rm avg}$}

1867: &\multicolumn{1}{c}{$s_{\rm avg}$}\\

1868: &\multicolumn{1}{c}{$n$}

1869: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}

1870: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}

1871: & &\multicolumn{1}{c}{$[n]$}

1872: &\multicolumn{1}{c}{$[\ln n]$}

1873: &\multicolumn{1}{c}{$[\ln n]$} &

1874: &\multicolumn{1}{c}{$[\%n]$}\\

1875: \hline

1876: %dsel10o/dsel10ox alpha=0.5 beta=0.25 cutoff=600

1877: random     &  50K

1878: &    3&   10&    0& 1.81& 1.85& 1.77& 5.23& 1.22& 0.46& 1.01& 7.62& 4.11\\

1879:            & 100K

1880: &    4&   10&    0& 1.72& 1.76& 1.65& 4.50& 1.15& 0.45& 0.99& 8.05& 3.20\\

1881:            & 500K

1882: &   13&   20&   10& 1.62& 1.63& 1.60& 4.14& 1.08& 0.59& 1.27& 7.59& 1.86\\

1883:            &   1M

1884: &   24&   30&   20& 1.59& 1.60& 1.57& 3.93& 1.06& 0.64& 1.35& 8.18& 1.47\\

1885:            &   2M

1886: &   46&   50&   40& 1.57& 1.58& 1.56& 3.73& 1.04& 0.76& 1.59& 7.67& 1.16\\

1887:            &   4M

1888: &   86&   91&   80& 1.56& 1.56& 1.55& 3.61& 1.03& 0.94& 1.94& 7.21& 0.91\\

1889:            &   8M

1890: &  163&  171&  160& 1.54& 1.55& 1.54& 3.45& 1.03& 0.98& 1.99& 7.45& 0.72\\

1891:            &  16M

1892: &  316&  321&  310& 1.53& 1.54& 1.53& 3.44& 1.02& 0.99& 2.02& 7.55& 0.57\\

1893: \hline

1894: \end{tabular}

1895: \end{center}

1896: \end{table}

1897: %

1898: As noted in \S\ref{s:intro}, {\sc qSelect} is slightly faster than

1899: {\sc Select} on larger inputs because most of its work occurs on the

1900: first partition (cf.\ $L_{\rm avg}$ in Tabs.\ \ref{tab:Selrand} and

1901: \ref{tab:qSel}).  In Table \ref{tab:riSel}

1902: %

1903: %   *** TABLE 7.7 ***

1904: \begin{table}[t!]

1905: \caption{Performance of {\sc riSelect} on random inputs.}

1906: \label{tab:riSel}

1907: \footnotesize

1908: \begin{center}

1909: \begin{tabular}{lrrrrrrrrrr}

1910: \hline

1911: Sequence &\multicolumn{1}{c}{Size}

1912: &\multicolumn{3}{c}{Time $[{\rm msec}]$%

1913: \vphantom{$1^{2^3}$}} % Need more vertical space!

1914: &\multicolumn{3}{c}{Comparisons $[n]$}

1915: &\multicolumn{1}{c}{$L_{\rm avg}$}

1916: &\multicolumn{1}{c}{$P_{\rm avg}$}

1917: &\multicolumn{1}{c}{$N_{\rm rnd}$}\\

1918: &\multicolumn{1}{c}{$n$}

1919: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}

1920: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}

1921: &\multicolumn{1}{c}{$[n]$}

1922: &\multicolumn{1}{c}{$[\ln n]$}&\\

1923: \hline

1924: %dsel08

1925: random     &  50K

1926: &    2&   10&    0& 3.10& 4.32& 1.88& 3.10& 1.63& 0.45\\

1927:            & 100K

1928: &    4&   10&    0& 2.61& 4.19& 1.77& 2.61& 1.60& 0.20\\

1929:            & 500K

1930: &   17&   20&   10& 2.91& 4.45& 1.69& 2.91& 1.57& 0.25\\

1931:            &   1M

1932: &   33&   41&   20& 2.81& 3.79& 1.84& 2.81& 1.57& 0.40\\

1933:            &   2M

1934: &   62&   90&   40& 2.60& 3.57& 1.83& 2.60& 1.61& 0.35\\

1935:            &   4M

1936: &  135&  191&   90& 2.86& 4.38& 1.83& 2.86& 1.65& 0.55\\

1937:            &   8M

1938: &  249&  321&  190& 2.60& 3.48& 1.80& 2.60& 1.58& 0.40\\

1939:            &  16M

1940: &  553&  762&  331& 2.99& 4.49& 1.73& 2.99& 1.58& 0.40\\

1941: \hline

1942: \end{tabular}

1943: \end{center}

1944: \end{table}

1945: %

1946: we give corresponding results for {\sc riSelect}, a Fortran version of

1947: the algorithm of \cite{val:iss}.  For these inputs, {\sc riSelect}

1948: behaves like {\sc Find} with median-of-3 pivots (because the

1949: average numbers of randomization steps, $N_{\rm rnd}$, are negligible);

1950: hence the expected value of $C_{\rm avg}$ is of order $2.75n$

1951: \cite{kimapr:ahf}.

1952:

1953: Our final Table \ref{tab:comp_small}

1954: %

1955: %   *** TABLE 7.8 ***

1956: \begin{table}

1957: \caption{Numbers of comparisons per element made on small random

1958: inputs.}

1959: \label{tab:comp_small}

1960: \footnotesize

1961: \begin{center}

1962: \begin{tabular}{lccccccccccc}

1963: \hline

1964: %dsel20x, dsel10x, dsel08x alpha=0.5 beta=0.25 cutoff=600

1965: Size%

1966: \vphantom{$1^{2^3}$} % Need more vertical space!

1967: &

1968: &  1000&  2500&  5000&  7500& 10000& 12500& 15000& 17500& 20000& 25000\\

1969: \hline

1970: &avg

1971: &  2.48&  2.06&  1.93&  1.87&  1.81&  1.79&  1.77&  1.76&  1.74&  1.71\\

1972: {\sc Select}

1973: &max

1974: &  4.25&  3.03&  2.28&  2.22&  2.09&  2.05&  1.95&  1.93&  1.93&  1.93\\

1975: &min

1976: &  1.55&  1.06&  1.03&  1.64&  1.62&  1.61&  1.64&  1.63&  1.59&  1.60\\

1977: \hline

1978: &avg

1979: &  2.86&  2.55&  2.24&  2.16&  2.07&  2.03&  1.98&  1.98&  1.94&  1.90\\

1980: {\sc qSelect}

1981: &max

1982: &  3.97&  3.55&  2.57&  2.38&  2.28&  2.21&  2.16&  2.13&  2.11&  2.31\\

1983: &min

1984: &  2.29&  1.97&  1.98&  1.95&  1.87&  1.86&  1.82&  1.83&  1.82&  1.75\\

1985: \hline

1986: &avg

1987: &  2.72&  2.85&  2.66&  2.71&  2.72&  2.83&  2.78&  2.75&  2.75&  2.84\\

1988: {\sc riSelect}

1989: &max

1990: &  4.40&  4.51&  4.69&  4.43&  4.62&  4.76&  4.64&  4.40&  5.10&  4.77\\

1991: &min

1992: &  1.68&  1.83&  1.75&  1.59&  1.70&  1.77&  1.78&  1.67&  1.90&  1.71\\

1993: \hline

1994: \end{tabular}

1995: \end{center}

1996: \end{table}

1997: %

1998: shows that {\sc Select} beats its competitors with respect to the

1999: numbers of comparisons made on small random inputs (100 instances for

2000: each input size $n$).

2001:

2002: Our computational results, combined with those in

2003: \cite{kiw:psq,kiw:rsq},

2004: suggest that both {\sc Select} and {\sc qSelect} may compete with

2005: {\sc Find} in practice.

2006:

2007: %{\bf Acknowledgment}.  I would like to thank the Associate Editor and

2008: %the two anonymous referees for their helpful comments.

2009: {\bf Acknowledgment}.  I would like to thank Olgierd Hryniewicz,

2010: Roger Koenker, Ronald L. Rivest and John D. Valois for useful

2011: discussions.

2012:

2013: %\clearpage

2014:

2015: %

2016: %   *** REFERENCES ***

2017: \footnotesize

2018: %\bibliography{kckabbr,kalg,kbk,kck,kint,kth}

2019: %\bibliographystyle{kck}

2020: \newcommand{\etalchar}[1]{$^{#1}$}

2021: \newcommand{\noopsort}[1]{} \newcommand{\printfirst}[2]{#1}

2022:   \newcommand{\singleletter}[1]{#1} \newcommand{\switchargs}[2]{#2#1}

2023: \ifx\undefined\bysame

2024: \newcommand{\bysame}{\leavevmode\hbox to3em{\hrulefill}\,}

2025: \fi

2026: \begin{thebibliography}{PRKT83}

2027:

2028: \bibitem[BeM93]{bemc:esf}

2029: J.~L. Bentley and M.~D. McIlroy, {\em Engineering a sort function},

2030:   Software--Practice and Experience {\bf 23} (1993) 1249--1265.

2031:

2032: \bibitem[BFP{\etalchar{+}}72]{blflprrita:tbs}

2033: M.~R. Blum, R.~W. Floyd, V.~R. Pratt, R.~L. Rivest and R.~E. Tarjan, {\em Time

2034:   bounds for selection}, J. Comput. System Sci. {\bf 7} (1972) 448--461.

2035:

2036: \bibitem[Bro76]{bro:ra489}

2037: T.~Brown, {\em Remark on {A}lgorithm 489}, ACM Trans. Math. Software {\bf 2}

2038:   (1976) 301--304.

2039:

2040: \bibitem[Chv79]{chv:thd}

2041: V.~Chv{\'a}tal, {\em The tail of the hypergeometric distribution}, Discrete

2042:   Math. {\bf 25} (1979) 285--287.

2043:

2044: \bibitem[CuM89]{cumu:acs}

2045: W.~Cunto and J.~I. Munro, {\em Average case selection}, J. of the ACM {\bf 36}

2046:   (1989) 270--279.

2047:

2048: \bibitem[DHUZ01]{dohaulzw:lbs}

2049: D.~Dor, J.~H{\aa}stad, S.~Ulfberg and U.~Zwick, {\em On lower bounds for

2050:   selecting the median}, SIAM J. Discrete Math. {\bf 14} (2001) 299--311.

2051:

2052: \bibitem[DoZ99]{dozw:sm}

2053: D.~Dor and U.~Zwick, {\em Selecting the median}, SIAM J. Comput. {\bf 28}

2054:   (1999) 1722--1758.

2055:

2056: \bibitem[DoZ01]{dozw:msr}

2057: \bysame, {\em Median selection requires $(2+\epsilon){N}$ comparisons}, SIAM J.

2058:   Discrete Math. {\bf 14} (2001) 312--325.

2059:

2060: \bibitem[FlR75a]{flri:asf}

2061: R.~W. Floyd and R.~L. Rivest, {\em The algorithm {SELECT}---for finding the

2062:   $i$th smallest of $n$ elements ({A}lgorithm 489)}, Comm. ACM {\bf 18} (1975)

2063:   173.

2064:

2065: \bibitem[FlR75b]{flri:etb}

2066: \bysame, {\em Expected time bounds for selection}, Comm. ACM {\bf 18} (1975)

2067:   165--172.

2068:

2069: \bibitem[Gr{\"u}99]{gru:mvh}

2070: R.~Gr{\"u}bel, {\em On the median-of-$k$ version of {H}oare's selection

2071:   algorithm}, Theor. Inform. Appl. {\bf 33} (1999) 177--192.

2072:

2073: \bibitem[Hoa61]{hoa:a65}

2074: C.~A.~R. Hoare, {\em Algorithm 65: {\sc Find}}, Comm. ACM {\bf 4} (1961)

2075:   321--322.

2076:

2077: \bibitem[Hoe63]{hoe:pis}

2078: W.~Hoeffding, {\em Probability inequalities for sums of bounded random

2079:   variables}, J. Amer. Statist. Assoc. {\bf 58} (1963) 13--30.

2080:

2081: \bibitem[Kiw03a]{kiw:psq}

2082: K.~C. Kiwiel, {\em Partitioning schemes for quicksort and quickselect}, Tech.

2083:   report, Systems Research Institute, Warsaw, 2003.

2084: \newblock Available at the URL http://arxiv.org/abs/cs.DS/0312054.

2085:

2086: \bibitem[Kiw03b]{kiw:rsq}

2087: \bysame, {\em Randomized selection with quintary partitions}, Tech. report,

2088:   Systems Research Institute, Warsaw, 2003.

2089: \newblock Available at the URL http://arxiv.org/abs/cs.DS/0312055.

2090:

2091: \bibitem[KMP97]{kimapr:ahf}

2092: P.~Kirschenhofer, C.~Mart{\'\i}nez and H.~Prodinger, {\em Analysis of {H}oare's

2093:   {\sc find} algorithm with median-of-three partition}, Random Structures and

2094:   Algorithms {\bf 10} (1997) 143--156.

2095:

2096: \bibitem[Knu98]{knu:acpIII2}

2097: D.~E. Knuth, {\em The Art of Computer Programming. Volume III: Sorting and

2098:   Searching}, second ed., Addison-Wesley, Reading, MA, 1998.

2099:

2100: \bibitem[MaR01]{maro:oss}

2101: C.~Mart{\'\i}nez and S.~Roura, {\em Optimal sampling strategies in quicksort

2102:   and quickselect}, SIAM J. Comput. {\bf 31} (2001) 683--705.

2103:

2104: \bibitem[Mus97]{mus:iss}

2105: D.~R. Musser, {\em Introspective sorting and selection algorithms},

2106:   Software--Practice and Experience {\bf 27} (1997) 983--993.

2107:

2108: \bibitem[PRKT83]{poriti:eds}

2109: J.~T. Postmus, A.~H.~G. Rinnooy~Kan and G.~T. Timmer, {\em An efficient dynamic

2110:   selection method}, Comm. ACM {\bf 26} (1983) 878--881.

2111:

2112: \bibitem[SPP76]{scpapi:fm}

2113: A.~Sch{\"o}nhage, M.~Paterson and N.~Pippenger, {\em Finding the median}, J.

2114:   Comput. System Sci. {\bf 13} (1976) 184--199.

2115:

2116: \bibitem[Val00]{val:iss}

2117: J.~D. Valois, {\em Introspective sorting and selection revisited},

2118:   Software--Practice and Experience {\bf 30} (2000) 617--638.

2119:

2120: \end{thebibliography}

2121: \normalsize

2122: %   *** END OF REFERENCES ***

2123: %

2124: \end{document}             % End of document.

2125: