cs0401003/cs0401003
1: \documentclass[12pt]{article}
2: % Page geometry.  Every margin below is measured from a reference point
3: % one inch in from the top and left edges of the page.
4: \setlength{\oddsidemargin}{0in}  % left margin, odd-numbered pages
5: \setlength{\evensidemargin}{0in} % left margin, even-numbered pages
6: \setlength{\textheight}{8.9in}   % text height, head and foot excluded
7: \setlength{\textwidth}{6.35in}   % text width
8: \setlength{\topmargin}{-0.5in}   % extra space added at the top of the page
9: % Unit reminders: 1pc = 0.421751 cm, 1 cm = 2.37106 pc, 1 pt = 0.0351459 cm
10: \title{Randomized selection with tripartitioning}
11: \author{Krzysztof C. Kiwiel\thanks{Systems Research Institute,
12: %       Polish Academy of Sciences,
13:         Newelska 6, 01--447 Warsaw, Poland
14:         ({\tt kiwiel@ibspan.waw.pl})}}
15: \date{January 4, 2004}
16: 
17: % ersatz blackboard characters (an upright I with the following F or R pulled back over it by .18em), plus \eqref, which prints a reference in parentheses using \normalfont
18: \newcommand{\BbbF}{{\rm\normalcolor I\kern-.18em F}}
19: \newcommand{\BbbR}{{\rm\normalcolor I\kern-.18em R}}
20: \newcommand{\eqref}[1]{{\normalfont\normalcolor(\ref{#1})}}
21: \makeatletter
22: % the proof environment: \begin{proof} prints the bold run-in heading "Proof."; \begin{proof}[text] prints "Proof text." instead
23: \def\proof{% \a typesets the heading it is given; \b is the variant taking a bracketed argument
24:    \def\a##1{\begin{trivlist}\item[]{\bf\ignorespaces{##1}.}% open a trivlist and set the heading in bold, followed by a period
25:     \enspace\ignorespaces}% fixed gap after the heading; blanks before the proof text are ignored
26:    \def\b[##1]{\a{Proof\ \ignorespaces{##1}}}% heading becomes "Proof <bracketed text>"
27:    \@ifnextchar[{\b}{\a{Proof}}}
28: \def\endproof{\end{trivlist}}
29: % end-of-proof symbol: a hollow box drawn with .4pt rules, .5em wide inside and as tall as a \normalsize "A"; in math mode that is not inner it is preceded by \quad
30: \def\qed{\relax\protect\ifmmode\ifinner\else\quad\fi\fi
31:     \hbox{\vbox{\hrule height.4pt\hbox{\vbox{\hrule height.4pt
32:     \hbox{\vrule width.4pt\vphantom{\normalsize A}\kern.5em
33:     \vrule width.4pt}\hrule height.4pt}}}}}
34: % subequations: inside the environment equations are labelled <parent number>a, <parent number>b, ...; \endsubequations restores the counter and \theequation
35: \newtoks\@stequation
36: \def\subequations{\refstepcounter{equation}% advance to the parent equation number
37: \edef\@savedequation{\the\c@equation}% remember the counter value for \endsubequations
38: \@stequation=\expandafter{\theequation}%   store \theequation expanded exactly one level
39: \edef\@savedtheequation{\the\@stequation}% keep that one-level expansion, not expanded any further
40: \edef\oldtheequation{\theequation}% fully expanded parent number, used as the prefix below
41: \setcounter{equation}{0}% restart the counter so that \alph begins at "a"
42: \def\theequation{\oldtheequation\alph{equation}}}%
43: \def\endsubequations{%
44: \setcounter{equation}{\@savedequation}% back to the parent counter value
45: \@stequation=\expandafter{\@savedtheequation}%
46: \edef\theequation{\the\@stequation}\global\@ignoretrue}
47: % modified theorem environment: bold heading "<name> <number>." (or "<name> <number> (<note>)." when a note is given), followed by an italic body
48: \def\@begintheorem#1#2{\trivlist
49:     \item[\hskip \labelsep{\bfseries #1\ #2.}]\itshape}
50: \def\@opargbegintheorem#1#2#3{\trivlist
51:     \item[\hskip \labelsep{\bfseries #1\ #2\ (#3).}]\itshape}
52: % numbering equations, figures and tables as <section>.<number>, with each counter reset by \section
53: \@addtoreset{equation}{section}% \section resets the `equation' counter; the same is done for `figure' and `table' below.
54: \def\theequation{\thesection.\arabic{equation}}
55: \@addtoreset{figure}{section}
56: \def\thefigure{\thesection.\arabic{figure}}
57: \@addtoreset{table}{section}
58: \def\thetable{\thesection.\arabic{table}}
59: % fix up of the eqnarray environment: \@@eqnsel is \relax; \@tempa and \@tempb hold reference copies of two eqnarray definitions that are compared with \eqnarray by \ifx further down
60: \let\@@eqnsel=\relax
61: \def\@tempa{% reference copy tested first (the case commented below as "fleqn document-class option is in effect")
62:     \stepcounter{equation}%
63:     \def\@currentlabel{\p@equation\theequation}%
64:     \global\@eqnswtrue\m@th
65:     \global\@eqcnt\z@
66:     \tabskip\mathindent
67:     \let\\=\@eqncr
68:     \setlength\abovedisplayskip{\topsep}%
69:     \ifvmode
70:       \addtolength\abovedisplayskip{\partopsep}%
71:     \fi
72:     \addtolength\abovedisplayskip{\parskip}%
73:     \setlength\belowdisplayskip{\abovedisplayskip}%
74:     \setlength\belowdisplayshortskip{\abovedisplayskip}%
75:     \setlength\abovedisplayshortskip{\abovedisplayskip}%
76:     $$\everycr{}\halign to\linewidth% $$
77:     \bgroup
78:       \hskip\@centering
79:       $\displaystyle\tabskip\z@skip{##}$\@eqnsel&%
80:       \global\@eqcnt\@ne \hskip \tw@\arraycolsep \hfil${##}$\hfil&%
81:       \global\@eqcnt\tw@ \hskip \tw@\arraycolsep
82:         $\displaystyle{##}$\hfil \tabskip\@centering&%
83:       \global\@eqcnt\thr@@
84:         \hb@xt@\z@\bgroup\hss##\egroup\tabskip\z@skip\cr}%
85: \def\@tempb{% reference copy of the default eqnarray definition; only compared with \eqnarray by \ifx below, then emptied
86:    \stepcounter{equation}%
87:    \def\@currentlabel{\p@equation\theequation}%
88:    \global\@eqnswtrue
89:    \m@th
90:    \global\@eqcnt\z@
91:    \tabskip\@centering
92:    \let\\\@eqncr
93:    $$\everycr{}\halign to\displaywidth\bgroup
94:        \hskip\@centering$\displaystyle\tabskip\z@skip{##}$\@eqnsel
95:       &\global\@eqcnt\@ne\hskip \tw@\arraycolsep \hfil${##}$\hfil
96:       &\global\@eqcnt\tw@ \hskip \tw@\arraycolsep
97:          $\displaystyle{##}$\hfil\tabskip\@centering
98:       &\global\@eqcnt\thr@@ \hb@xt@\z@\bgroup\hss##\egroup
99:          \tabskip\z@skip
100:       \cr
101: }
102: % Redefine \eqnarray only when its current definition equals one of the two reference copies (\@tempa, \@tempb); otherwise just print a warning.
103: \ifx\eqnarray\@tempa%     Case 1: \eqnarray equals \@tempa (the fleqn document-class option is in effect)
104:     \def\eqnarray{%
105:     \stepcounter{equation}%
106:     \def\@currentlabel{\p@equation\theequation}%
107:     \global\@eqnswtrue\m@th
108:     \global\@eqcnt\z@
109:     \tabskip\mathindent
110:     \let\\=\@eqncr
111:     \setlength\abovedisplayskip{\topsep}%
112:     \ifvmode
113:       \addtolength\abovedisplayskip{\partopsep}%
114:     \fi
115:     \addtolength\abovedisplayskip{\parskip}%
116:     \setlength\belowdisplayskip{\abovedisplayskip}%
117:     \setlength\belowdisplayshortskip{\abovedisplayskip}%
118:     \setlength\abovedisplayshortskip{\abovedisplayskip}%
119:     $$\everycr{}\halign to\linewidth% $$
120:     \bgroup
121:       \hskip\@centering
122:       $\displaystyle\tabskip\z@skip{##}$\@eqnsel&%
123:       \global\@eqcnt\@ne
124:       \@@eqnsel%            \@@eqnsel (set to \relax above) has replaced \hskip \tw@\arraycolsep
125:       \hfil${{}##{}}$\hfil&%              middle column wrapped in empty groups, as in fixup.sty but in textstyle
126:       \global\@eqcnt\tw@
127:       \@@eqnsel%           \@@eqnsel (set to \relax above) has replaced \hskip \tw@\arraycolsep
128:         $\displaystyle{##}$\hfil \tabskip\@centering&%
129:       \global\@eqcnt\thr@@
130:         \hb@xt@\z@\bgroup\hss##\egroup\tabskip\z@skip\cr}%
131: \else\ifx\eqnarray\@tempb%       Case 2: \eqnarray equals \@tempb (the default eqnarray environment).
132:    \def\eqnarray{%
133:    \stepcounter{equation}%
134:    \def\@currentlabel{\p@equation\theequation}%
135:    \global\@eqnswtrue
136:    \m@th
137:    \global\@eqcnt\z@
138:    \tabskip\@centering
139:    \let\\\@eqncr
140:    $$\everycr{}\halign to\displaywidth\bgroup
141:        \hskip\@centering$\displaystyle\tabskip\z@skip{##}$\@eqnsel
142:       &\global\@eqcnt\@ne
143:       \@@eqnsel%           \@@eqnsel (set to \relax above) has replaced \hskip \tw@\arraycolsep
144:       \hfil${{}##{}}$\hfil%              middle column wrapped in empty groups, as in fixup.sty but in textstyle
145:       &\global\@eqcnt\tw@
146:       \@@eqnsel%           \@@eqnsel (set to \relax above) has replaced \hskip \tw@\arraycolsep
147:          $\displaystyle{##}$\hfil\tabskip\@centering
148:       &\global\@eqcnt\thr@@ \hb@xt@\z@\bgroup\hss##\egroup
149:          \tabskip\z@skip
150:       \cr}
151: \else \typeout{Warning: Unable to fix unknown version of \string\eqnarray.}
152: \fi\fi
153: \def\@tempa{}			% Empty both reference copies to free up TeX's memory
154: \def\@tempb{}
155: % closed format bibliography: thebibliography is redefined with \itemsep set to zero; the heading uses \refname when \chapter is undefined and \bibname otherwise
156: \@ifundefined{chapter}{%
157:   \renewenvironment{thebibliography}[1]
158:      {\section*{\refname
159:         \@mkboth{\MakeUppercase\refname}{\MakeUppercase\refname}}%
160:       \list{\@biblabel{\@arabic\c@enumiv}}%
161:            {\settowidth\labelwidth{\@biblabel{#1}}%
162:             \leftmargin\labelwidth
163:             \advance\leftmargin\labelsep
164:             \itemsep \z@                 % No extra vertical space between entries.
165:             \@openbib@code
166:             \usecounter{enumiv}%
167:             \let\p@enumiv\@empty
168:             \renewcommand\theenumiv{\@arabic\c@enumiv}}%
169:       \sloppy
170:       \clubpenalty4000
171:       \@clubpenalty \clubpenalty
172:       \widowpenalty4000%
173:       \sfcode`\.\@m}
174:      {\def\@noitemerr
175:        {\@latex@warning{Empty `thebibliography' environment}}%
176:       \endlist}}%
177: {\renewenvironment{thebibliography}[1]
178:      {\section*{\bibname
179:         \@mkboth{\MakeUppercase\bibname}{\MakeUppercase\bibname}}%
180:       \list{\@biblabel{\@arabic\c@enumiv}}%
181:            {\settowidth\labelwidth{\@biblabel{#1}}%
182:             \leftmargin\labelwidth
183:             \advance\leftmargin\labelsep
184:             \itemsep \z@                 % No extra vertical space between entries.
185:             \@openbib@code
186:             \usecounter{enumiv}%
187:             \let\p@enumiv\@empty
188:             \renewcommand\theenumiv{\@arabic\c@enumiv}}%
189:       \sloppy
190:       \clubpenalty4000
191:       \@clubpenalty \clubpenalty
192:       \widowpenalty4000%
193:       \sfcode`\.\@m}
194:      {\def\@noitemerr
195:        {\@latex@warning{Empty `thebibliography' environment}}%
196:       \endlist}}%
197: % Mathematical definitions, all set in \operator@font: "Arg"/"arg" prefixed to \max and \min; E and P as \mathord symbols; med, rank and var as \mathop operators
198: \newcommand{\Argmax}{{\operator@font Arg}\max}
199: \newcommand{\Argmin}{{\operator@font Arg}\min}
200: \newcommand{\argmax}{{\operator@font arg}\max}
201: \newcommand{\argmin}{{\operator@font arg}\min}
202: \newcommand{\Exp}{\mathord{\operator@font E}}
203: \newcommand{\med}{\mathop{\operator@font med}}
204: \newcommand{\Prob}{\mathord{\operator@font P}}
205: \newcommand{\rank}{\mathop{\operator@font rank}}
206: \newcommand{\var}{\mathop{\operator@font var}}
207: \makeatother
208: % All theorem-like environments below share the `theorem' counter,
209: % which restarts at 1 in every section.
210: \newtheorem{theorem}{Theorem}[section]
211: \newtheorem{lemma}[theorem]{Lemma}
212: \newtheorem{proposition}[theorem]{Proposition}
213: \newtheorem{corollary}[theorem]{Corollary}
214: \newtheorem{fact}[theorem]{Fact}
215: \newtheorem{definition}[theorem]{Definition}
216: \newtheorem{assumption}[theorem]{Assumption}
217: \newtheorem{algorithm}[theorem]{Algorithm}
218: \newtheorem{procedure}[theorem]{Procedure}
219: \newtheorem{example}[theorem]{Example}
220: \newtheorem{examples}[theorem]{Examples}
221: \newtheorem{remark}[theorem]{Remark}
222: \newtheorem{remarks}[theorem]{Remarks}
223: % Schemes have their own document-wide counter, printed as A, B, C, ...
224: \newtheorem{scheme}{Scheme}
225: \renewcommand{\thescheme}{\Alph{scheme}}
226: %-----------------------------------------------------------------------
227: \hyphenation{quick-sel-ect}
228: %-----------------------------------------------------------------------
229: 
230: \begin{document}           % End of preamble and beginning of text.
231: 
232: \maketitle                 % Produces the title.
233: 
234: \begin{abstract}
235: \noindent
236: We show that several versions of Floyd and Rivest's algorithm
237: {\sc Select} [Comm.\ ACM {\bf 18} (1975) 173] for finding the $k$th
238: smallest of $n$ elements require at most $n+\min\{k,n-k\}+o(n)$
239: comparisons on average, even when equal elements occur.  This parallels
240: our recent analysis of another variant due to Floyd and Rivest
241: [Comm.\ ACM {\bf 18} (1975) 165--172].  Our computational results
242: suggest that both variants perform well in practice, and may compete
243: with other selection methods, such as Hoare's {\sc Find} or
244: quickselect with median-of-3 pivots.
245: \end{abstract}
246: 
247: \begin{quotation}
248: \noindent{\bf Key words.} Selection, medians, partitioning,
249: computational complexity.
250: \end{quotation}
251: 
252: %\begin{quotation}
253: %\noindent{\bf MSC Subject Classifications.} 68W20, 68W05, 68Q25
254: %\end{quotation}
255: 
256: %\begin{quotation}
257: %\noindent{\bf Abbreviated title:} Randomized selection.
258: %\end{quotation}
259: 
260: %   *** SECTION 1 ***
261: \section{Introduction}
262: \label{s:intro}
263: The {\em selection problem\/} is defined as follows: Given a set
264: $X:=\{x_j\}_{j=1}^n$ of $n$ elements, a total order $<$ on $X$,
265: and an integer $1\le k\le n$, find the {\em $k$th smallest\/}
266: element of $X$, i.e., an element $x$ of $X$ for which there are at
267: most $k-1$ elements $x_j<x$ and at least $k$ elements $x_j\le x$.
268: The {\em median\/} of $X$ is the $\lceil n/2\rceil$th smallest
269: element of $X$.
270: 
271: Selection is one of the fundamental problems in computer science;
272: see, e.g., the references in \cite{dohaulzw:lbs,dozw:sm,dozw:msr} and
273: \cite[\S5.3.3]{knu:acpIII2}.  Most references concentrate on the
274: number of comparisons between pairs of elements made in selection
275: algorithms.  In the worst case, selection needs at least
276: $(2+\epsilon)n$ comparisons \cite{dozw:msr}, whereas the algorithm of
277: \cite{blflprrita:tbs} makes at most $5.43n$, that of \cite{scpapi:fm}
278: needs $3n+o(n)$, and that in \cite{dozw:sm} takes $2.95n+o(n)$.  In the
279: average case, for $k\le\lceil n/2\rceil$, at least $n+k-O(1)$
280: comparisons are necessary \cite{cumu:acs}, whereas the best upper bound
281: is $n+k+O(n^{1/2}\ln^{1/2}n)$ \cite[Eq.\ (5.3.3.16)]{knu:acpIII2}.  The
282: classical algorithm {\sc Find} of \cite{hoa:a65}, also known as
283: quickselect, has an upper bound of $3.39n+o(n)$ for $k=\lceil n/2\rceil$
284: in the average case \cite[Ex.\ 5.2.2--32]{knu:acpIII2}, which improves
285: to $2.75n+o(n)$ for median-of-3 pivots \cite{gru:mvh,kimapr:ahf}.
286: 
287: In practice {\sc Find} is most popular.  One reason is that the
288: algorithms of \cite{blflprrita:tbs,scpapi:fm} are much slower on the
289: average \cite{mus:iss,val:iss}, whereas \cite{kimapr:ahf} adds that
290: other methods proposed so far, although better than {\sc Find} in
291: theory, are not practical because they are difficult to implement,
292: their constant factors and hidden lower order terms are too large,
293: etc.  It is quite surprising that these references
294: \cite{kimapr:ahf,mus:iss,val:iss} ignore the algorithm {\sc Select}
295: of \cite{flri:etb}, since most textbooks mention that {\sc Select} is
296: asymptotically faster than {\sc Find}.  In contrast, this paper shows
297: that {\sc Select} can compete with {\sc Find} in both theory and
298: practice, even for fairly small values of the input size $n$.
299: 
300: We now outline our contributions in more detail.  The initial two
301: versions of {\sc Select} \cite{flri:etb} had gaps in their analysis
302: (cf.\ \cite{bro:ra489,poriti:eds}, \cite[Ex.\ 5.3.3--24]{knu:acpIII2});
303: the first version was validated in \cite{kiw:rsq}, and the second one
304: will be addressed elsewhere.  This paper deals with the third version
305: of {\sc Select} from \cite{flri:asf}, which operates as follows.  Using
306: a small random sample, it finds an element $v$ almost sure to be just
307: above the $k$th if $k<n/2$, or below the $k$th if $k\ge n/2$.
308: Partitioning $X$ about $v$ leaves $\min\{k,n-k\}+o(n)$ elements on
309: average for the next recursive call, in which $k$ is near $1$ or $n$
310: with high probability, so this second call eliminates almost all the
311: remaining elements.
312: 
313: Apparently this version of {\sc Select} has not been analyzed in the
314: literature, even in the case of distinct elements.  We first revise it
315: slightly to simplify our analysis.  Then, without assuming that the
316: elements are distinct, we show that {\sc Select} needs at most
317: $n+\min\{k,n-k\}+O(n^{2/3}\ln^{1/3}n)$ comparisons on average, with
318: $\ln^{1/3}n$ replaced by $\ln^{1/2}n$ for the original samples of
319: \cite{flri:asf}.  Thus the average cost of {\sc Select} reaches the
320: lower bounds of $1.5n+o(n)$ for median selection and $1.25n+o(n)$
321: for selecting an element of random rank.  For the latter task,
322: {\sc Find} has the bound $2n+o(n)$ when its pivot is set to the
323: median of a random sample of $s$ elements, with $s\to\infty$,
324: $s/n\to0$ as $n\to\infty$ \cite{maro:oss}; thus {\sc Select}
325: improves upon {\sc Find} mostly by using $k$, the rank of the element
326: to be found, for selecting the pivot $v$ in each recursive call.
327: 
328: {\sc Select} can be implemented by using the tripartitioning schemes
329: of \cite[\S5]{kiw:psq}, which include a modified scheme of
330: \cite{bemc:esf}; more traditional bipartitioning schemes
331: \cite[\S2]{kiw:psq} can perform quite poorly in {\sc Select} when
332: equal elements occur.  We add that the implementation of \cite{flri:asf}
333: avoids random number generation by assuming that the input file is in
334: random order, but this results in poor performance on some inputs of
335: \cite{val:iss}; hence our implementation of {\sc Select} employs
336: random sampling.
337: 
338: Our computational experience shows that {\sc Select} outperforms even
339: quite sophisticated implementations of {\sc Find} in both comparison
340: counts and computing times.  To save space, only selected results are
341: reported for the version of \cite{val:iss}, but our experience with
342: other versions on many different inputs was similar.  {\sc Select}
343: turned out to be more stable than {\sc Find}, having much smaller
344: variations of solution times and numbers of comparisons.  Quite
345: surprisingly, contrary to the folklore saying that {\sc Select} is only
346: asymptotically faster than {\sc Find}, {\sc Select} makes significantly
347: fewer comparisons even for small inputs
348: (cf.\ Tab.\ \ref{tab:comp_small}).
349: 
350: To relate our results with those of \cite{kiw:rsq}, let's call
351: {\sc qSelect} the quintary method of \cite{kiw:rsq} stemming from
352: \cite[\S2.1]{flri:etb}.  {\sc qSelect} eliminates almost all
353: elements on its first call by using two pivots, almost sure to be
354: just below and above the $k$th element, in a quintary partitioning
355: scheme.  Thus most work occurs on the first call of {\sc qSelect},
356: which corresponds to the first two calls of {\sc Select}.  Hence
357: {\sc Select} and {\sc qSelect} share the same efficiency estimates,
358: and in practice make similarly many comparisons. However, {\sc qSelect}
359: tends to be slightly faster on median finding: although its quintary
360: scheme is more complex, most of its work is spent on the first pass
361: through $X$, whereas {\sc Select} first partitions $X$ and then the
362: remaining part (about half) of $X$ on its second call to achieve a
363: similar problem reduction.  On the other hand, {\sc Select} makes
364: fewer comparisons on small inputs.  Of course, future work should assess
365: more fully the relative merits of {\sc Select} and {\sc qSelect}.  For
366: now, the tests reported in \cite{kiw:psq,kiw:rsq} and in \S\ref{s:exp}
367: suggest that both {\sc Select} and {\sc qSelect} can compete
368: successfully with refined implementations of {\sc Find}.
369: 
370: The paper is organized as follows.  A general version of {\sc Select} is
371: introduced in \S\ref{s:alg}, and its basic features are analyzed in
372: \S\ref{s:sample}.  The average performance of {\sc Select} is studied
373: in \S\ref{s:average}.  A modification that improves practical
374: performance is introduced in \S\ref{s:modmed}.
375: Partitioning schemes are discussed in \S\ref{s:ternpart}.
376: Finally, our computational results are reported in \S\ref{s:exp}.
377: %The Appendix contains proofs of certain technical results.
378: %Finally, we have a conclusion section.
379: 
380: Our notation is fairly standard.
381: $|A|$ denotes the cardinality of a set $A$.
382: In a given probability space, $\Prob$ is the probability measure,
383: $\Exp$ is the mean-value operator and $\Prob[\cdot|{\cal E}]$ is the
384: probability conditioned on an event ${\cal E}$; the complement of
385: ${\cal E}$ is denoted by ${\cal E}'$.
386: %
387: %   *** SECTION 2 ***
388: \section{The algorithm {\sc Select}}
389: \label{s:alg}
390: In this section we describe a general version of {\sc Select} in terms
391: of two auxiliary functions $s(n)$ and $g(n)$ (the sample size and rank
392: gap), which will be chosen later.  We omit their arguments in general,
393: as no confusion can arise.
394: %
395: %   *** ALGORITHM 2.1 ***
396: \begin{algorithm}
397: \label{alg:sel3}
398: \rm
399: \hfil\newline\noindent{\bf {\sc Select}$(X,k)$}
400: (Selects the $k$th smallest element of $X$, with $1\le k\le n:=|X|$)
401: \medbreak\noindent{\bf Step 1} ({\em Initiation\/}).
402: If $n=1$, return $x_1$.
403: %Choose the sample size $s\in\{1\colon n-1\}$ and gap $g>0$.
404: Choose the sample size $s\le n-1$ and gap $g>0$.
405: \medbreak\noindent{\bf Step 2} ({\em Sample selection\/}).
406: Pick randomly a sample $S:=\{y_1,\ldots,y_s\}$ from $X$.
407: \medbreak\noindent{\bf Step 3} ({\em Pivot selection\/}).
408: Let $v$ be the output of {\sc Select}$(S,i_v)$, where
409: \begin{equation}
410: i_v:=\left\{\begin{array}{ll}
411: \rlap{$\min$}\phantom{\max}\left\{\,\lceil ks/n+g\rceil,s\,\right\}&
412: \mbox{if}\ k<n/2,\\
413: \max\left\{\,\lceil ks/n-g\rceil,1\,\right\}&
414: \mbox{if}\ k\ge n/2.
415: \end{array}\right.
416: \label{iv}
417: \end{equation}
418: \medbreak\noindent{\bf Step 4} ({\em Partitioning\/}).
419: By comparing each element $x$ of $X\setminus S$ to $v$, partition $X$
420: into the three sets $L:=\{x\in X:x<v\}$, $E:=\{x\in X:x=v\}$ and
421: $R:=\{x\in X:v<x\}$.
422: \medbreak\noindent{\bf Step 5} ({\em Stopping test\/}).
423: If $|L|<k\le|L\cup E|$, return $v$.
424: \medbreak\noindent{\bf Step 6} ({\em Reduction\/}).
425: If $k\le|L|$, set $\hat X:=L$, $\hat n:=|\hat X|$ and $\hat k:=k$;
426: else set $\hat X:=R$, $\hat n:=|\hat X|$ and $\hat k:=k-|L\cup E|$.
427: \medbreak\noindent{\bf Step 7} ({\em Recursion\/}).
428: Return {\sc Select}$(\hat X,\hat k)$.
429: \end{algorithm}
430: 
431: A few remarks on the algorithm are in order.
432: %
433: %   *** REMARKS 2.2 ***
434: \begin{remarks}
435: \label{r:sel3}
436: \rm
437: (a)
438: The correctness and finiteness of {\sc Select} stem by induction from
439: the following observations.  The returns of Steps 1 and 5 deliver the
440: desired element.  At Step 6, $\hat X$ and $\hat k$ are chosen so that
441: the $k$th smallest element of $X$ is the $\hat k$th smallest element
442: of $\hat X$, and $\hat n<n$ (since $v\not\in\hat X$).  Also $|S|<n$ for
443: the recursive call at Step 3.
444: \par(b)
445: When Step 5 returns $v$, {\sc Select} may also return information about
446: the positions of the elements of $X$ relative to $v$.  For instance, if
447: $X$ is stored as an array, its $k$ smallest elements may be placed first
448: via interchanges at Step 4 (cf.\ \S\ref{s:ternpart}).  Hence Step 4 need
449: only compare $v$ with the elements of $X\setminus S$.
450: \par(c)
451: The following elementary property is needed in \S\ref{s:average}.
452: Let $c_n$ denote the maximum number of comparisons taken by {\sc Select}
453: on any input of size $n$.  Since Step 3 makes at most $c_s$
454: comparisons with $s<n$, Step 4 needs at most $n-s$, and Step 7 takes
455: at most $c_{\hat n}$ with $\hat n<n$, by induction $c_n<\infty$ for
456: all $n$.
457: \end{remarks}
458: %
459: %   *** SECTION 3 ***
460: \section{Sampling deviations}
461: \label{s:sample}
462: In this section we analyze general features of sampling used by
463: {\sc Select}.
464: Our analysis hinges on the following bound on the tail of the
465: hypergeometric distribution established in \cite{hoe:pis} and
466: rederived shortly in \cite{chv:thd}.
467: %
468: %   *** FACT 3.1 ***
469: \begin{fact}
470: \label{f:balls3}
471: Let\/ $s$ balls be chosen uniformly at random from a set of\/ $n$ balls,
472: of which\/ $r$ are red, and\/ $r'$ be the random variable representing
473: the number of red balls drawn.  Let\/ $p:=r/n$.  Then
474: \begin{equation}
475: \Prob\left[\,r'\ge ps+g\,\right]\le e^{-2g^2\!/s}\quad\forall g\ge0.
476: \label{Pexpg}
477: \end{equation}
478: \end{fact}
479: 
480: Denote by $x_1^*\le\ldots\le x_n^*$ and $y_1^*\le\ldots\le y_s^*$ the
481: sorted elements of the input set $X$ and the sample set $S$,
482: respectively, so that $v=y_{i_v}^*$.  The following result will give
483: bounds on the position of $v$ in the sorted input sequence.
484: %
485: %   *** LEMMA 3.2 ***
486: \begin{lemma}
487: \label{l:rankgen}
488: Suppose\/ $\bar\imath:=\max\{1,\min(\lceil\kappa s\rceil,s)\}$,
489: $\bar\jmath_l:=\max\{\lceil\kappa n-gn/s\rceil,1\}$, and\/
490: $\bar\jmath_r:=\min\{\lceil\kappa n+gn/s\rceil,n\}$, where\/
491: $-g<\kappa s\le s+g$, $1\le s\le n$ and $g\ge0$.  Then\/{\rm:}
492: %
493: \par\indent\rlap{\rm(a)}\hphantom{\rm(a)}
494: $\Prob[y_{\bar\imath}^*<x_{\bar\jmath_l}^*]\le e^{-2g^2\!/s}$ if\/
495: $\bar\imath\ge\lceil\kappa s\rceil$.
496: %
497: \par\indent\rlap{\rm(b)}\hphantom{\rm(a)}
498: $\Prob[x_{\bar\jmath_r}^*<y_{\bar\imath}^*]\le e^{-2g^2\!/s}$ if\/
499: $\bar\imath\le\lceil\kappa s\rceil$.
500: \end{lemma}
501: \begin{proof}
502: Note that $-g<\kappa s\le s+g$ implies that $\bar\jmath_l\le n$ and
503: $\bar\jmath_r\ge1$ are well-defined.
504: 
505: (a) If $y_{\bar\imath}^*<x_{\bar\jmath_l}^*$, at least $\bar\imath$
506: samples satisfy $y_i\le x_r^*$, where
507: $r:=\max_{x_j^*<x_{\bar\jmath_l}^*}j$.
508: In the setting of Fact \ref{f:balls3}, we have $r$ red elements
509: $x_j\le x_r^*$, $ps=rs/n$ and $r'\ge\bar\imath$.  Now,
510: $1\le r\le\bar\jmath_l-1$ implies
511: $2\le\bar\jmath_l=\lceil\kappa n-gn/s\rceil<\kappa n-gn/s+1$,
512: so $-rs/n>-\kappa s+g$.  Hence
513: $\bar\imath-ps-g>\kappa s-\kappa s+g-g=0$, i.e., $r'>ps+g$.
514: Thus $\Prob[y_{\bar\imath}^*<x_{\bar\jmath_l}^*]\le e^{-2g^2\!/s}$
515: by \eqref{Pexpg}.
516: 
517: (b) If $x_{\bar\jmath_r}^*<y_{\bar\imath}^*$, $s-\bar\imath+1$ samples
518: are at least $x_{\bar\jmath+1}^*$ with
519: $\bar\jmath:=\max_{x_j^*=x_{\bar\jmath_r}^*}j$.  Thus we have
520: $r:=n-\bar\jmath$ red elements $x_j\ge x_{\bar\jmath+1}^*$,
521: $ps=s-\bar\jmath s/n$ and $r'\ge s-\bar\imath+1$.  Since
522: $\bar\imath<\kappa s+1$ and
523: $n>\bar\jmath\ge\bar\jmath_r\ge\kappa n+gn/s$,
524: we get $s-\bar\imath+1-ps-g>\bar\jmath s/n-\kappa s-g\ge\kappa s+g-
525: \kappa s-g=0$.  Hence $r'>ps+g$ and
526: $\Prob[x_{\bar\jmath_r}^*<y_{\bar\imath}^*]\le
527: \Prob[r'\ge ps+g]\le e^{-2g^2\!/s}$ by \eqref{Pexpg}.
528: \qed
529: \end{proof}
530: 
531: We now bound the position of $v$ relative to $x_k^*$, $x_{k_l}^*$ and
532: $x_{k_r}^*$, where
533: \begin{equation}
534: k_l:=\max\left\{\,\lceil k-2gn/s\rceil,1\,\right\}
535: \quad\mbox{and}\quad
536: k_r:=\min\left\{\,\lceil k+2gn/s\rceil,n\,\right\}.
537: \label{klkr3}
538: \end{equation}
539: %
540: %   *** COROLLARY 3.3 ***
541: \begin{corollary}
542: \label{c:rankdir3}
543: {\rm(a)}
544: $\Prob[v<x_k^*]\le e^{-2g^2\!/s}$ if\/ $i_v=\lceil ks/n+g\rceil$
545: and\/ $k<n/2$.
546: %
547: \par\indent\rlap{\rm(b)}\hphantom{\rm(a)}
548: $\Prob[x_{k_r}^*<v]\le e^{-2g^2\!/s}$
549: if\/ $k<n/2$.
550: %
551: \par\indent\rlap{\rm(c)}\hphantom{\rm(a)}
552: $\Prob[x_k^*<v]\le e^{-2g^2\!/s}$ if\/ $i_v=\lceil ks/n-g\rceil$
553: and\/ $k\ge n/2$.
554: %
555: \par\indent\rlap{\rm(d)}\hphantom{\rm(a)}
556: $\Prob[v<x_{k_l}^*]\le e^{-2g^2\!/s}$
557: if\/ $k\ge n/2$.
558: %
559: \par\indent\rlap{\rm(e)}\hphantom{\rm(a)}
560: If\/ $k<n/2$, then\/
561: $i_v\ne\lceil ks/n+g\rceil$ iff\/ $n<k+gn/s${\rm;}
562: similarly, if\/ $k\ge n/2$, then\/
563: $i_v\ne\lceil ks/n-g\rceil$ iff\/ $k\le gn/s$.
564: \end{corollary}
565: \begin{proof}
566: Use Lem.\ \ref{l:rankgen} with $\kappa s=ks/n+g$ for (a,b), and
567: $\kappa s=ks/n-g$ for (c,d).
568: \qed
569: \end{proof}
570: %
571: %   *** SECTION 4 ***
572: \section{Average case performance}
573: \label{s:average}
574: In this section we analyze the average performance of {\sc Select} for
575: various sample sizes.
576: %
577: %   *** SUBSECTION 4.1 ***
578: \subsection{Floyd-Rivest's samples}
579: \label{ss:FRsample}
580: %
581: For positive constants $\alpha$ and $\beta$, consider choosing
582: $s=s(n)$ and $g=g(n)$ as
583: \begin{equation}
584: s:=\min\left\{\lceil\alpha f(n)\rceil,n-1\right\}\ \mbox{and}\
585: g:=(\beta s\ln n)^{1/2}\ \mbox{with}\ f(n):=n^{2/3}\ln^{1/3}n.
586: \label{sgf}
587: \end{equation}
588: This form of $g$ gives a probability bound
589: $e^{-2g^2\!/s}=n^{-2\beta}$ for Cor.\ \ref{c:rankdir3}.
590: To get more feeling, suppose $\alpha=\beta=1$ and $s=f(n)$.
591: Let $\phi(n):=f(n)/n$.  Then $s/n=g/s=\phi(n)$ and it will be seen
592: that the recursive call reduces $n$ at least by the factor $4\phi(n)$
593: on average, i.e., $\phi(n)$ is a contraction factor; note that
594: $\phi(n)\approx2.4\%$ for $n=10^6$ (cf.\ Tab.\ \ref{tab:fnphin}).
595: %
596: %   *** TABLE 4.1 ***
597: \begin{table}
598: \caption{Sample size $f(n):=n^{2/3}\ln^{1/3}n$ and relative sample size
599: $\phi(n):=f(n)/n$.}
600: \label{tab:fnphin}
601: \footnotesize
602: \begin{center}
603: \begin{tabular}{ccccccccc}
604: \hline
605: \vphantom{$1^{2^3}$} % Need more vertical space!
606: $n$     & $10^3$ & $10^4$ & $10^5$ & $10^6$ & $5\cdot10^6$ & $10^7$
607:         & $5\cdot10^7$    & $10^8$ \\
608: \hline
609: $f(n)$  & 190.449& 972.953& 4864.76& 23995.0&       72787.1& 117248
610:         & 353885 & 568986 \\
611: $\phi(n)$
612:         & .190449& .097295& .048648& .023995&       .014557& .011725
613:         & .007078& .005690\\
614: \hline
615: \end{tabular}
616: \end{center}
617: \end{table}
618: %
619: %   *** THEOREM 4.1 ***
620: \begin{theorem}
621: \label{t:selFR}
622: Let\/ $C_{nk}$ denote the expected number of comparisons made by
623: {\sc Select} for $s$ and\/ $g$ chosen as in\/ \eqref{sgf} with\/
624: $\beta\ge1/6$.  There exists a positive constant\/ $\gamma$ such
625: that
626: \begin{equation}
627: C_{nk}\le n+\min\{\,k,n-k\,\}+\gamma f(n)\quad\forall1\le k\le n.
628: \label{CnkFR}
629: \end{equation}
630: \end{theorem}
631: \begin{proof}
632: We need a few preliminary facts.
633: The function $\phi(t):=f(t)/t=(\ln t/t)^{1/3}$ decreases to $0$ on
634: $[e,\infty)$, whereas $f(t)$ grows to infinity on $[2,\infty)$.
635: Let $\delta:=4(\beta/\alpha)^{1/2}$.
636: Pick $\bar n\ge3$ large enough so that
637: $e-1\le\alpha f(\bar n)\le\bar n-1$ and $e\le\delta f(\bar n)$.
638: Let $\bar\alpha:=\alpha+1/f(\bar n)$.
639: Then, by \eqref{sgf} and the monotonicity of $f$ and $\phi$, we have
640: for $n\ge\bar n$
641: \begin{equation}
642: s\le\bar\alpha f(n)\quad\mbox{and}\quad
643: f(s)\le\bar\alpha\phi(\bar\alpha f(\bar n))f(n),
644: \label{sfsFR}
645: \end{equation}
646: \begin{equation}
647: f(\lfloor\delta f(n)\rfloor)\le f(\delta f(n))\le
648: \delta\phi(\delta f(\bar n))f(n).
649: \label{flfloordeltaFR}
650: \end{equation}
651: For instance, the first inequality of \eqref{sfsFR} yields
652: $f(s)\le f(\bar\alpha f(n))$, whereas
653: $$
654: f(\bar\alpha f(n))=\bar\alpha\phi(\bar\alpha f(n))f(n)\le
655: \bar\alpha\phi(\bar\alpha f(\bar n))f(n).
656: $$
657: Also for $n\ge\bar n$,
658: we have $s=\lceil\alpha f(n)\rceil=\alpha f(n)+\epsilon$ with
659: $\epsilon\in[0,1)$ in \eqref{sgf}.  Writing $s=\tilde\alpha f(n)$ with
660: $\tilde\alpha:=\alpha+\epsilon/f(n)\in[\alpha,\bar\alpha)$, we deduce
661: from \eqref{sgf} that
662: \begin{equation}
663: gn/s=(\beta/\tilde\alpha)^{1/2}f(n)\le(\beta/\alpha)^{1/2}f(n).
664: \label{gnsboundFR}
665: \end{equation}
666: In particular, $4gn/s\le\delta f(n)$, since
667: $\delta:=4(\beta/\alpha)^{1/2}$.  Next, \eqref{sgf} implies
668: \begin{equation}
669: ne^{-2g^2\!/s}\le
670: n^{1-2\beta}=f(n)n^{1/3-2\beta}\ln^{-1/3}n.
671: \label{ne2g2sFR}
672: \end{equation}
673: Using the monotonicity of $f$ and $\phi$, increase $\bar n$ if necessary
674: to get for all $n\ge\bar n$
675: \begin{equation}
676: 2\bar\alpha\phi(\bar\alpha f(\bar n))+
677: \delta\phi(\delta f(\bar n))+2n^{-2\beta}+
678: 2\max\left\{\,[\delta f(n)]^{2/3-2\beta}n^{-2/3},
679: n^{-2\beta}\,\right\}\le0.95.
680: \label{0.95FR}
681: \end{equation}
682: By Rem.\ \ref{r:sel3}(c), there is $\gamma$ such that \eqref{CnkFR}
683: holds for all $n\le\bar n$; increasing $\gamma$ if necessary, and
684: using the monotonicity of $f$ and the assumption $\beta\ge1/6$,
685: we have for all $n\ge\bar n$
686: \begin{equation}
687: 2\bar\alpha+2\delta+5n^{1/3-2\beta}\ln^{-1/3}n+
688: 3\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},
689: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}\le0.05\gamma.
690: \label{0.05FR}
691: \end{equation}
692: 
693: Let $n'\ge\bar n$.  Assuming \eqref{CnkFR} holds for all $n\le n'$,
694: for induction let $n=n'+1$.
695: 
696: We need to consider the following two cases in the first call of
697: {\sc Select}.
698: 
699: {\em Left case\/}: $k<n/2$.
700: First, suppose the event
701: ${\cal E}_l:=\{x_k^*\le v\le x_{k_r}^*\}$ occurs.  By the rules of
702: Steps 4--6, we have $\hat X=L$ (from $x_k^*\le v$), $\hat k=k$ and
703: $\hat n:=|\hat X|\le k_r-1$ (from $v\le x_{k_r}^*$); since
704: $k_r<k+2gn/s+1$ by \eqref{klkr3}, we get the two (equivalent) bounds
705: \begin{equation}
706: \hat n<k+2gn/s\quad\mbox{and}\quad \hat n-\hat k<2gn/s.
707: \label{hatnleft}
708: \end{equation}
709: Note that if $i_v=\lceil ks/n+g\rceil$ then,
710: by Cor.\ \ref{c:rankdir3}(a,b), the Boole--Bonferroni inequality and the
711: choice \eqref{sgf}, the complement ${\cal E}_l'$ of ${\cal E}_l$ has
712: $\Prob[{\cal E}_l']\le2e^{-2g^2\!/s}=2n^{-2\beta}$.
713: Second, if $i_v\ne\lceil ks/n+g\rceil$, then $n<k+gn/s$
714: (Cor.\ \ref{c:rankdir3}(e)) combined with $k<n/2$ gives $n<2gn/s$;
715: hence $\hat n-\hat k<\hat n<n<2gn/s$ implies \eqref{hatnleft}.  Since
716: also ${\cal E}_l$ implies \eqref{hatnleft}, we have
717: \begin{equation}
718: \Prob[{\cal A}_l']\le2n^{-2\beta}\quad\mbox{for}\quad
719: {\cal A}_l:=\left\{\,\hat n-\hat k<2gn/s\,\right\}.
720: \label{Al}
721: \end{equation}
722: 
723: {\em Right case\/}: $k\ge n/2$.
724: First, suppose the event
725: ${\cal E}_r:=\{x_{k_l}^*\le v\le x_k^*\}$ occurs.  By the rules of
726: Steps 4--6, we have $\hat X=R$ (from $v\le x_k^*$),
727: $\hat n-\hat k=n-k$ and $\hat n:=|\hat X|\le n-k_l$ (from
728: $x_{k_l}^*\le v$); since $k_l\ge k-2gn/s$ by \eqref{klkr3}, we get
729: the two (equivalent) bounds
730: \begin{equation}
731: \hat n\le n-k+2gn/s\quad\mbox{and}\quad\hat k\le2gn/s,
732: \label{hatnright}
733: \end{equation}
734: using $\hat n-\hat k=n-k$.
735: If $i_v=\lceil ks/n-g\rceil$ then, by Cor.\ \ref{c:rankdir3}(c,d),
736: the complement ${\cal E}_r'$ of ${\cal E}_r$ has
737: $\Prob[{\cal E}_r']\le2e^{-2g^2\!/s}=2n^{-2\beta}$.
738: Second, if $i_v\ne\lceil ks/n-g\rceil$, then $k\le gn/s$
739: (Cor.\ \ref{c:rankdir3}(e)) combined with $k\ge n/2$ gives
740: $n\le2gn/s$; hence $\hat k\le\hat n<n\le2gn/s$ implies
741: \eqref{hatnright}.  Thus
742: \begin{equation}
743: \Prob[{\cal A}_r']\le2n^{-2\beta}\quad\mbox{for}\quad
744: {\cal A}_r:=\left\{\,\hat k\le2gn/s\,\right\}.
745: \label{Ar}
746: \end{equation}
747: 
748: Since $k<n-k$ if $k<n/2$, $n-k\le k$ if $k\ge n/2$, \eqref{hatnleft} and
749: \eqref{hatnright} yield
750: \begin{equation}
751: \Prob[{\cal B}']\le2n^{-2\beta}\quad\mbox{for}\quad
752: {\cal B}:=\left\{\,\hat n\le\min\{\,k,n-k\,\}+2gn/s\,\right\}.
753: \label{B}
754: \end{equation}
755: Note that $\min\{k,n-k\}\le\lfloor n/2\rfloor\le n/2$; this relation
756: will be used implicitly below.
757: 
758: For the recursive call of Step 7, let $\hat s$, $\hat g$ and
759: $\hat\imath_v$ denote the quantities generated as in \eqref{sgf} and
760: \eqref{iv} with $n$ and $k$ replaced by $\hat n$ and $\hat k$, let
761: $\hat v$ be the pivot found at Step 3, and let $\check X$, $\check n$
762: and $\check k$ correspond to $\hat X$, $\hat n$ and $\hat k$ at Step 7,
763: so that $\check n:=|\check X|<\hat n$.
764: 
765: The cost of selecting $v$ and $\hat v$ at Step 3 may be estimated as
766: \begin{equation}
767: C_{si_v}+C_{\hat s\hat\imath_v}\le
768: 1.5s+\gamma f(s)+1.5\hat s+\gamma f(\hat s)\le 3s+2\gamma f(s),
769: \label{CsivFR}
770: \end{equation}
771: since $f$ is increasing and \eqref{CnkFR} holds for
772: $\hat s\le s\le n-1=n'$ (cf.\ \eqref{sgf}) from $\hat n<n$.
773: 
774: Let $c:=n-s$ and $\hat c:=\hat n-\hat s$ denote the costs of Step 4
775: for the two calls.  Since $0\le\hat c<n$ and
776: $\Exp\hat c=\Exp[\hat c|{\cal B}]\Prob[{\cal B}]+
777: \Exp[\hat c|{\cal B}']\Prob[{\cal B}']\le
778: \Exp[\hat c|{\cal B}]+n\Prob[{\cal B}']$, by \eqref{B} we have
779: \begin{equation}
780: c+\Exp\hat c\le n-s+\min\{\,k,n-k\,\}+2gn/s+2n^{1-2\beta}.
781: \label{cEhatc}
782: \end{equation}
783: 
784: Using \eqref{CnkFR} again with $\check n<n$,
785: the cost of finishing up at Step 7 is at most
786: \begin{equation}
787: \Exp C_{\check n\check k}\le
788: \Exp\left[\,1.5\check n+\gamma f(\check n)\,\right]=
789: 1.5\Exp \check n+\gamma\Exp f(\check n).
790: \label{ECcheckn}
791: \end{equation}
792: Thus we need suitable bounds for $\Exp\check n$ and $\Exp f(\check n)$,
793: which may be derived as follows.
794: 
795: To generalize \eqref{B} to the recursive call, consider the events
796: \begin{equation}
797: \hat{\cal B}:=\left\{\,\check n\le\min\{\,\hat k,\hat n-\hat k\,\}+
798: 2\hat g\hat n/\hat s\,\right\}
799: \quad\mbox{and}\quad
800: {\cal C}:=\left\{\,\check n\le\lfloor\delta f(n)\rfloor\,\right\}.
801: \label{hatBC}
802: \end{equation}
803: By \eqref{Al} and \eqref{Ar}, $\hat{\cal B}\cap{\cal A}_l$ and
804: $\hat{\cal B}\cap{\cal A}_r$ imply ${\cal C}$, since
805: $2gn/s+2\hat g\hat n/\hat s\le\delta f(n)$ by \eqref{gnsboundFR} with
806: $\hat n<n$ and $\delta:=4(\beta/\alpha)^{1/2}$.  For the recursive
807: call, proceeding as in the derivation of \eqref{B} with $n$ replaced
808: by $\hat n=i$, $k$ by $\hat k$, etc., shows that, due to random
809: sampling,
810: \begin{equation}
811: \Prob[\hat{\cal B}'|{\cal A}_l,\hat n=i]\le2i^{-2\beta}
812: \quad\mbox{and}\quad
813: \Prob[\hat{\cal B}'|{\cal A}_r,\hat n=i]\le2i^{-2\beta}.
814: \label{PB'AlB'Ar}
815: \end{equation}
816: 
817: In the left case of $k<n/2$, using $\check n<n$ and
818: $\Prob[{\cal A}_l']\le2n^{-2\beta}$ (cf.\ \eqref{Al}), we get
819: $$
820: \Exp\check n=\Exp[\check n|{\cal A}_l]\Prob[{\cal A}_l]+
821: \Exp[\check n|{\cal A}_l']\Prob[{\cal A}_l']\le
822: \Exp[\check n|{\cal A}_l]+n2n^{-2\beta}.
823: $$
824: Partitioning ${\cal A}_l$ into the events
825: ${\cal D}_i:={\cal A}_l\cap\{\hat n=i\}$, $i=0\colon n-1$
826: ($\hat n<n$ always), we have
827: $$
828: \Exp[\check n|{\cal A}_l]=\sum_{i=0}^{n-1}
829: \Exp[\check n|{\cal D}_i]\Prob[{\cal D}_i|{\cal A}_l]\le
830: \max_{i=0\colon n-1}\Exp[\check n|{\cal D}_i],
831: $$
832: where $\Exp[\check n|{\cal D}_i]\le\lfloor\delta f(n)\rfloor$ if
833: $i\le\lfloor\delta f(n)\rfloor+1$, because $\check n<\hat n$ always.
834: As for the remaining terms,
835: $\hat{\cal B}\cap{\cal A}_l\subset{\cal C}$ implies
836: $\Prob[{\cal C}'|{\cal D}_i]\le\Prob[\hat{\cal B}'|{\cal D}_i]\le
837: 2i^{-2\beta}$ by \eqref{PB'AlB'Ar}, where
838: ${\cal C}:=\{\check n\le\lfloor\delta f(n)\rfloor\}$
839: and $\check n<\hat n=i$ when the event ${\cal D}_i$ occurs, so
840: $\Exp[\check n|{\cal D}_i]\le\lfloor\delta f(n)\rfloor+i2i^{-2\beta}$.
841: Hence
842: $$
843: \max_{i=0\colon n-1}\Exp[\check n|{\cal D}_i]\le
844: \lfloor\delta f(n)\rfloor+
845: \max_{i=\lfloor\delta f(n)\rfloor+2\colon n-1}2i^{1-2\beta},
846: $$
847: where the final term is omitted if $\lfloor\delta f(n)\rfloor>n-3$;
848: otherwise it is at most
849: $$
850: 2\max\left\{\,(\lfloor\delta f(n)\rfloor+1)^{1-2\beta},
851: n^{1-2\beta}\,\right\}\le
852: 2\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},
853: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}f(n),
854: $$
855: since $\max_{i=\lfloor\delta f(n)\rfloor+1\colon n}2i^{1-2\beta}$
856: is bounded as above (consider $\beta\ge1/2$, then $\beta<1/2$ and use
857: $\delta f(n)<\lfloor\delta f(n)\rfloor+1$, the monotonicity of $f$ and
858: \eqref{ne2g2sFR} for the final inequality).
859: Collecting the preceding estimates, we obtain
860: \begin{equation}
861: \Exp\check n\le\lfloor\delta f(n)\rfloor+2n^{1-2\beta}+
862: 2\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},
863: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}f(n).
864: \label{Echeckn}
865: \end{equation}
866: Similarly, replacing $\check n$ by $f(\check n)$ in our derivations
867: and using the monotonicity of $f$ yields
868: \begin{subequations}
869: \label{Efcheckn}
870: \begin{equation}
871: \Exp f(\check n)\le f(\lfloor\delta f(n)\rfloor)+2f(n)n^{-2\beta}+
872: \max_{i=\lfloor\delta f(n)\rfloor+2\colon n-1}2f(i)i^{-2\beta},
873: \label{Efcheckn:a}
874: \end{equation}
875: where the final term is omitted if $\lfloor\delta f(n)\rfloor>n-3$;
876: otherwise it is at most
877: \begin{equation}
878: 2\max\left\{\,
879: \frac{f(\lfloor\delta f(n)\rfloor+1)}
880: {(\lfloor\delta f(n)\rfloor+1)^{2\beta}},
881: \frac{f(n)}{n^{2\beta}}\,\right\}\le
882: 2\max\left\{\,[\delta f(n)]^{2/3-2\beta}n^{-2/3},
883: n^{-2\beta}\,\right\}f(n).
884: \label{Efcheckn:b}
885: \end{equation}
886: \end{subequations}
887: To see this, use the monotonicity of $f$ and the fact that for $i\le n$
888: (cf.\ \eqref{sgf})
889: $$
890: f(i)i^{-2\beta}\!/f(n)=i^{2/3-2\beta}n^{-2/3}(\ln i/\ln n)^{1/3}\le
891: i^{2/3-2\beta}n^{-2/3}.
892: $$
893: 
894: For the right case, replace ${\cal A}_l$ by ${\cal A}_r$ in the
895: preceding paragraph to get \eqref{Echeckn}--\eqref{Efcheckn}.
896: 
897: Add the costs \eqref{CsivFR}, \eqref{cEhatc} and \eqref{ECcheckn},
898: using \eqref{Echeckn}--\eqref{Efcheckn}, to get
899: \begin{eqnarray*}
900: C_{nk}&\le&3s+2\gamma f(s)+n-s+\min\{\,k,n-k\,\}+2gn/s+2n^{1-2\beta}\\
901: &&{}+1.5\lfloor\delta f(n)\rfloor+3n^{1-2\beta}+
902: 3\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},
903: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}f(n)\\
904: &&{}+\gamma f(\lfloor\delta f(n)\rfloor)+
905: 2\gamma f(n)n^{-2\beta}+
906: 2\gamma\max\left\{\,[\delta f(n)]^{2/3-2\beta}n^{-2/3},
907: n^{-2\beta}\,\right\}f(n).
908: \end{eqnarray*}
909: Now, using the bounds \eqref{sfsFR}--\eqref{flfloordeltaFR},
910: $2gn/s\le\frac12\delta f(n)$ (cf.\ \eqref{gnsboundFR}) and
911: \eqref{ne2g2sFR} gives
912: \begin{eqnarray*}
913: \lefteqn{C_{nk}\le n+\min\{\,k,n-k\,\}}\\
914: &&{}+\Big[2\bar\alpha+2\delta+5n^{1/3-2\beta}\ln^{-1/3}n+
915: 3\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},
916: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}\Big]f(n)\\
917: &&{}+\left[2\bar\alpha\phi(\bar\alpha f(\bar n))+
918: \delta\phi(\delta f(\bar n))+2n^{-2\beta}+
919: 2\max\left\{\,[\delta f(n)]^{2/3-2\beta}n^{-2/3},
920: n^{-2\beta}\,\right\}\right]\gamma f(n).
921: \end{eqnarray*}
922: By \eqref{0.95FR}--\eqref{0.05FR}, the two bracketed terms above are
923: at most $0.05\gamma f(n)$ and $0.95\gamma f(n)$, respectively; thus
924: \eqref{CnkFR} holds as required.
925: \qed
926: \end{proof}
927: %
928: %   *** SUBSECTION 4.2 ***
929: \subsection{Other sampling strategies}
930: \label{ss:othersample}
931: %
932: We now indicate briefly how to adapt the proof of Thm \ref{t:selFR}
933: to several variations on \eqref{sgf}; a choice similar to
934: \eqref{sgfFRsn2/3} below was used in \cite{flri:asf}.
935: %
936: %   *** REMARKS 4.2 ***
937: \begin{remarks}
938: \label{r:selFR}
939: \rm
940: (a)
941: Theorem \ref{t:selFR} remains true for $\beta\ge1/6$ and
942: \eqref{sgf} replaced by
943: \begin{equation}
944: s:=\min\left\{\left\lceil\alpha n^{2/3}\right\rceil,n-1\right\},\
945: g:=(\beta s\ln n)^{1/2}\ \mbox{and}\
946: f(n):=n^{2/3}\ln^{1/2}n.
947: \label{sgfFRsn2/3}
948: \end{equation}
949: Indeed, using $e^{3/2}-1\le\alpha\bar n^{2/3}\le\bar n-1$,
950: $e^{3/2}\le\delta f(\bar n)$, $\bar\alpha:=\alpha+\bar n^{-2/3}$
951: and $s=\tilde\alpha n^{2/3}$ with $\tilde\alpha\in[\alpha,\bar\alpha)$
952: yields \eqref{sfsFR}--\eqref{gnsboundFR} as before, and $\ln^{-1/2}$
953: replaces $\ln^{-1/3}$ in \eqref{ne2g2sFR}, \eqref{0.05FR} and
954: \eqref{Echeckn}.
955: \par(b)
956: Theorem \ref{t:selFR} holds for the following modification of
957: \eqref{sgf} with $\epsilon_l>1$
958: \begin{equation}
959: s:=\min\left\{\lceil\alpha f(n)\rceil,n-1\right\}\ \mbox{and}\
960: g:=(\beta s\ln^{\epsilon_l}n)^{1/2}\ \mbox{with}\
961: f(n):=n^{2/3}\ln^{\epsilon_l/3}n.
962: \label{sgfFRlneps}
963: \end{equation}
964: First, using $e^{\epsilon_l}-1\le\alpha f(\bar n)\le\bar n-1$ and
965: $e^{\epsilon_l}\le\delta f(\bar n)$ gives
966: \eqref{sfsFR}--\eqref{gnsboundFR} as before.  Next, fix
967: $\tilde\beta\ge1/6$.  Let $\beta_n:=\beta\ln^{\epsilon_l-1}n$.
968: Increase $\bar n$ if necessary so that $\beta_i\ge\tilde\beta$ for
969: all $i\ge\min\{\bar n,\lceil\delta f(\bar n)\rceil\}$; then
970: replace $\beta$ by $\tilde\beta$ and $\ln^{-1/3}$ by
971: $\ln^{-\epsilon_l/3}$ in \eqref{ne2g2sFR} and below.
972: \par(c)
973: Several other replacements for \eqref{sgf} may be analyzed as in
974: \cite[\S\S4.1--4.2]{kiw:rsq}.
975: \par(d)
976: None of these choices gives $f(n)$ better than that in \eqref{sgf} for
977: the bound \eqref{CnkFR}.
978: \end{remarks}
979: 
980: We now comment briefly on the possible use of sampling with
981: replacement.
982: %
983: %   *** REMARKS 4.3 ***
984: \begin{remarks}
985: \label{r:binsample}
986: \rm
987: (a)
988: Suppose Step 2 of {\sc Select} employs sampling with replacement.
989: Since the tail bound \eqref{Pexpg} remains valid for the binomial
990: distribution \cite{chv:thd,hoe:pis}, Lemma \ref{l:rankgen} is not
991: affected.  However, when Step 4 no longer skips comparisons with
992: the elements of $S$, $-s$ in \eqref{cEhatc} is replaced by $0$; the
993: resulting change in the bound on $C_{nk}$ only requires replacing
994: $2\bar\alpha$ in \eqref{0.05FR} by $3\bar\alpha$.  Hence the
995: preceding results remain valid.
996: \par(b)
997: Of course, sampling with replacement needs additional storage for
998: $S$.  However, the increase in both storage and the number of
999: comparisons may be tolerated because the sample sizes are relatively
1000: small.
1001: \end{remarks}
1002: %
1003: %   *** SUBSECTION 4.3 ***
1004: \subsection{Handling small subfiles}
1005: \label{ss:subfile}
1006: %
1007: Since the sampling efficiency decreases when $X$ shrinks, consider the
1008: following modification.  For a fixed cut-off parameter
1009: $n_{\rm cut}\ge1$, let sSelect$(X,k)$ be a ``small-select'' routine that
1010: finds the $k$th smallest element of $X$ in at most $C_{\rm cut}<\infty$
1011: comparisons when $|X|\le n_{\rm cut}$ (even bubble sort will do).  Then
1012: {\sc Select} is modified to start with the following
1013: \medbreak\noindent{\bf Step 0} ({\em Small file case\/}).
1014: If $n:=|X|\le n_{\rm cut}$, return sSelect$(X,k)$.
1015: 
1016: Our preceding results remain valid for this modification.  In fact it
1017: suffices if $C_{\rm cut}$ bounds the {\em expected\/} number of
1018: comparisons of sSelect$(X,k)$ for $n\le n_{\rm cut}$.  For instance,
1019: \eqref{CnkFR} holds for $n\le n_{\rm cut}$ and $\gamma\ge C_{\rm cut}$,
1020: and by induction as in Rem.\ \ref{r:sel3}(c) we have $C_{nk}<\infty$
1021: for all $n$, which suffices for the proof of Thm \ref{t:selFR}.
1022: 
1023: Another advantage is that even small $n_{\rm cut}$ ($1000$ say) nicely
1024: limits the stack space for recursion.  Specifically, the tail
1025: recursion of Step 7 is easily eliminated (set $X:=\hat X$, $k:=\hat k$
1026: and go to Step 0), and the calls of Step 3 deal with subsets whose
1027: sizes quickly reach $n_{\rm cut}$.  For example, for the choice of
1028: \eqref{sgf} with $\alpha=1$ and $n_{\rm cut}=600$, at most four
1029: recursive levels occur for $n\le2^{31}\approx2.15\cdot10^9$.
1030: %
1031: %   *** SECTION 5 ***
1032: \section{A modified version}
1033: \label{s:modmed}
1034: We now consider a modification inspired by a remark of
1035: \cite{bro:ra489}.  For $k$ close to $\lceil n/2\rceil$, by symmetry
1036: it is best to choose $v$ as the sample median with
1037: $i_v=\lceil s/2\rceil$, thus attempting to get $v$ close to $x_k^*$
1038: instead of $x_{\lceil k-gn/s\rceil}^*$ or $x_{\lceil k+gn/s\rceil}^*$;
1039: then more elements are eliminated.  Hence we may let
1040: \begin{equation}
1041: i_v:=\left\{\begin{array}{ll}
1042: \lceil ks/n+g\rceil&\mbox{if}\ k<n/2-gn/s,\\
1043: \lceil s/2\rceil&
1044: \mbox{if}\ n/2-gn/s\le k\le n/2+gn/s,\\
1045: \lceil ks/n-g\rceil&\mbox{if}\ k>n/2+gn/s.
1046: \end{array}\right.
1047: \label{iv3}
1048: \end{equation}
1049: Note that \eqref{iv3} coincides with \eqref{iv} in the {\em left\/} case
1050: of $k<n/2-gn/s$ and the {\em right\/} case of $k>n/2+gn/s$, but the
1051: {\em middle\/} case of $n/2-gn/s\le k\le n/2+gn/s$ fixes $i_v$
1052: at the median position $\lceil s/2\rceil$; in fact $i_v$ is the median
1053: of the three values in \eqref{iv3}:
1054: \begin{equation}
1055: i_v:=\max\left\{\,\min\left(\,\lceil ks/n+g\rceil,
1056: \lceil s/2\rceil\,\right),\lceil ks/n-g\rceil\,\right\}.
1057: \label{iv3med}
1058: \end{equation}
1059: Corollary \ref{c:rankdir3} remains valid for the left and right cases.
1060: For the middle case, letting
1061: \begin{equation}
1062: j_l:=\max\left\{\,\lceil n/2-gn/s\rceil,1\,\right\}
1063: \quad\mbox{and}\quad
1064: j_r:=\min\left\{\,\lceil n/2+gn/s\rceil,n\,\right\},
1065: \label{jljr3}
1066: \end{equation}
1067: we obtain from Lemma \ref{l:rankgen} with $\kappa=1/2$ the following
1068: complement of Corollary \ref{c:rankdir3}.
1069: %
1070: %   *** COROLLARY 5.1 ***
1071: \begin{corollary}
1072: \label{c:iv3}
1073: $\Prob[v<x_{j_l}^*]\le e^{-2g^2\!/s}$ and\/
1074: $\Prob[x_{j_r}^*<v]\le e^{-2g^2\!/s}$
1075: if\/ $n/2-gn/s\le k\le n/2+gn/s$.
1076: \end{corollary}
1077: %
1078: %   *** THEOREM 5.2 ***
1079: \begin{theorem}
1080: \label{t:selFRmed}
1081: Theorem\/ {\rm\ref{t:selFR}}
1082: holds for {\sc Select} with Step\/ $3$ using\/ \eqref{iv3}.
1083: \end{theorem}
1084: \begin{proof}
1085: We only indicate how to adapt the proof of Thm \ref{t:selFR} following
1086: \eqref{0.05FR}.  As noted after \eqref{iv3}, the left case now has
1087: $k<n/2-gn/s$ and the right case has $k>n/2+gn/s$, so we only need to
1088: discuss the middle case.
1089: 
1090: {\em Middle case\/}:
1091: $n/2-gn/s\le k\le n/2+gn/s$.  Suppose
1092: the event ${\cal E}_m:=\{x_{j_l}^*\le v\le x_{j_r}^*\}$ occurs
1093: (note that $\Prob[{\cal E}_m']\le 2e^{-2g^2\!/s}=2n^{-2\beta}$
1094: by Cor.\ \ref{c:iv3}).
1095: If $\hat X=L$ then, by the rules of Steps 4--6, we have $\hat k=k$
1096: and $\hat n\le j_r-1$; since $j_r<n/2+gn/s+1$ by \eqref{jljr3}, we
1097: get $\hat n<n/2+gn/s$.  Hence $k\ge n/2-gn/s$ yields
1098: $\hat n<k+2gn/s$ and $\hat n-\hat k<2gn/s$ as in \eqref{hatnleft}.
1099: Next, if $\hat X=R$ then $\hat n-\hat k=n-k$ and $\hat k:=k-|L\cup E|$,
1100: so $L\cup E=\{x\in X:x\le v\}\ni x_{j_l}^*$ gives $\hat k\le k-j_l$.
1101: Since $k\le n/2+gn/s$ and $j_l\ge n/2-gn/s$ by \eqref{jljr3}, we get
1102: $\hat k\le2gn/s$ and $\hat n\le n-k+2gn/s$ as in
1103: \eqref{hatnright}; further, $\hat n\le n-j_l$ yields
1104: $\hat n\le n/2+gn/s$.  Noticing that $n/2-gn/s\le k\le n/2+gn/s$ implies
1105: $n/2\le\min\{k,n-k\}+gn/s$, we have
1106: $\hat n\le\min\{k,n-k\}+2gn/s$ in both cases.
1107: 
1108: Thus in the middle case we again have \eqref{B} and hence
1109: \eqref{cEhatc}; further, by \eqref{Al} and \eqref{Ar}, the event
1110: ${\cal E}_m\subset{\cal A}_l\cup{\cal A}_r$ is partitioned into
1111: ${\cal E}_m\cap{\cal A}_l$ and
1112: ${\cal E}_m\cap{\cal A}_l'\cap{\cal A}_r$.
1113: 
1114: Next, reasoning as before, we see that \eqref{PB'AlB'Ar} and hence
1115: \eqref{Echeckn}--\eqref{Efcheckn} remain valid in the left and right
1116: cases, whereas in the middle case we have
1117: \begin{equation}
1118: \Prob[\hat{\cal B}'|{\cal E}_m,{\cal A}_l,\hat n=i]\le2i^{-2\beta}
1119: \quad\mbox{and}\quad
1120: \Prob[\hat{\cal B}'|{\cal E}_m,{\cal A}_l',{\cal A}_r,\hat n=i]\le
1121: 2i^{-2\beta}.
1122: \label{PB'EmAl}
1123: \end{equation}
1124: 
1125: In the middle case,
1126: $\Exp\check n=\Exp[\check n|{\cal E}_m]\Prob[{\cal E}_m]+
1127: \Exp[\check n|{\cal E}_m']\Prob[{\cal E}_m']$ is bounded by
1128: $\Exp[\check n|{\cal E}_m]+2n^{1-2\beta}$, since
1129: $\Prob[{\cal E}_m']\le2n^{-2\beta}$ and $\check n<n$ always.  Next,
1130: partitioning ${\cal E}_m$ into ${\cal E}_m\cap{\cal A}_l$ and
1131: ${\cal E}_m\cap{\cal A}_l'\cap{\cal A}_r$, we obtain
1132: $\Exp[\check n|{\cal E}_m]\le
1133: \max\{\Exp[\check n|{\cal E}_m,{\cal A}_l],
1134: \Exp[\check n|{\cal E}_m,{\cal A}_l',{\cal A}_r]\}$, where
1135: $\Exp[\check n|{\cal E}_m,{\cal A}_l]$ and
1136: $\Exp[\check n|{\cal E}_m,{\cal A}_l',{\cal A}_r]$ may be bounded like
1137: $\Exp[\check n|{\cal A}_l]$ and $\Exp[\check n|{\cal A}_r]$ in the left
1138: and right cases to get \eqref{Echeckn}.
1139: Then \eqref{Efcheckn} is obtained similarly, and the conclusion follows
1140: as before.
1141: \qed
1142: \end{proof}
1143: %
1144: %   *** SECTION 6 ***
1145: \section{Ternary partitions}
1146: \label{s:ternpart}
1147: In this section we discuss ways of implementing {\sc Select} when
1148: the input set is given as an array $x[1\colon n]$.  We employ the
1149: following notation.
1150: 
1151: Each stage works with a segment $x[l\colon r]$ of the input array
1152: $x[1\colon n]$, where $1\le l\le r\le n$ are such that $x_i<x_l$ for
1153: $i=1\colon l-1$, $x_r<x_i$ for $i=r+1\colon n$, and the $k$th smallest
1154: element of $x[1\colon n]$ is the $(k-l+1)$th smallest element of
1155: $x[l\colon r]$.  The task of {\sc Select} is {\em extended\/}: given
1156: $x[l\colon r]$ and $l\le k\le r$,
1157: {\sc Select}$(x,l,r,k,k_-,k_+)$ permutes $x[l\colon r]$ and finds
1158: $l\le k_-\le k\le k_+\le r$
1159: such that $x_i<x_k$ for all $l\le i<k_-$, $x_i=x_k$ for all
1160: $k_-\le i\le k_+$, $x_i>x_k$ for all $k_+<i\le r$.  The initial call
1161: is {\sc Select}$(x,1,n,k,k_-,k_+)$.
1162: 
1163: A vector swap denoted by $x[a\colon b]\leftrightarrow x[b+1\colon c]$
1164: means that the first $d:=\min(b+1-a,c-b)$ elements of array
1165: $x[a\colon c]$ are exchanged with its last $d$ elements in arbitrary
1166: order if $d>0$; e.g., we may exchange
1167: $x_{a+i}\leftrightarrow x_{c-i}$ for $0\le i<d$, or
1168: $x_{a+i}\leftrightarrow x_{c-d+1+i}$ for $0\le i<d$.
1169: %
1170: %   *** SUBSECTION 6.1 ***
1171: \subsection{Tripartitioning schemes}
1172: \label{ss:tripart}
1173: For a given pivot $v:=x_l$ from the array $x[l\colon r]$, the following
1174: {\em ternary\/} scheme \cite[\S5.1]{kiw:psq} partitions the array into
1175: three blocks, with $x_m<v$ for $l\le m<a$, $x_m=v$ for $a\le m\le b$,
1176: $x_m>v$ for $b<m\le r$.
1177: After comparing the pivot $v$ to $x_r$ to produce the initial setup
1178: \begin{equation}
1179: \begin{tabular}{llrlrlrr}
1180: \hline
1181: \multicolumn{1}{|c|}{$x=v$} &
1182: \multicolumn{2}{|c|}{$x<v$} &
1183: \multicolumn{2}{|c|}{?} &
1184: \multicolumn{2}{|c|}{$x>v$} &
1185: \multicolumn{1}{|c|}{$x=v$} \\
1186: \hline
1187: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!
1188: $l$ & $p$ & $i$ & & & $j$ & $q$ & $r$\\
1189: \end{tabular}
1190: \label{ternini}
1191: \end{equation}
1192: with $i:=l$ and $j:=r$,
1193: we work with the three inner blocks of the array
1194: \begin{equation}
1195: \begin{tabular}{lllrrr}
1196: \hline
1197: \multicolumn{1}{|c|}{$x=v$} &
1198: \multicolumn{1}{|c|}{$x<v$} &
1199: \multicolumn{2}{|c|}{?} &
1200: \multicolumn{1}{|c|}{$x>v$} &
1201: \multicolumn{1}{|c|}{$x=v$} \\
1202: \hline
1203: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!
1204: $l$ & $p$ & $i$ & $j$ & $q$ & $r$\\
1205: \end{tabular}\ ,
1206: \label{ternbeg}
1207: \end{equation}
1208: until the middle part is empty or just contains an element equal to the
1209: pivot
1210: \begin{equation}
1211: \begin{tabular}{llrclrr}
1212: \hline
1213: \multicolumn{1}{|c|}{$x=v$} &
1214: \multicolumn{2}{|c|}{$x<v$} &
1215: \multicolumn{1}{|c|}{$x=v$} &
1216: \multicolumn{2}{|c|}{$x>v$} &
1217: \multicolumn{1}{|c|}{$x=v$} \\
1218: \hline
1219: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!
1220: $l$ & $p$ & $j$ & & $i$ & $q$ & $r$ \\
1221: \end{tabular}
1222: \label{ternmid}
1223: \end{equation}
1224: (i.e., $j=i-1$ or $j=i-2$),
1225: then swap the ends into the middle for the final arrangement
1226: \begin{equation}
1227: \begin{tabular}{llrr}
1228: \hline
1229: \multicolumn{1}{|c|}{$x<v$} &
1230: \multicolumn{2}{|c|}{$x=v$} &
1231: \multicolumn{1}{|c|}{$x>v$} \\
1232: \hline
1233: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!
1234: $l$ & $a$ & $b$ & $r$\\
1235: \end{tabular}\ .
1236: \label{ternend}
1237: \end{equation}
1238: %
1239: %   *** SCHEME A ***
1240: \begin{scheme}[Safeguarded ternary partition]
1241: \label{sts}
1242: \rm
1243: \begin{description}
1244: \itemsep0pt
1245: \item[]
1246: \item[\ref{sts}1.] [Initialize.]
1247: Set $i:=l$, $p:=i+1$, $j:=r$ and $q:=j-1$.
1248: If $v>x_j$, exchange $x_i\leftrightarrow x_j$ and set $p:=i$;
1249: else if $v<x_j$, set $q:=j$.
1250: \item[\ref{sts}2.] [Increase $i$ until $x_i\ge v$.]
1251: Increase $i$ by $1$; then if $x_i<v$, repeat this step.
1252: \item[\ref{sts}3.] [Decrease $j$ until $x_j\le v$.]
1253: Decrease $j$ by $1$; then if $x_j>v$, repeat this step.
1254: \item[\ref{sts}4.] [Exchange.]
1255: (Here $x_j\le v\le x_i$.)
1256: If $i<j$, exchange $x_i\leftrightarrow x_j$; then
1257: if $x_i=v$, exchange $x_i\leftrightarrow x_p$ and increase $p$ by $1$;
1258: if $x_j=v$, exchange $x_j\leftrightarrow x_q$ and decrease $q$ by $1$;
1259: return to \ref{sts}2.
1260: If $i=j$ (so that $x_i=x_j=v$), increase $i$ by $1$ and
1261: decrease $j$ by $1$.
1262: \item[\ref{sts}5.] [Cleanup.]
1263: Set $a:=l+j-p+1$ and $b:=r-q+i-1$.
1264: Exchange $x[l\colon p-1]\leftrightarrow x[p\colon j]$ and
1265: $x[i\colon q]\leftrightarrow x[q+1\colon r]$.
1266: \end{description}
1267: \end{scheme}
1268: 
1269: Step \ref{sts}1 ensures that $x_l\le v\le x_r$, so steps \ref{sts}2 and
1270: \ref{sts}3 don't need to test whether $i\le j$.  This scheme makes two
1271: extraneous comparisons (only one when $i=j$ at \ref{sts}4).  Spurious
1272: comparisons are avoided in the following modification
1273: \cite[\S5.3]{kiw:psq} of the scheme of \cite{bemc:esf}
1274: (cf.\ \cite[Ex.\ 5.2.2--41]{knu:acpIII2}),
1275: for which $i=j+1$ in \eqref{ternmid}.
1276: %
1277: %   *** SCHEME B ***
1278: \begin{scheme}[Double-index controlled ternary partition]
1279: \label{stind2}
1280: \rm
1281: \begin{description}
1282: \itemsep0pt
1283: \item[]
1284: \item[\ref{stind2}1.] [Initialize.]
1285: Set $i:=p:=l+1$ and $j:=q:=r$.
1286: \item[\ref{stind2}2.] [Increase $i$ until $x_i>v$.]
1287: If $i\le j$ and $x_i<v$, increase $i$ by $1$ and repeat this step.
1288: If $i\le j$ and $x_i=v$, exchange $x_p\leftrightarrow x_i$, increase
1289: $p$ and $i$ by $1$, and repeat this step.
1290: \item[\ref{stind2}3.] [Decrease $j$ until $x_j<v$.]
1291: If $i<j$ and $x_j>v$, decrease $j$ by $1$ and repeat this step.
1292: If $i<j$ and $x_j=v$, exchange $x_j\leftrightarrow x_q$, decrease
1293: $j$ and $q$ by $1$, and repeat this step.
1294: If $i\ge j$, set $j:=i-1$ and go to \ref{stind2}5.
1295: \item[\ref{stind2}4.] [Exchange.]
1296: Exchange $x_i\leftrightarrow x_j$, increase $i$ by $1$,
1297: decrease $j$ by $1$, and return to \ref{stind2}2.
1298: \item[\ref{stind2}5.] [Cleanup.]
1299: Set $a:=l+i-p$ and $b:=r-q+j$.
1300: Swap $x[l\colon p-1]\leftrightarrow x[p\colon j]$ and
1301: $x[i\colon q]\leftrightarrow x[q+1\colon r]$.
1302: \end{description}
1303: \end{scheme}
1304: %
1305: %   *** SUBSECTION 6.2 ***
1306: \subsection{Preparing for ternary partitions}
1307: \label{ss:preptern}
1308: At Step 1, $r-l+1$ replaces $n$ in finding $s$ and $g$.
1309: At Step 2, it is convenient to place the sample in the initial part of
1310: $x[l\colon r]$ by exchanging $x_i\leftrightarrow x_{i+{\rm rand}(r-i)}$
1311: for $l\le i\le r_s:=l+s-1$, where ${\rm rand}(r-i)$ denotes a random
1312: integer, uniformly distributed between $0$ and $r-i$.
1313: 
1314: Step 3 uses $i:=k-l+1$ and $m:=r-l+1$ instead of $k$ and $n$
1315: to find the pivot position
1316: \begin{equation}
1317: k_v:=\left\{\begin{array}{ll}
1318: \rlap{$\min$}\phantom{\max}
1319: \left\{\,\lceil l-1+is/m+g\rceil,r_s\,\right\}&
1320: \mbox{if}\ i<m/2,\\
1321: \max\left\{\,\lceil l-1+is/m-g\rceil,l\,\right\}&
1322: \mbox{if}\ i\ge m/2,
1323: \end{array}\right.
1324: \label{kv}
1325: \end{equation}
1326: so that the recursive call of {\sc Select}$(x,l,r_s,k_v,k_v^-,k_v^+)$
1327: produces $v:=x_{k_v}$.
1328: 
1329: After $v$ has been found, our array looks as follows
1330: \begin{equation}
1331: \begin{tabular}{llrrccr}
1332: \hline
1333: \multicolumn{1}{|c|}{$x<v$} &
1334: \multicolumn{2}{|c|}{$x=v$} &
1335: \multicolumn{1}{|c|}{$x>v$} &
1336: \multicolumn{2}{|c|}{?}\\
1337: \hline
1338: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!
1339: $l$ & $k_v^-$ & $k_v^+$ & $r_s$ & & $r$\\
1340: \end{tabular}\ .
1341: \label{partrec}
1342: \end{equation}
1343: Setting $\bar l:=k_v^-$ and $\bar r:=r-r_s+k_v^+$, we swap
1344: $x[k_v^++1\colon r_s]\leftrightarrow x[r_s+1\colon r]$ in
1345: \eqref{partrec} to get
1346: \begin{equation}
1347: \begin{tabular}{llrlrr}
1348: \hline
1349: \multicolumn{1}{|c|}{$x<v$} &
1350: \multicolumn{2}{|c|}{$x=v$} &
1351: \multicolumn{2}{|c|}{?} &
1352: \multicolumn{1}{|c|}{$x>v$} \\
1353: \hline
1354: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!
1355: $l$ & $\bar l$ & $k_v^+$ & & $\bar r$ & $r$\\
1356: \end{tabular}\ .
1357: \label{partini}
1358: \end{equation}
1359: If $k_v^+=r_s$, we use scheme \ref{sts} with $l$ replaced by $k_v^+$
1360: in \ref{sts}1 (cf.\ \eqref{ternini}) and by $\bar l$ in \ref{sts}5
1361: (cf.\ \eqref{ternmid}); for $k_v^+<r_s$, we set
1362: $i:=k_v^+$, $p:=i+1$, $j:=\bar r+1$, $q:=\bar r$, omit \ref{sts}1
1363: and replace $l$, $r$ by $\bar l$, $\bar r$ in \ref{sts}5.
1364: Similarly, for scheme \ref{stind2}, we replace $l$, $r$ by
1365: $k_v^+$, $\bar r$ in \ref{stind2}1, and by $\bar l$, $\bar r$ in
1366: \ref{stind2}5.
1367: 
1368: After partitioning, $l$ and $r$ are updated by setting $l:=b+1$ if
1369: $a\le k$, $r:=a-1$ if $k\le b$.  If $l\ge r$,
1370: {\sc Select} may return $k_-:=k_+:=k$ if $l=r$, $k_-:=r+1$ and
1371: $k_+:=l-1$ if $l>r$.  Otherwise, instead of calling {\sc Select}
1372: recursively, Step 6 may jump back to Step 1, or to Step 0 if sSelect
1373: is used (cf.\ \S\ref{ss:subfile}).
1374: 
1375: A simple version of sSelect is obtained if Steps 2 and 3 choose $v:=x_k$
1376: when $r-l+1\le n_{\rm cut}$ (this choice of \cite{flri:asf} works well
1377: in practice, but more sophisticated pivots could be tried); then the
1378: ternary partitioning code can be used by sSelect as well.
1379: %
1380: %   *** SECTION 7 ***
1381: \section{Experimental results}
1382: \label{s:exp}
1383: %
1384: %   *** SUBSECTION 7.1 ***
1385: \subsection{Implemented algorithms}
1386: \label{ss:impl}
1387: %
1388: An implementation of {\sc Select} was programmed in Fortran 77 and
1389: run on a notebook PC (Pentium 4M 2 GHz, 768 MB RAM) under MS
1390: Windows XP.  The input set $X$ was specified as a double precision
1391: array.  For efficiency, the recursion was removed and small arrays with
1392: $n\le n_{\rm cut}$ were handled as if Steps 2 and 3 chose $v:=x_k$;
1393: the resulting version of sSelect (cf.\ \S\S\ref{ss:subfile} and
1394: \ref{ss:preptern}) typically required less than $3.5n$ comparisons.
1395: The choice of \eqref{sgfFRsn2/3} was employed, with the parameters
1396: $\alpha=0.5$, $\beta=0.25$ and $n_{\rm cut}=600$ as proposed in
1397: \cite{flri:asf}; future work should test other sample sizes and
1398: parameters.
1399: %
1400: %   *** SUBSECTION 7.2 ***
1401: \subsection{Testing examples}
1402: \label{ss:examp}
1403: %
1404: As in \cite{kiw:rsq}, we used minor modifications of the input sequences
1405: of \cite{val:iss}:
1406: \begin{description}
1407: \itemsep0pt
1408: \item[random]
1409: A random permutation of the integers $1$ through $n$.
1410: \item[onezero]
1411: A random permutation of $\lceil n/2\rceil$ ones and $\lfloor n/2\rfloor$
1412: zeros.
1413: \item[sorted]
1414: The integers $1$ through $n$ in increasing order.
1415: \item[rotated]
1416: A sorted sequence rotated left once; i.e., $(2,3,\ldots,n,1)$.
1417: \item[organpipe]
1418: %The integers $1$ through $n/2$ in increasing order, followed by $n/2$
1419: %through $1$ in decreasing order.
1420: The integers $(1,2,\ldots,n/2,n/2,\ldots,2,1)$.
1421: \item[m3killer]
1422: Musser's ``median-of-3 killer'' sequence with $n=4j$ and $k=n/2$:
1423: $$
1424: \left(\begin{array}{ccccccccccccc}
1425: 1&  2 & 3&  4 & \ldots&  k-2& k-1& k& k+1& \ldots& 2k-2& 2k-1& 2k\\
1426: 1& k+1& 3& k+3& \ldots& 2k-3& k-1& 2&  4 & \ldots& 2k-2& 2k-1& 2k
1427: \end{array}\right).
1428: $$
1429: \item[twofaced]
1430: Obtained by randomly permuting the
1431: elements of an m3killer sequence in positions $4\lfloor\log_2n\rfloor$
1432: through $n/2-1$ and $n/2+4\lfloor\log_2n\rfloor-1$ through $n-2$.
1433: \end{description}
1434: For each input sequence, its (lower) median element was selected
1435: for $k:=\lceil n/2\rceil$.
1436: %
1437: %   *** SUBSECTION 7.3 ***
1438: \subsection{Computational results}
1439: \label{ss:result}
1440: %
1441: We varied the input size $n$ from $50{,}000$ to $16{,}000{,}000$.  For
1442: the random, onezero and twofaced sequences, for each input size,
1443: 20 instances were randomly generated; for the deterministic
1444: sequences, 20 runs were made to measure the solution time.
1445: 
1446: The performance of {\sc Select} on randomly generated inputs is
1447: summarized in Table \ref{tab:Selrand},
1448: %
1449: %   *** TABLE 7.1 ***
1450: \begin{table}[t!]
1451: \caption{Performance of {\sc Select} on randomly generated inputs.}
1452: \label{tab:Selrand}
1453: \footnotesize
1454: \begin{center}
1455: \begin{tabular}{lrrrrrrrrrrrrr}
1456: \hline
1457: Sequence &\multicolumn{1}{c}{Size}
1458: &\multicolumn{3}{c}{Time $[{\rm msec}]$%
1459: \vphantom{$1^{2^3}$}} % Need more vertical space!
1460: &\multicolumn{3}{c}{Comparisons $[n]$}
1461: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}
1462: &\multicolumn{1}{c}{$L_{\rm avg}$}
1463: &\multicolumn{1}{c}{$P_{\rm avg}$}
1464: &\multicolumn{1}{c}{$N_{\rm avg}$}
1465: &\multicolumn{1}{c}{$p_{\rm avg}$}
1466: &\multicolumn{1}{c}{$s_{\rm avg}$}\\
1467: &\multicolumn{1}{c}{$n$}
1468: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1469: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1470: & &\multicolumn{1}{c}{$[n]$}
1471: &\multicolumn{1}{c}{$[\ln n]$}
1472: &\multicolumn{1}{c}{$[\ln n]$} &
1473: &\multicolumn{1}{c}{$[\%n]$}\\
1474: \hline
1475: %dsel20/dsel20x alpha=0.5 beta=0.25 cutoff=600
1476: random     &  50K
1477: &    2&   10&    0& 1.66& 1.77& 1.61& 1.74& 1.65& 0.46& 0.55& 8.33& 2.59\\
1478:            & 100K
1479: &    3&   10&    0& 1.63& 1.71& 1.55& 1.76& 1.63& 0.60& 0.69& 7.58& 2.12\\
1480:            & 500K
1481: &   13&   20&   10& 1.56& 1.61& 1.54& 1.36& 1.56& 0.67& 0.74& 8.05& 1.19\\
1482:            &   1M
1483: &   23&   30&   20& 1.52& 1.58& 1.00& 0.55& 1.52& 0.66& 0.73& 8.32& 0.91\\
1484:            &   2M
1485: &   46&   51&   40& 1.54& 1.56& 1.52& 1.22& 1.54& 0.75& 0.82& 8.38& 0.72\\
1486:            &   4M
1487: &   88&   91&   80& 1.53& 1.55& 1.52& 1.18& 1.53& 0.86& 0.92& 8.22& 0.57\\
1488:            &   8M
1489: &  172&  181&  160& 1.52& 1.53& 1.51& 1.13& 1.52& 0.92& 0.98& 8.54& 0.44\\
1490:            &  16M
1491: &  336&  341&  320& 1.52& 1.53& 1.51& 1.06& 1.52& 0.95& 1.01& 8.41& 0.35\\
1492: onezero    &  50K
1493: &    2&   10&    0& 1.28& 1.51& 1.00& 0.00& 1.28& 0.24& 0.18& 1.26& 1.91\\
1494:            & 100K
1495: &    3&   10&    0& 1.25& 1.51& 1.00& 0.00& 1.25& 0.26& 0.15& 1.20& 1.49\\
1496:            & 500K
1497: &   15&   20&   10& 1.33& 1.50& 1.00& 0.00& 1.33& 0.29& 0.17& 1.34& 0.93\\
1498:            &   1M
1499: &   30&   41&   20& 1.33& 1.50& 1.00& 0.00& 1.33& 0.27& 0.15& 1.20& 0.73\\
1500:            &   2M
1501: &   60&   71&   41& 1.30& 1.50& 1.00& 0.00& 1.30& 0.26& 0.14& 1.29& 0.56\\
1502:            &   4M
1503: &  109&  131&   90& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.18& 0.41\\
1504:            &   8M
1505: &  219&  261&  190& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.31& 0.32\\
1506:            &  16M
1507: &  436&  501&  370& 1.25& 1.50& 1.00& 0.00& 1.25& 0.20& 0.11& 1.21& 0.27\\
1508: twofaced   &  50K
1509: &    1&   10&    0& 1.67& 1.77& 1.59& 1.87& 1.67& 0.47& 0.56& 8.24& 2.63\\
1510:            & 100K
1511: &    3&   11&    0& 1.62& 1.73& 1.56& 1.67& 1.62& 0.60& 0.69& 7.61& 2.11\\
1512:            & 500K
1513: &   12&   20&   10& 1.56& 1.59& 1.53& 1.23& 1.56& 0.63& 0.71& 8.33& 1.18\\
1514:            &   1M
1515: &   24&   31&   20& 1.55& 1.57& 1.53& 1.23& 1.55& 0.69& 0.76& 8.22& 0.92\\
1516:            &   2M
1517: &   45&   51&   40& 1.54& 1.57& 1.52& 1.23& 1.54& 0.78& 0.85& 8.36& 0.73\\
1518:            &   4M
1519: &   88&   91&   80& 1.53& 1.54& 1.52& 1.17& 1.53& 0.88& 0.94& 8.05& 0.57\\
1520:            &   8M
1521: &  170&  180&  160& 1.52& 1.53& 1.51& 1.12& 1.52& 0.90& 0.97& 8.51& 0.44\\
1522:            &  16M
1523: &  332&  341&  320& 1.52& 1.53& 1.51& 1.04& 1.52& 0.96& 1.02& 8.55& 0.35\\
1524: \hline
1525: \end{tabular}
1526: \end{center}
1527: \end{table}
1528: %
1529: where the average, maximum and minimum solution times are in
1530: milliseconds, and the comparison counts are in multiples of $n$; e.g.,
1531: column six gives $C_{\rm avg}/n$, where $C_{\rm avg}$ is the average
1532: number of comparisons made over all instances.  Thus
1533: $\gamma_{\rm avg}:=(C_{\rm avg}-1.5n)_+/f(n)$ estimates the constant
1534: $\gamma$ in the bound \eqref{CnkFR}; moreover, we have
1535: $C_{\rm avg}\approx L_{\rm avg}$, where $L_{\rm avg}$ is the average
1536: sum of sizes of partitioned arrays.  Further,
1537: $P_{\rm avg}$ is the average number of {\sc Select} partitions, whereas
1538: $N_{\rm avg}$ is the average number of calls to sSelect and
1539: $p_{\rm avg}$ is the average number of sSelect partitions per call;
1540: both $P_{\rm avg}$ and $N_{\rm avg}$ grow slowly with $\ln n$
1541: (linearly on the onezero inputs).
1542: Finally, $s_{\rm avg}$ is the average sum of sample sizes;
1543: $s_{\rm avg}/n^{2/3}$ drops from $0.95$ for $n=50{\rm K}$ to $0.88$ for
1544: $n=16{\rm M}$ on the random and twofaced inputs, and oscillates about
1545: $0.7$ on the onezero inputs, whereas the initial
1546: $s/n^{2/3}\approx\alpha=0.5$.
1547: The results for the random and twofaced sequences are very similar:
1548: the average solution times grow linearly with $n$ (except for small
1549: inputs whose solution times could not be measured accurately), and the
1550: differences between maximum and minimum times are quite small (and also
1551: partly due to the operating system).  Except for the smallest inputs,
1552: the maximum and minimum numbers of comparisons are quite close, and
1553: $C_{\rm avg}$ nicely approaches the theoretical lower bound of $1.5n$;
1554: this is reflected in the values of $\gamma_{\rm avg}$.  The results for
1555: the onezero inputs essentially average two cases: the first pass
1556: eliminates either almost all or about half of the elements.
1557: 
1558: Table \ref{tab:Seldet} exhibits similar features of {\sc Select} on
1559: the deterministic inputs.
1560: %
1561: %   *** TABLE 7.2 ***
1562: \begin{table}[t!]
1563: \caption{Performance of {\sc Select} on deterministic inputs.}
1564: \label{tab:Seldet}
1565: \footnotesize
1566: \begin{center}
1567: \tabcolsep=0.98\tabcolsep
1568: \begin{tabular}{lrrrrrrrrrrrrr}
1569: \hline
1570: Sequence &\multicolumn{1}{c}{Size}
1571: &\multicolumn{3}{c}{Time $[{\rm msec}]$%
1572: \vphantom{$1^{2^3}$}} % Need more vertical space!
1573: &\multicolumn{3}{c}{Comparisons $[n]$}
1574: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}
1575: &\multicolumn{1}{c}{$L_{\rm avg}$}
1576: &\multicolumn{1}{c}{$P_{\rm avg}$}
1577: &\multicolumn{1}{c}{$N_{\rm avg}$}
1578: &\multicolumn{1}{c}{$p_{\rm avg}$}
1579: &\multicolumn{1}{c}{$s_{\rm avg}$}\\
1580: &\multicolumn{1}{c}{$n$}
1581: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1582: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1583: & &\multicolumn{1}{c}{$[n]$}
1584: &\multicolumn{1}{c}{$[\ln n]$}
1585: &\multicolumn{1}{c}{$[\ln n]$} &
1586: &\multicolumn{1}{c}{$[\%n]$}\\
1587: \hline
1588: %dsel10o/dsel10ox alpha=0.5 beta=0.25 cutoff=600
1589: sorted     &  50K
1590: &    1&   10&    0& 1.67& 1.76& 1.59& 1.85& 1.66& 0.48& 0.57& 7.24& 2.65\\
1591:            & 100K
1592: &    2&   10&    0& 1.62& 1.69& 1.55& 1.70& 1.62& 0.60& 0.69& 6.76& 2.12\\
1593:            & 500K
1594: &    8&   10&    0& 1.56& 1.62& 1.53& 1.35& 1.56& 0.67& 0.74& 7.52& 1.19\\
1595:            &   1M
1596: &   15&   20&   10& 1.54& 1.58& 1.53& 1.19& 1.54& 0.68& 0.75& 7.87& 0.92\\
1597:            &   2M
1598: &   27&   31&   20& 1.54& 1.56& 1.52& 1.23& 1.54& 0.74& 0.81& 7.61& 0.73\\
1599:            &   4M
1600: &   51&   61&   40& 1.53& 1.55& 1.52& 1.19& 1.53& 0.87& 0.93& 7.34& 0.57\\
1601:            &   8M
1602: &   98&  111&   90& 1.52& 1.53& 1.51& 1.10& 1.52& 0.89& 0.95& 8.03& 0.44\\
1603:            &  16M
1604: &  186&  200&  170& 1.52& 1.52& 1.51& 1.04& 1.52& 0.95& 1.01& 7.99& 0.35\\
1605: rotated    &  50K
1606: &    1&   10&    0& 1.67& 1.78& 1.59& 1.86& 1.66& 0.48& 0.57& 9.45& 2.64\\
1607:            & 100K
1608: &    2&   10&    0& 1.63& 1.73& 1.58& 1.76& 1.63& 0.61& 0.69& 9.12& 2.12\\
1609:            & 500K
1610: &    8&   10&    0& 1.56& 1.62& 1.54& 1.39& 1.56& 0.65& 0.73&10.03& 1.18\\
1611:            &   1M
1612: &   15&   20&   10& 1.55& 1.58& 1.53& 1.29& 1.55& 0.69& 0.76& 9.56& 0.92\\
1613:            &   2M
1614: &   27&   31&   20& 1.54& 1.55& 1.52& 1.19& 1.54& 0.78& 0.84& 8.69& 0.72\\
1615:            &   4M
1616: &   51&   60&   50& 1.53& 1.54& 1.52& 1.18& 1.53& 0.87& 0.94& 8.92& 0.57\\
1617:            &   8M
1618: &   98&  111&   90& 1.52& 1.53& 1.51& 1.12& 1.52& 0.89& 0.96& 9.29& 0.44\\
1619:            &  16M
1620: &  185&  210&  170& 1.52& 1.53& 1.51& 1.04& 1.52& 0.93& 0.99& 8.96& 0.35\\
1621: organpipe  &  50K
1622: &    1&   10&    0& 1.67& 1.78& 1.59& 1.94& 1.67& 0.45& 0.55& 8.21& 2.62\\
1623:            & 100K
1624: &    3&   10&    0& 1.62& 1.69& 1.57& 1.68& 1.62& 0.60& 0.69& 7.61& 2.11\\
1625:            & 500K
1626: &   10&   10&   10& 1.57& 1.60& 1.54& 1.43& 1.56& 0.67& 0.75& 8.18& 1.19\\
1627:            &   1M
1628: &   20&   20&   10& 1.55& 1.58& 1.52& 1.24& 1.55& 0.70& 0.77& 8.21& 0.93\\
1629:            &   2M
1630: &   37&   41&   30& 1.53& 1.55& 1.52& 1.15& 1.53& 0.78& 0.85& 8.48& 0.72\\
1631:            &   4M
1632: &   68&   80&   60& 1.53& 1.54& 1.52& 1.13& 1.53& 0.84& 0.91& 8.21& 0.57\\
1633:            &   8M
1634: &  130&  150&  120& 1.52& 1.54& 1.51& 1.07& 1.52& 0.88& 0.94& 8.64& 0.44\\
1635:            &  16M
1636: &  240&  260&  230& 1.52& 1.53& 1.51& 1.02& 1.52& 0.94& 1.00& 8.44& 0.35\\
1637: m3killer   &  50K
1638: &    1&   10&    0& 1.67& 1.76& 1.60& 1.89& 1.67& 0.47& 0.55& 8.82& 2.62\\
1639:            & 100K
1640: &    4&   10&    0& 1.63& 1.71& 1.57& 1.80& 1.63& 0.60& 0.69& 7.69& 2.13\\
1641:            & 500K
1642: &   11&   20&   10& 1.57& 1.62& 1.53& 1.44& 1.57& 0.66& 0.73& 8.61& 1.19\\
1643:            &   1M
1644: &   20&   20&   20& 1.55& 1.59& 1.52& 1.40& 1.55& 0.72& 0.79& 8.33& 0.93\\
1645:            &   2M
1646: &   38&   41&   30& 1.54& 1.56& 1.52& 1.25& 1.54& 0.78& 0.85& 8.30& 0.73\\
1647:            &   4M
1648: &   73&   81&   70& 1.53& 1.54& 1.52& 1.28& 1.53& 0.87& 0.94& 8.22& 0.57\\
1649:            &   8M
1650: &  137&  150&  130& 1.52& 1.53& 1.51& 1.05& 1.52& 0.91& 0.97& 8.37& 0.44\\
1651:            &  16M
1652: &  248&  260&  230& 1.52& 1.52& 1.51& 0.96& 1.52& 0.92& 0.97& 8.42& 0.35\\
1653: \hline
1654: \end{tabular}
1655: \end{center}
1656: \end{table}
1657: %
1658: The results for the sorted and rotated sequences are very similar,
1659: whereas the solution times on the organpipe and m3killer sequences
1660: are between those for the sorted and random sequences.
1661: 
1662: The results of Tabs.\ \ref{tab:Selrand}--\ref{tab:Seldet} were obtained
1663: with scheme \ref{sts} of \S\ref{ss:preptern}; to save space,
1664: Table \ref{tab:SelpartB} gives only selected results for scheme
1665: \ref{stind2},
1666: %
1667: %   *** TABLE 7.3 ***
1668: \begin{table}[t!]
1669: \caption{Performance of {\sc Select} with ternary scheme \ref{stind2}.}
1670: \label{tab:SelpartB}
1671: \footnotesize
1672: \begin{center}
1673: \begin{tabular}{lrrrrrrrrrrrrr}
1674: \hline
1675: Sequence &\multicolumn{1}{c}{Size}
1676: &\multicolumn{3}{c}{Time $[{\rm msec}]$%
1677: \vphantom{$1^{2^3}$}} % Need more vertical space!
1678: &\multicolumn{3}{c}{Comparisons $[n]$}
1679: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}
1680: &\multicolumn{1}{c}{$L_{\rm avg}$}
1681: &\multicolumn{1}{c}{$P_{\rm avg}$}
1682: &\multicolumn{1}{c}{$N_{\rm avg}$}
1683: &\multicolumn{1}{c}{$p_{\rm avg}$}
1684: &\multicolumn{1}{c}{$s_{\rm avg}$}\\
1685: &\multicolumn{1}{c}{$n$}
1686: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1687: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1688: & &\multicolumn{1}{c}{$[n]$}
1689: &\multicolumn{1}{c}{$[\ln n]$}
1690: &\multicolumn{1}{c}{$[\ln n]$} &
1691: &\multicolumn{1}{c}{$[\%n]$}\\
1692: \hline
1693: %dsel20b/dsel20bx alpha=0.5 beta=0.25 cutoff=600
1694: random     &  2M
1695: &   43&   51&   40& 1.53& 1.54& 1.52& 1.02& 1.53& 0.76& 0.83& 8.31& 0.72\\
1696:            &   4M
1697: &   93&  101&   90& 1.53& 1.55& 1.52& 1.09& 1.53& 0.85& 0.92& 8.42& 0.57\\
1698:            &   8M
1699: &  177&  190&  170& 1.52& 1.54& 1.51& 1.03& 1.52& 0.87& 0.93& 8.15& 0.44\\
1700:            &  16M
1701: &  343&  350&  340& 1.51& 1.53& 1.51& 0.88& 1.51& 0.91& 0.97& 8.50& 0.35\\
1702: onezero    &  2M
1703: &   82&   91&   70& 1.30& 1.50& 1.00& 0.00& 1.30& 0.26& 0.14& 1.29& 0.56\\
1704:            &   4M
1705: &  149&  180&  130& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.18& 0.41\\
1706:            &   8M
1707: &  304&  351&  270& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.31& 0.32\\
1708:            &  16M
1709: &  621&  711&  531& 1.25& 1.50& 1.00& 0.00& 1.25& 0.20& 0.11& 1.21& 0.27\\
1710: sorted     &  2M
1711: &   23&   30&   20& 1.54& 1.55& 1.52& 1.18& 1.54& 0.78& 0.85& 7.61& 0.72\\
1712:            &   4M
1713: &   43&   50&   40& 1.53& 1.54& 1.51& 1.18& 1.53& 0.86& 0.92& 7.76& 0.57\\
1714:            &   8M
1715: &   82&   90&   80& 1.52& 1.53& 1.51& 1.10& 1.52& 0.89& 0.95& 8.01& 0.44\\
1716:            &  16M
1717: &  156&  160&  150& 1.52& 1.53& 1.51& 1.04& 1.52& 0.97& 1.03& 8.12& 0.35\\
1718: \hline
1719: \end{tabular}
1720: \end{center}
1721: \end{table}
1722: %
1723: whereas Table \ref{tab:SelpartI}
1724: %
1725: %   *** TABLE 7.4 ***
1726: \begin{table}%[t!]
1727: \caption{Performance of {\sc Select} with the hybrid scheme of
1728: \cite[\S5.6]{kiw:psq}.}
1729: \label{tab:SelpartI}
1730: \footnotesize
1731: \begin{center}
1732: \begin{tabular}{lrrrrrrrrrrrrr}
1733: \hline
1734: Sequence &\multicolumn{1}{c}{Size}
1735: &\multicolumn{3}{c}{Time $[{\rm msec}]$%
1736: \vphantom{$1^{2^3}$}} % Need more vertical space!
1737: &\multicolumn{3}{c}{Comparisons $[n]$}
1738: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}
1739: &\multicolumn{1}{c}{$L_{\rm avg}$}
1740: &\multicolumn{1}{c}{$P_{\rm avg}$}
1741: &\multicolumn{1}{c}{$N_{\rm avg}$}
1742: &\multicolumn{1}{c}{$p_{\rm avg}$}
1743: &\multicolumn{1}{c}{$s_{\rm avg}$}\\
1744: &\multicolumn{1}{c}{$n$}
1745: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1746: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1747: & &\multicolumn{1}{c}{$[n]$}
1748: &\multicolumn{1}{c}{$[\ln n]$}
1749: &\multicolumn{1}{c}{$[\ln n]$} &
1750: &\multicolumn{1}{c}{$[\%n]$}\\
1751: \hline
1752: %dsel20d/dsel20dx alpha=0.5 beta=0.25 cutoff=600
1753: random     &  2M
1754: &   44&   50&   40& 1.53& 1.54& 1.52& 1.03& 1.53& 0.76& 0.83& 8.31& 0.72\\
1755:            &   4M
1756: &   86&  100&   80& 1.53& 1.55& 1.52& 1.10& 1.53& 0.85& 0.92& 8.42& 0.57\\
1757:            &   8M
1758: &  163&  171&  160& 1.52& 1.54& 1.51& 1.03& 1.52& 0.87& 0.93& 8.15& 0.44\\
1759:            &  16M
1760: &  317&  321&  310& 1.51& 1.53& 1.51& 0.88& 1.51& 0.91& 0.97& 8.50& 0.35\\
1761: onezero    &  2M
1762: &   74&   80&   70& 1.30& 1.50& 1.00& 0.00& 1.30& 0.26& 0.14& 1.29& 0.56\\
1763:            &   4M
1764: &  141&  151&  130& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.18& 0.41\\
1765:            &   8M
1766: &  285&  301&  270& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.31& 0.32\\
1767:            &  16M
1768: &  578&  621&  541& 1.25& 1.50& 1.00& 0.00& 1.25& 0.20& 0.11& 1.21& 0.27\\
1769: sorted     &  2M
1770: &   23&   30&   20& 1.54& 1.55& 1.52& 1.18& 1.54& 0.78& 0.85& 7.61& 0.72\\
1771:            &   4M
1772: &   42&   50&   40& 1.53& 1.54& 1.51& 1.19& 1.53& 0.86& 0.92& 7.76& 0.57\\
1773:            &   8M
1774: &   80&   80&   80& 1.52& 1.53& 1.51& 1.11& 1.52& 0.89& 0.95& 8.01& 0.44\\
1775:            &  16M
1776: &  153&  170&  150& 1.52& 1.53& 1.51& 1.04& 1.52& 0.97& 1.03& 8.12& 0.35\\
1777: \hline
1778: \end{tabular}
1779: \end{center}
1780: \end{table}
1781: %
1782: presents results for the hybrid scheme I of \cite[\S5.6]{kiw:psq},
1783: which combines some features of schemes \ref{sts} and \ref{stind2}.
1784: The hybrid scheme is quite competitive, although slower than scheme
1785: \ref{sts} on the onezero inputs.
1786: 
1787: The preceding results were obtained with the modified choice \eqref{iv3}
1788: of $i_v$.  For brevity, Table \ref{tab:Seliv} gives results for
1789: {\sc Select} with scheme \ref{sts} and the standard choice \eqref{iv}
1790: of $i_v$ on the random inputs only, since these inputs are most
1791: frequently used in theory and practice for evaluating sorting and
1792: selection methods.
1793: %
1794: %   *** TABLE 7.5 ***
1795: \begin{table}%[t!]
1796: \caption{Performance of {\sc Select} with the standard choice of $i_v$.}
1797: \label{tab:Seliv}
1798: \footnotesize
1799: \begin{center}
1800: \begin{tabular}{lrrrrrrrrrrrrr}
1801: \hline
1802: Sequence &\multicolumn{1}{c}{Size}
1803: &\multicolumn{3}{c}{Time $[{\rm msec}]$%
1804: \vphantom{$1^{2^3}$}} % Need more vertical space!
1805: &\multicolumn{3}{c}{Comparisons $[n]$}
1806: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}
1807: &\multicolumn{1}{c}{$L_{\rm avg}$}
1808: &\multicolumn{1}{c}{$P_{\rm avg}$}
1809: &\multicolumn{1}{c}{$N_{\rm avg}$}
1810: &\multicolumn{1}{c}{$p_{\rm avg}$}
1811: &\multicolumn{1}{c}{$s_{\rm avg}$}\\
1812: &\multicolumn{1}{c}{$n$}
1813: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1814: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1815: & &\multicolumn{1}{c}{$[n]$}
1816: &\multicolumn{1}{c}{$[\ln n]$}
1817: &\multicolumn{1}{c}{$[\ln n]$} &
1818: &\multicolumn{1}{c}{$[\%n]$}\\
1819: \hline
1820: %dsel20/dsel20x alpha=0.5 beta=0.25 cutoff=600
1821: random     &  50K
1822: &    4&   10&    0& 1.83& 1.97& 1.74& 3.73& 1.83& 0.57& 0.67& 8.49& 2.96\\
1823:            & 100K
1824: &    4&   10&    0& 1.73& 1.83& 1.61& 3.13& 1.73& 0.73& 0.82& 7.80& 2.32\\
1825:            & 500K
1826: &   14&   20&   10& 1.65& 1.69& 1.61& 3.25& 1.65& 0.82& 0.90& 8.40& 1.30\\
1827:            &   1M
1828: &   25&   30&   20& 1.61& 1.65& 1.58& 2.83& 1.60& 0.89& 0.97& 8.28& 0.99\\
1829:            &   2M
1830: &   46&   50&   40& 1.59& 1.61& 1.56& 2.92& 1.59& 0.99& 1.06& 8.01& 0.77\\
1831:            &   4M
1832: &   90&  100&   80& 1.56& 1.58& 1.54& 2.61& 1.56& 1.15& 1.22& 8.34& 0.60\\
1833:            &   8M
1834: &  174&  181&  170& 1.55& 1.57& 1.54& 2.70& 1.55& 1.21& 1.27& 8.09& 0.47\\
1835:            &  16M
1836: &  341&  351&  330& 1.54& 1.56& 1.53& 2.68& 1.54& 1.21& 1.28& 8.33& 0.36\\
1837: \hline
1838: \end{tabular}
1839: \end{center}
1840: \end{table}
1841: %
1842: The modified choice typically requires fewer comparisons for small
1843: inputs, but its advantages are less pronounced for larger inputs.
1844: A similar behavior was observed for {\sc Select} with scheme
1845: \ref{stind2}. % and for {\sc bSelect}.
1846: 
1847: For comparison, Table \ref{tab:qSel} extracts from \cite{kiw:rsq}
1848: some results of {\sc qSelect} for the samples \eqref{sgf}.
1849: %
1850: %   *** TABLE 7.6 ***
1851: \begin{table}
1852: \caption{Performance of quintary {\sc qSelect} on random inputs.}
1853: \label{tab:qSel}
1854: \footnotesize
1855: \begin{center}
1856: \begin{tabular}{lrrrrrrrrrrrrr}
1857: \hline
1858: Sequence &\multicolumn{1}{c}{Size}
1859: &\multicolumn{3}{c}{Time $[{\rm msec}]$%
1860: \vphantom{$1^{2^3}$}} % Need more vertical space!
1861: &\multicolumn{3}{c}{Comparisons $[n]$}
1862: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}
1863: &\multicolumn{1}{c}{$L_{\rm avg}$}
1864: &\multicolumn{1}{c}{$P_{\rm avg}$}
1865: &\multicolumn{1}{c}{$N_{\rm avg}$}
1866: &\multicolumn{1}{c}{$p_{\rm avg}$}
1867: &\multicolumn{1}{c}{$s_{\rm avg}$}\\
1868: &\multicolumn{1}{c}{$n$}
1869: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1870: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1871: & &\multicolumn{1}{c}{$[n]$}
1872: &\multicolumn{1}{c}{$[\ln n]$}
1873: &\multicolumn{1}{c}{$[\ln n]$} &
1874: &\multicolumn{1}{c}{$[\%n]$}\\
1875: \hline
1876: %dsel10o/dsel10ox alpha=0.5 beta=0.25 cutoff=600
1877: random     &  50K
1878: &    3&   10&    0& 1.81& 1.85& 1.77& 5.23& 1.22& 0.46& 1.01& 7.62& 4.11\\
1879:            & 100K
1880: &    4&   10&    0& 1.72& 1.76& 1.65& 4.50& 1.15& 0.45& 0.99& 8.05& 3.20\\
1881:            & 500K
1882: &   13&   20&   10& 1.62& 1.63& 1.60& 4.14& 1.08& 0.59& 1.27& 7.59& 1.86\\
1883:            &   1M
1884: &   24&   30&   20& 1.59& 1.60& 1.57& 3.93& 1.06& 0.64& 1.35& 8.18& 1.47\\
1885:            &   2M
1886: &   46&   50&   40& 1.57& 1.58& 1.56& 3.73& 1.04& 0.76& 1.59& 7.67& 1.16\\
1887:            &   4M
1888: &   86&   91&   80& 1.56& 1.56& 1.55& 3.61& 1.03& 0.94& 1.94& 7.21& 0.91\\
1889:            &   8M
1890: &  163&  171&  160& 1.54& 1.55& 1.54& 3.45& 1.03& 0.98& 1.99& 7.45& 0.72\\
1891:            &  16M
1892: &  316&  321&  310& 1.53& 1.54& 1.53& 3.44& 1.02& 0.99& 2.02& 7.55& 0.57\\
1893: \hline
1894: \end{tabular}
1895: \end{center}
1896: \end{table}
1897: %
1898: As noted in \S\ref{s:intro}, {\sc qSelect} is slightly faster than
1899: {\sc Select} on larger inputs because most of its work occurs on the
1900: first partition (cf.\ $L_{\rm avg}$ in Tabs.\ \ref{tab:Selrand} and
1901: \ref{tab:qSel}).  In Table \ref{tab:riSel}
1902: %
1903: %   *** TABLE 7.7 ***
1904: \begin{table}[t!]
1905: \caption{Performance of {\sc riSelect} on random inputs.}
1906: \label{tab:riSel}
1907: \footnotesize
1908: \begin{center}
1909: \begin{tabular}{lrrrrrrrrrr}
1910: \hline
1911: Sequence &\multicolumn{1}{c}{Size}
1912: &\multicolumn{3}{c}{Time $[{\rm msec}]$%
1913: \vphantom{$1^{2^3}$}} % Need more vertical space!
1914: &\multicolumn{3}{c}{Comparisons $[n]$}
1915: &\multicolumn{1}{c}{$L_{\rm avg}$}
1916: &\multicolumn{1}{c}{$P_{\rm avg}$}
1917: &\multicolumn{1}{c}{$N_{\rm rnd}$}\\
1918: &\multicolumn{1}{c}{$n$}
1919: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1920: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1921: &\multicolumn{1}{c}{$[n]$}
1922: &\multicolumn{1}{c}{$[\ln n]$}&\\
1923: \hline
1924: %dsel08
1925: random     &  50K
1926: &    2&   10&    0& 3.10& 4.32& 1.88& 3.10& 1.63& 0.45\\
1927:            & 100K
1928: &    4&   10&    0& 2.61& 4.19& 1.77& 2.61& 1.60& 0.20\\
1929:            & 500K
1930: &   17&   20&   10& 2.91& 4.45& 1.69& 2.91& 1.57& 0.25\\
1931:            &   1M
1932: &   33&   41&   20& 2.81& 3.79& 1.84& 2.81& 1.57& 0.40\\
1933:            &   2M
1934: &   62&   90&   40& 2.60& 3.57& 1.83& 2.60& 1.61& 0.35\\
1935:            &   4M
1936: &  135&  191&   90& 2.86& 4.38& 1.83& 2.86& 1.65& 0.55\\
1937:            &   8M
1938: &  249&  321&  190& 2.60& 3.48& 1.80& 2.60& 1.58& 0.40\\
1939:            &  16M
1940: &  553&  762&  331& 2.99& 4.49& 1.73& 2.99& 1.58& 0.40\\
1941: \hline
1942: \end{tabular}
1943: \end{center}
1944: \end{table}
1945: %
1946: we give corresponding results for {\sc riSelect}, a Fortran version of
1947: the algorithm of \cite{val:iss}.  For these inputs, {\sc riSelect}
1948: behaves like {\sc Find} with median-of-3 pivots (because the
1949: average numbers of randomization steps, $N_{\rm rnd}$, are negligible);
1950: hence the expected value of $C_{\rm avg}$ is of order $2.75n$
1951: \cite{kimapr:ahf}. 
1952: 
1953: Our final Table \ref{tab:comp_small}
1954: %
1955: %   *** TABLE 7.8 ***
1956: \begin{table}
1957: \caption{Numbers of comparisons per element made on small random
1958: inputs.}
1959: \label{tab:comp_small}
1960: \footnotesize
1961: \begin{center}
1962: \begin{tabular}{lccccccccccc}
1963: \hline
1964: %dsel20x, dsel10x, dsel08x alpha=0.5 beta=0.25 cutoff=600
1965: Size%
1966: \vphantom{$1^{2^3}$} % Need more vertical space!
1967: &
1968: &  1000&  2500&  5000&  7500& 10000& 12500& 15000& 17500& 20000& 25000\\
1969: \hline
1970: &avg
1971: &  2.48&  2.06&  1.93&  1.87&  1.81&  1.79&  1.77&  1.76&  1.74&  1.71\\
1972: {\sc Select}
1973: &max
1974: &  4.25&  3.03&  2.28&  2.22&  2.09&  2.05&  1.95&  1.93&  1.93&  1.93\\
1975: &min
1976: &  1.55&  1.06&  1.03&  1.64&  1.62&  1.61&  1.64&  1.63&  1.59&  1.60\\
1977: \hline
1978: &avg
1979: &  2.86&  2.55&  2.24&  2.16&  2.07&  2.03&  1.98&  1.98&  1.94&  1.90\\
1980: {\sc qSelect}
1981: &max
1982: &  3.97&  3.55&  2.57&  2.38&  2.28&  2.21&  2.16&  2.13&  2.11&  2.31\\
1983: &min
1984: &  2.29&  1.97&  1.98&  1.95&  1.87&  1.86&  1.82&  1.83&  1.82&  1.75\\
1985: \hline
1986: &avg
1987: &  2.72&  2.85&  2.66&  2.71&  2.72&  2.83&  2.78&  2.75&  2.75&  2.84\\
1988: {\sc riSelect}
1989: &max
1990: &  4.40&  4.51&  4.69&  4.43&  4.62&  4.76&  4.64&  4.40&  5.10&  4.77\\
1991: &min
1992: &  1.68&  1.83&  1.75&  1.59&  1.70&  1.77&  1.78&  1.67&  1.90&  1.71\\
1993: \hline
1994: \end{tabular}
1995: \end{center}
1996: \end{table}
1997: %
1998: shows that {\sc Select} beats its competitors with respect to the
1999: numbers of comparisons made on small random inputs (100 instances for
2000: each input size $n$).
2001: 
2002: Our computational results, combined with those in
2003: \cite{kiw:psq,kiw:rsq},
2004: suggest that both {\sc Select} and {\sc qSelect} may compete with
2005: {\sc Find} in practice.
2006: 
2007: %{\bf Acknowledgment}.  I would like to thank the Associate Editor and
2008: %the two anonymous referees for their helpful comments.
2009: {\bf Acknowledgment}.  I would like to thank Olgierd Hryniewicz,
2010: Roger Koenker, Ronald L. Rivest and John D. Valois for useful
2011: discussions.
2012: 
2013: %\clearpage
2014: 
2015: %
2016: %   *** REFERENCES ***
2017: \footnotesize
2018: %\bibliography{kckabbr,kalg,kbk,kck,kint,kth}
2019: %\bibliographystyle{kck}
2020: \newcommand{\etalchar}[1]{$^{#1}$}
2021: \newcommand{\noopsort}[1]{} \newcommand{\printfirst}[2]{#1}
2022:   \newcommand{\singleletter}[1]{#1} \newcommand{\switchargs}[2]{#2#1}
2023: \ifx\undefined\bysame
2024: \newcommand{\bysame}{\leavevmode\hbox to3em{\hrulefill}\,}
2025: \fi
2026: \begin{thebibliography}{PRKT83}
2027: 
2028: \bibitem[BeM93]{bemc:esf}
2029: J.~L. Bentley and M.~D. McIlroy, {\em Engineering a sort function},
2030:   Software--Practice and Experience {\bf 23} (1993) 1249--1265.
2031: 
2032: \bibitem[BFP{\etalchar{+}}73]{blflprrita:tbs}
2033: M.~Blum, R.~W. Floyd, V.~R. Pratt, R.~L. Rivest and R.~E. Tarjan, {\em Time
2034:   bounds for selection}, J. Comput. System Sci. {\bf 7} (1973) 448--461.
2035: 
2036: \bibitem[Bro76]{bro:ra489}
2037: T.~Brown, {\em Remark on {A}lgorithm 489}, ACM Trans. Math. Software {\bf 2}
2038:   (1976) 301--304.
2039: 
2040: \bibitem[Chv79]{chv:thd}
2041: V.~Chv{\'a}tal, {\em The tail of the hypergeometric distribution}, Discrete
2042:   Math. {\bf 25} (1979) 285--287.
2043: 
2044: \bibitem[CuM89]{cumu:acs}
2045: W.~Cunto and J.~I. Munro, {\em Average case selection}, J. of the ACM {\bf 36}
2046:   (1989) 270--279.
2047: 
2048: \bibitem[DHUZ01]{dohaulzw:lbs}
2049: D.~Dor, J.~H{\aa}stad, S.~Ulfberg and U.~Zwick, {\em On lower bounds for
2050:   selecting the median}, SIAM J. Discrete Math. {\bf 14} (2001) 299--311.
2051: 
2052: \bibitem[DoZ99]{dozw:sm}
2053: D.~Dor and U.~Zwick, {\em Selecting the median}, SIAM J. Comput. {\bf 28}
2054:   (1999) 1722--1758.
2055: 
2056: \bibitem[DoZ01]{dozw:msr}
2057: \bysame, {\em Median selection requires $(2+\epsilon){N}$ comparisons}, SIAM J.
2058:   Discrete Math. {\bf 14} (2001) 312--325.
2059: 
2060: \bibitem[FlR75a]{flri:asf}
2061: R.~W. Floyd and R.~L. Rivest, {\em The algorithm {SELECT}---for finding the
2062:   $i$th smallest of $n$ elements ({A}lgorithm 489)}, Comm. ACM {\bf 18} (1975)
2063:   173.
2064: 
2065: \bibitem[FlR75b]{flri:etb}
2066: \bysame, {\em Expected time bounds for selection}, Comm. ACM {\bf 18} (1975)
2067:   165--172.
2068: 
2069: \bibitem[Gr{\"u}99]{gru:mvh}
2070: R.~Gr{\"u}bel, {\em On the median-of-$k$ version of {H}oare's selection
2071:   algorithm}, Theor. Inform. Appl. {\bf 33} (1999) 177--192.
2072: 
2073: \bibitem[Hoa61]{hoa:a65}
2074: C.~A.~R. Hoare, {\em Algorithm 65: {\sc Find}}, Comm. ACM {\bf 4} (1961)
2075:   321--322.
2076: 
2077: \bibitem[Hoe63]{hoe:pis}
2078: W.~Hoeffding, {\em Probability inequalities for sums of bounded random
2079:   variables}, J. Amer. Statist. Assoc. {\bf 58} (1963) 13--30.
2080: 
2081: \bibitem[Kiw03a]{kiw:psq}
2082: K.~C. Kiwiel, {\em Partitioning schemes for quicksort and quickselect}, Tech.
2083:   report, Systems Research Institute, Warsaw, 2003.
2084: \newblock Available at the URL http://arxiv.org/abs/cs.DS/0312054.
2085: 
2086: \bibitem[Kiw03b]{kiw:rsq}
2087: \bysame, {\em Randomized selection with quintary partitions}, Tech. report,
2088:   Systems Research Institute, Warsaw, 2003.
2089: \newblock Available at the URL http://arxiv.org/abs/cs.DS/0312055.
2090: 
2091: \bibitem[KMP97]{kimapr:ahf}
2092: P.~Kirschenhofer, C.~Mart{\'\i}nez and H.~Prodinger, {\em Analysis of {H}oare's
2093:   {\sc find} algorithm with median-of-three partition}, Random Structures and
2094:   Algorithms {\bf 10} (1997) 143--156.
2095: 
2096: \bibitem[Knu98]{knu:acpIII2}
2097: D.~E. Knuth, {\em The Art of Computer Programming. Volume III: Sorting and
2098:   Searching}, second ed., Addison-Wesley, Reading, MA, 1998.
2099: 
2100: \bibitem[MaR01]{maro:oss}
2101: C.~Mart{\'\i}nez and S.~Roura, {\em Optimal sampling strategies in quicksort
2102:   and quickselect}, SIAM J. Comput. {\bf 31} (2001) 683--705.
2103: 
2104: \bibitem[Mus97]{mus:iss}
2105: D.~R. Musser, {\em Introspective sorting and selection algorithms},
2106:   Software--Practice and Experience {\bf 27} (1997) 983--993.
2107: 
2108: \bibitem[PRKT83]{poriti:eds}
2109: J.~T. Postmus, A.~H.~G. Rinnooy~Kan and G.~T. Timmer, {\em An efficient dynamic
2110:   selection method}, Comm. ACM {\bf 26} (1983) 878--881.
2111: 
2112: \bibitem[SPP76]{scpapi:fm}
2113: A.~Sch{\"o}nhage, M.~Paterson and N.~Pippenger, {\em Finding the median}, J.
2114:   Comput. System Sci. {\bf 13} (1976) 184--199.
2115: 
2116: \bibitem[Val00]{val:iss}
2117: J.~D. Valois, {\em Introspective sorting and selection revisited},
2118:   Software--Practice and Experience {\bf 30} (2000) 617--638.
2119: 
2120: \end{thebibliography}
2121: \normalsize
2122: %   *** END OF REFERENCES ***
2123: %
2124: \end{document}             % End of document.
2125: