1: \documentclass[12pt]{article}
2: % All margin dimensions are measured from a point one inch from top
3: % and left side of page.
4: \oddsidemargin=0in % Left margin on odd-numbered pages (0in: no offset from the reference point).
5: \evensidemargin=0in % Left margin on even-numbered pages (same value as on odd pages).
6: \textheight=8.9in % Height of text (excluding head and foot).
7: \textwidth=6.35in % Width of text on page.
8: \topmargin=-0.5in % Extra space added to top of page; negative here, so the page content is moved up by half an inch.
9: % 1pc = 0.421751 cm, 1 cm = 2.37106 pc, 1 pt = 0.0351459 cm
10: \title{Randomized selection with tripartitioning}
11: \author{Krzysztof C. Kiwiel\thanks{Systems Research Institute,
12: % Polish Academy of Sciences,
13: Newelska 6, 01--447 Warsaw, Poland
14: ({\tt kiwiel@ibspan.waw.pl})}}
15: \date{January 4, 2004}
16:
17: % ersatz blackboard characters: a roman `I' followed by a -.18em kern and the letter F (or R), so the two glyphs overlap; \eqref{label} prints \ref{label} in parentheses using \normalfont, so the reference stays upright even inside italic text
18: \newcommand{\BbbF}{{\rm\normalcolor I\kern-.18em F}}
19: \newcommand{\BbbR}{{\rm\normalcolor I\kern-.18em R}}
20: \newcommand{\eqref}[1]{{\normalfont\normalcolor(\ref{#1})}}
21: \makeatletter
22: % the proof environment: \begin{proof} prints a bold `Proof.' heading, \begin{proof}[text] prints `Proof text.'; the helpers use private @-names so that the kernel commands \a (tabbing accents) and \b (bar-under accent) keep working inside a proof
23: \def\proof{% the two helpers are (re)defined every time the environment is entered
24: \def\proof@head##1{\begin{trivlist}\item[]{\bf\ignorespaces{##1}.}% \proof@head{heading}: open a trivlist and typeset the heading in bold, followed by a period
25: \enspace\ignorespaces}%
26: \def\proof@opthead[##1]{\proof@head{Proof\ \ignorespaces{##1}}}% optional-argument form: the heading becomes `Proof text'
27: \@ifnextchar[{\proof@opthead}{\proof@head{Proof}}}
28: \def\endproof{\end{trivlist}}
29: % end-of-proof symbol: a hollow box built from .4pt rules, .5em wide inside and as tall as a \normalsize `A'; in math mode that is not inner (e.g. a display) it is preceded by a \quad
30: \def\qed{\relax\protect\ifmmode\ifinner\else\quad\fi\fi
31: \hbox{\vbox{\hrule height.4pt\hbox{\vbox{\hrule height.4pt
32: \hbox{\vrule width.4pt\vphantom{\normalsize A}\kern.5em
33: \vrule width.4pt}\hrule height.4pt}}}}}
34: % subequations: inside the environment equations are numbered with the parent equation number followed by a letter (\alph); at the end the saved counter value and the saved \theequation are restored
35: \newtoks\@stequation
36: \def\subequations{\refstepcounter{equation}% step to the parent equation number
37: \edef\@savedequation{\the\c@equation}% remember the parent counter value for \endsubequations
38: \@stequation=\expandafter{\theequation}% %only want \theequation
39: \edef\@savedtheequation{\the\@stequation}% %expanded once
40: \edef\oldtheequation{\theequation}% fully expanded parent number, used as the prefix of the lettered numbers
41: \setcounter{equation}{0}% restart the counter so that the letters begin at `a'
42: \def\theequation{\oldtheequation\alph{equation}}}%
43: \def\endsubequations{% undo the changes made by \subequations
44: \setcounter{equation}{\@savedequation}% back to the parent counter value
45: \@stequation=\expandafter{\@savedtheequation}% reinstate the saved (once-expanded) numbering format
46: \edef\theequation{\the\@stequation}\global\@ignoretrue}
47: % modified theorem environment: bold heading `#1 #2.' (or `#1 #2 (#3).' in the optional-argument form; presumably #1 = name, #2 = number, #3 = note) ending with a period, followed by an italic body
48: \def\@begintheorem#1#2{\trivlist
49: \item[\hskip \labelsep{\bfseries #1\ #2.}]\itshape}
50: \def\@opargbegintheorem#1#2#3{\trivlist
51: \item[\hskip \labelsep{\bfseries #1\ #2\ (#3).}]\itshape}
52: % numbering equations, figures and tables: each counter is reset by \section and printed as <section>.<number>
53: \@addtoreset{equation}{section}% Makes \section reset `equation' counter.
54: \def\theequation{\thesection.\arabic{equation}}
55: \@addtoreset{figure}{section}
56: \def\thefigure{\thesection.\arabic{figure}}
57: \@addtoreset{table}{section}
58: \def\thetable{\thesection.\arabic{table}}
59: % fix up of the eqnarray environment: \@tempa and \@tempb are reference copies that the current \eqnarray is compared against with \ifx; on a match \eqnarray is redefined with \@@eqnsel (= \relax) in place of \hskip\tw@\arraycolsep around the middle column and with {}##{} as the middle-column template; if neither copy matches, only a warning is printed and \eqnarray is left alone
60: \let\@@eqnsel=\relax
61: \def\@tempa{% reference copy tested first (the fleqn case, see the \ifx below); must stay token-identical to the definition it is meant to detect
62: \stepcounter{equation}%
63: \def\@currentlabel{\p@equation\theequation}%
64: \global\@eqnswtrue\m@th
65: \global\@eqcnt\z@
66: \tabskip\mathindent
67: \let\\=\@eqncr
68: \setlength\abovedisplayskip{\topsep}%
69: \ifvmode
70: \addtolength\abovedisplayskip{\partopsep}%
71: \fi
72: \addtolength\abovedisplayskip{\parskip}%
73: \setlength\belowdisplayskip{\abovedisplayskip}%
74: \setlength\belowdisplayshortskip{\abovedisplayskip}%
75: \setlength\abovedisplayshortskip{\abovedisplayskip}%
76: $$\everycr{}\halign to\linewidth% $$
77: \bgroup
78: \hskip\@centering
79: $\displaystyle\tabskip\z@skip{##}$\@eqnsel&%
80: \global\@eqcnt\@ne \hskip \tw@\arraycolsep \hfil${##}$\hfil&%
81: \global\@eqcnt\tw@ \hskip \tw@\arraycolsep
82: $\displaystyle{##}$\hfil \tabskip\@centering&%
83: \global\@eqcnt\thr@@
84: \hb@xt@\z@\bgroup\hss##\egroup\tabskip\z@skip\cr}%
85: \def\@tempb{% reference copy tested second (the default, non-fleqn case); must stay token-identical as well
86: \stepcounter{equation}%
87: \def\@currentlabel{\p@equation\theequation}%
88: \global\@eqnswtrue
89: \m@th
90: \global\@eqcnt\z@
91: \tabskip\@centering
92: \let\\\@eqncr
93: $$\everycr{}\halign to\displaywidth\bgroup
94: \hskip\@centering$\displaystyle\tabskip\z@skip{##}$\@eqnsel
95: &\global\@eqcnt\@ne\hskip \tw@\arraycolsep \hfil${##}$\hfil
96: &\global\@eqcnt\tw@ \hskip \tw@\arraycolsep
97: $\displaystyle{##}$\hfil\tabskip\@centering
98: &\global\@eqcnt\thr@@ \hb@xt@\z@\bgroup\hss##\egroup
99: \tabskip\z@skip
100: \cr
101: }
102: % Patch \eqnarray only when it is \ifx-equal to one of the two reference copies above.
103: \ifx\eqnarray\@tempa% If the fleqn document-class option is in effect
104: \def\eqnarray{% patched version of the \@tempa definition; differs only in the middle-column spacing and template
105: \stepcounter{equation}%
106: \def\@currentlabel{\p@equation\theequation}%
107: \global\@eqnswtrue\m@th
108: \global\@eqcnt\z@
109: \tabskip\mathindent
110: \let\\=\@eqncr
111: \setlength\abovedisplayskip{\topsep}%
112: \ifvmode
113: \addtolength\abovedisplayskip{\partopsep}%
114: \fi
115: \addtolength\abovedisplayskip{\parskip}%
116: \setlength\belowdisplayskip{\abovedisplayskip}%
117: \setlength\belowdisplayshortskip{\abovedisplayskip}%
118: \setlength\abovedisplayshortskip{\abovedisplayskip}%
119: $$\everycr{}\halign to\linewidth% $$
120: \bgroup
121: \hskip\@centering
122: $\displaystyle\tabskip\z@skip{##}$\@eqnsel&%
123: \global\@eqcnt\@ne
124: \@@eqnsel% \@@eqnsel has replaced \hskip \tw@\arraycolsep!!!
125: \hfil${{}##{}}$\hfil&% as in fixup.sty but textstyle!!!
126: \global\@eqcnt\tw@
127: \@@eqnsel% \@@eqnsel has replaced \hskip \tw@\arraycolsep!!!
128: $\displaystyle{##}$\hfil \tabskip\@centering&%
129: \global\@eqcnt\thr@@
130: \hb@xt@\z@\bgroup\hss##\egroup\tabskip\z@skip\cr}%
131: \else\ifx\eqnarray\@tempb% Else try the default eqnarray environment.
132: \def\eqnarray{% patched version of the \@tempb definition; differs only in the middle-column spacing and template
133: \stepcounter{equation}%
134: \def\@currentlabel{\p@equation\theequation}%
135: \global\@eqnswtrue
136: \m@th
137: \global\@eqcnt\z@
138: \tabskip\@centering
139: \let\\\@eqncr
140: $$\everycr{}\halign to\displaywidth\bgroup
141: \hskip\@centering$\displaystyle\tabskip\z@skip{##}$\@eqnsel
142: &\global\@eqcnt\@ne
143: \@@eqnsel% \@@eqnsel has replaced \hskip \tw@\arraycolsep!!!
144: \hfil${{}##{}}$\hfil% as in fixup.sty but textstyle!!!
145: &\global\@eqcnt\tw@
146: \@@eqnsel% \@@eqnsel has replaced \hskip \tw@\arraycolsep!!!
147: $\displaystyle{##}$\hfil\tabskip\@centering
148: &\global\@eqcnt\thr@@ \hb@xt@\z@\bgroup\hss##\egroup
149: \tabskip\z@skip
150: \cr}
151: \else \typeout{Warning: Unable to fix unknown version of \string\eqnarray.}
152: \fi\fi
153: \def\@tempa{} % Free up TeX's memory
154: \def\@tempb{}
155: % closed format bibliography: redefines thebibliography with \itemsep set to zero; the two branches differ only in the heading, \section*{\refname} when \chapter is undefined and \section*{\bibname} otherwise
156: \@ifundefined{chapter}{% first branch: classes that do not define \chapter
157: \renewenvironment{thebibliography}[1]
158: {\section*{\refname
159: \@mkboth{\MakeUppercase\refname}{\MakeUppercase\refname}}%
160: \list{\@biblabel{\@arabic\c@enumiv}}%
161: {\settowidth\labelwidth{\@biblabel{#1}}%
162: \leftmargin\labelwidth
163: \advance\leftmargin\labelsep
164: \itemsep \z@ % Suppresses vertical separation.
165: \@openbib@code
166: \usecounter{enumiv}%
167: \let\p@enumiv\@empty
168: \renewcommand\theenumiv{\@arabic\c@enumiv}}%
169: \sloppy
170: \clubpenalty4000
171: \@clubpenalty \clubpenalty
172: \widowpenalty4000%
173: \sfcode`\.\@m}
174: {\def\@noitemerr
175: {\@latex@warning{Empty `thebibliography' environment}}%
176: \endlist}}%
177: {\renewenvironment{thebibliography}[1]
178: {\section*{\bibname
179: \@mkboth{\MakeUppercase\bibname}{\MakeUppercase\bibname}}%
180: \list{\@biblabel{\@arabic\c@enumiv}}%
181: {\settowidth\labelwidth{\@biblabel{#1}}%
182: \leftmargin\labelwidth
183: \advance\leftmargin\labelsep
184: \itemsep \z@ % Suppresses vertical separation.
185: \@openbib@code
186: \usecounter{enumiv}%
187: \let\p@enumiv\@empty
188: \renewcommand\theenumiv{\@arabic\c@enumiv}}%
189: \sloppy
190: \clubpenalty4000
191: \@clubpenalty \clubpenalty
192: \widowpenalty4000%
193: \sfcode`\.\@m}
194: {\def\@noitemerr
195: {\@latex@warning{Empty `thebibliography' environment}}%
196: \endlist}}%
197: % Mathematical definitions: operator names set in \operator@font; \Argmax, \Argmin, \argmax and \argmin put `Arg'/`arg' in front of the built-in \max/\min; \Exp and \Prob are ordinary symbols (\mathord); \med, \rank and \var are operators (\mathop)
198: \newcommand{\Argmax}{{\operator@font Arg}\max}
199: \newcommand{\Argmin}{{\operator@font Arg}\min}
200: \newcommand{\argmax}{{\operator@font arg}\max}
201: \newcommand{\argmin}{{\operator@font arg}\min}
202: \newcommand{\Exp}{\mathord{\operator@font E}}
203: \newcommand{\med}{\mathop{\operator@font med}}
204: \newcommand{\Prob}{\mathord{\operator@font P}}
205: \newcommand{\rank}{\mathop{\operator@font rank}}
206: \newcommand{\var}{\mathop{\operator@font var}}
207: \makeatother
208: % Theorem and definition-like environments are numbered together,
209: % starting from number 1 within each section; `theorem' owns the counter and every environment declared with [theorem] shares it.
210: \newtheorem{theorem}{Theorem}[section]
211: \newtheorem{algorithm}[theorem]{Algorithm}
212: \newtheorem{assumption}[theorem]{Assumption}
213: \newtheorem{corollary}[theorem]{Corollary}
214: \newtheorem{definition}[theorem]{Definition}
215: \newtheorem{example}[theorem]{Example}
216: \newtheorem{examples}[theorem]{Examples}
217: \newtheorem{fact}[theorem]{Fact}
218: \newtheorem{lemma}[theorem]{Lemma}
219: \newtheorem{procedure}[theorem]{Procedure}
220: \newtheorem{proposition}[theorem]{Proposition}
221: \newtheorem{remark}[theorem]{Remark}
222: \newtheorem{remarks}[theorem]{Remarks}
223: % Schemes are numbered alphabetically throughout: they have their own counter, not tied to sections, printed with \Alph.
224: \newtheorem{scheme}{Scheme}
225: \renewcommand{\thescheme}{\Alph{scheme}}
226: %-----------------------------------------------------------------------
227: \hyphenation{quick-sel-ect}
228: %-----------------------------------------------------------------------
229:
230: \begin{document} % End of preamble and beginning of text.
231:
232: \maketitle % Produces the title.
233:
234: \begin{abstract}
235: \noindent
236: We show that several versions of Floyd and Rivest's algorithm
237: {\sc Select} [Comm.\ ACM {\bf 18} (1975) 173] for finding the $k$th
238: smallest of $n$ elements require at most $n+\min\{k,n-k\}+o(n)$
239: comparisons on average, even when equal elements occur. This parallels
240: our recent analysis of another variant due to Floyd and Rivest
241: [Comm.\ ACM {\bf 18} (1975) 165--172]. Our computational results
242: suggest that both variants perform well in practice, and may compete
243: with other selection methods, such as Hoare's {\sc Find} or
244: quickselect with median-of-3 pivots.
245: \end{abstract}
246:
247: \begin{quotation}
248: \noindent{\bf Key words.} Selection, medians, partitioning,
249: computational complexity.
250: \end{quotation}
251:
252: %\begin{quotation}
253: %\noindent{\bf MSC Subject Classifications.} 68W20, 68W05, 68Q25
254: %\end{quotation}
255:
256: %\begin{quotation}
257: %\noindent{\bf Abbreviated title:} Randomized selection.
258: %\end{quotation}
259:
260: % *** SECTION 1 ***
261: \section{Introduction}
262: \label{s:intro}
263: The {\em selection problem\/} is defined as follows: Given a set
264: $X:=\{x_j\}_{j=1}^n$ of $n$ elements, a total order $<$ on $X$,
265: and an integer $1\le k\le n$, find the {\em $k$th smallest\/}
266: element of $X$, i.e., an element $x$ of $X$ for which there are at
267: most $k-1$ elements $x_j<x$ and at least $k$ elements $x_j\le x$.
268: The {\em median\/} of $X$ is the $\lceil n/2\rceil$th smallest
269: element of $X$.
270:
271: Selection is one of the fundamental problems in computer science;
272: see, e.g., the references in \cite{dohaulzw:lbs,dozw:sm,dozw:msr} and
273: \cite[\S5.3.3]{knu:acpIII2}. Most references concentrate on the
274: number of comparisons between pairs of elements made in selection
275: algorithms. In the worst case, selection needs at least
276: $(2+\epsilon)n$ comparisons \cite{dozw:msr}, whereas the algorithm of
277: \cite{blflprrita:tbs} makes at most $5.43n$, that of \cite{scpapi:fm}
278: needs $3n+o(n)$, and that in \cite{dozw:sm} takes $2.95n+o(n)$. In the
279: average case, for $k\le\lceil n/2\rceil$, at least $n+k-O(1)$
280: comparisons are necessary \cite{cumu:acs}, whereas the best upper bound
281: is $n+k+O(n^{1/2}\ln^{1/2}n)$ \cite[Eq.\ (5.3.3.16)]{knu:acpIII2}. The
282: classical algorithm {\sc Find} of \cite{hoa:a65}, also known as
283: quickselect, has an upper bound of $3.39n+o(n)$ for $k=\lceil n/2\rceil$
284: in the average case \cite[Ex.\ 5.2.2--32]{knu:acpIII2}, which improves
285: to $2.75n+o(n)$ for median-of-3 pivots \cite{gru:mvh,kimapr:ahf}.
286:
287: In practice {\sc Find} is most popular. One reason is that the
288: algorithms of \cite{blflprrita:tbs,scpapi:fm} are much slower on the
289: average \cite{mus:iss,val:iss}, whereas \cite{kimapr:ahf} adds that
290: other methods proposed so far, although better than {\sc Find} in
291: theory, are not practical because they are difficult to implement,
292: their constant factors and hidden lower order terms are too large,
293: etc. It is quite surprising that these references
294: \cite{kimapr:ahf,mus:iss,val:iss} ignore the algorithm {\sc Select}
295: of \cite{flri:etb}, since most textbooks mention that {\sc Select} is
296: asymptotically faster than {\sc Find}. In contrast, this paper shows
297: that {\sc Select} can compete with {\sc Find} in both theory and
298: practice, even for fairly small values of the input size $n$.
299:
300: We now outline our contributions in more detail. The initial two
301: versions of {\sc Select} \cite{flri:etb} had gaps in their analysis
302: (cf.\ \cite{bro:ra489,poriti:eds}, \cite[Ex.\ 5.3.3--24]{knu:acpIII2});
303: the first version was validated in \cite{kiw:rsq}, and the second one
304: will be addressed elsewhere. This paper deals with the third version
305: of {\sc Select} from \cite{flri:asf}, which operates as follows. Using
306: a small random sample, it finds an element $v$ almost sure to be just
307: above the $k$th if $k<n/2$, or below the $k$th if $k\ge n/2$.
308: Partitioning $X$ about $v$ leaves $\min\{k,n-k\}+o(n)$ elements on
309: average for the next recursive call, in which $k$ is near $1$ or $n$
310: with high probability, so this second call eliminates almost all the
311: remaining elements.
312:
313: Apparently this version of {\sc Select} has not been analyzed in the
314: literature, even in the case of distinct elements. We first revise it
315: slightly to simplify our analysis. Then, without assuming that the
316: elements are distinct, we show that {\sc Select} needs at most
317: $n+\min\{k,n-k\}+O(n^{2/3}\ln^{1/3}n)$ comparisons on average, with
318: $\ln^{1/3}n$ replaced by $\ln^{1/2}n$ for the original samples of
319: \cite{flri:asf}. Thus the average cost of {\sc Select} reaches the
320: lower bounds of $1.5n+o(n)$ for median selection and $1.25n+o(n)$
321: for selecting an element of random rank. For the latter task,
322: {\sc Find} has the bound $2n+o(n)$ when its pivot is set to the
323: median of a random sample of $s$ elements, with $s\to\infty$,
324: $s/n\to0$ as $n\to\infty$ \cite{maro:oss}; thus {\sc Select}
325: improves upon {\sc Find} mostly by using $k$, the rank of the element
326: to be found, for selecting the pivot $v$ in each recursive call.
327:
328: {\sc Select} can be implemented by using the tripartitioning schemes
329: of \cite[\S5]{kiw:psq}, which include a modified scheme of
330: \cite{bemc:esf}; more traditional bipartitioning schemes
331: \cite[\S2]{kiw:psq} can perform quite poorly in {\sc Select} when
332: equal elements occur. We add that the implementation of \cite{flri:asf}
333: avoids random number generation by assuming that the input file is in
334: random order, but this results in poor performance on some inputs of
335: \cite{val:iss}; hence our implementation of {\sc Select} employs
336: random sampling.
337:
338: Our computational experience shows that {\sc Select} outperforms even
339: quite sophisticated implementations of {\sc Find} in both comparison
340: counts and computing times. To save space, only selected results are
341: reported for the version of \cite{val:iss}, but our experience with
342: other versions on many different inputs was similar. {\sc Select}
343: turned out to be more stable than {\sc Find}, having much smaller
344: variations of solution times and numbers of comparisons. Quite
345: surprisingly, contrary to the folklore saying that {\sc Select} is only
346: asymptotically faster than {\sc Find}, {\sc Select} makes significantly
347: fewer comparisons even for small inputs
348: (cf.\ Tab.\ \ref{tab:comp_small}).
349:
350: To relate our results to those of \cite{kiw:rsq}, let us call
351: {\sc qSelect} the quintary method of \cite{kiw:rsq} stemming from
352: \cite[\S2.1]{flri:etb}. {\sc qSelect} eliminates almost all
353: elements on its first call by using two pivots, almost sure to be
354: just below and above the $k$th element, in a quintary partitioning
355: scheme. Thus most work occurs on the first call of {\sc qSelect},
356: which corresponds to the first two calls of {\sc Select}. Hence
357: {\sc Select} and {\sc qSelect} share the same efficiency estimates,
358: and in practice make similarly many comparisons. However, {\sc qSelect}
359: tends to be slightly faster on median finding: although its quintary
360: scheme is more complex, most of its work is spent on the first pass
361: through $X$, whereas {\sc Select} first partitions $X$ and then the
362: remaining part (about half) of $X$ on its second call to achieve a
363: similar problem reduction. On the other hand, {\sc Select} makes
364: fewer comparisons on small inputs. Of course, future work should assess
365: more fully the relative merits of {\sc Select} and {\sc qSelect}. For
366: now, the tests reported in \cite{kiw:psq,kiw:rsq} and in \S\ref{s:exp}
367: suggest that both {\sc Select} and {\sc qSelect} can compete
368: successfully with refined implementations of {\sc Find}.
369:
370: The paper is organized as follows. A general version of {\sc Select} is
371: introduced in \S\ref{s:alg}, and its basic features are analyzed in
372: \S\ref{s:sample}. The average performance of {\sc Select} is studied
373: in \S\ref{s:average}. A modification that improves practical
374: performance is introduced in \S\ref{s:modmed}.
375: Partitioning schemes are discussed in \S\ref{s:ternpart}.
376: Finally, our computational results are reported in \S\ref{s:exp}.
377: %The Appendix contains proofs of certain technical results.
378: %Finally, we have a conclusion section.
379:
380: Our notation is fairly standard.
381: $|A|$ denotes the cardinality of a set $A$.
382: In a given probability space, $\Prob$ is the probability measure,
383: $\Exp$ is the mean-value operator and $\Prob[\cdot|{\cal E}]$ is the
384: probability conditioned on an event ${\cal E}$; the complement of
385: ${\cal E}$ is denoted by ${\cal E}'$.
386: %
387: % *** SECTION 2 ***
388: \section{The algorithm {\sc Select}}
389: \label{s:alg}
390: In this section we describe a general version of {\sc Select} in terms
391: of two auxiliary functions $s(n)$ and $g(n)$ (the sample size and rank
392: gap), which will be chosen later. We omit their arguments in general,
393: as no confusion can arise.
394: %
395: % *** ALGORITHM 2.1 ***
396: \begin{algorithm}
397: \label{alg:sel3}
398: \rm
399: \hfil\newline\noindent{\bf {\sc Select}$(X,k)$}
400: (Selects the $k$th smallest element of $X$, with $1\le k\le n:=|X|$)
401: \medbreak\noindent{\bf Step 1} ({\em Initiation\/}).
402: If $n=1$, return $x_1$.
403: %Choose the sample size $s\in\{1\colon n-1\}$ and gap $g>0$.
404: Choose the sample size $s\le n-1$ and gap $g>0$.
405: \medbreak\noindent{\bf Step 2} ({\em Sample selection\/}).
406: Pick randomly a sample $S:=\{y_1,\ldots,y_s\}$ from $X$.
407: \medbreak\noindent{\bf Step 3} ({\em Pivot selection\/}).
408: Let $v$ be the output of {\sc Select}$(S,i_v)$, where
409: \begin{equation}
410: i_v:=\left\{\begin{array}{ll}
411: \rlap{$\min$}\phantom{\max}\left\{\,\lceil ks/n+g\rceil,s\,\right\}&
412: \mbox{if}\ k<n/2,\\
413: \max\left\{\,\lceil ks/n-g\rceil,1\,\right\}&
414: \mbox{if}\ k\ge n/2.
415: \end{array}\right.
416: \label{iv}
417: \end{equation}
418: \medbreak\noindent{\bf Step 4} ({\em Partitioning\/}).
419: By comparing each element $x$ of $X\setminus S$ to $v$, partition $X$
420: into the three sets $L:=\{x\in X:x<v\}$, $E:=\{x\in X:x=v\}$ and
421: $R:=\{x\in X:v<x\}$.
422: \medbreak\noindent{\bf Step 5} ({\em Stopping test\/}).
423: If $|L|<k\le|L\cup E|$, return $v$.
424: \medbreak\noindent{\bf Step 6} ({\em Reduction\/}).
425: If $k\le|L|$, set $\hat X:=L$, $\hat n:=|\hat X|$ and $\hat k:=k$;
426: else set $\hat X:=R$, $\hat n:=|\hat X|$ and $\hat k:=k-|L\cup E|$.
427: \medbreak\noindent{\bf Step 7} ({\em Recursion\/}).
428: Return {\sc Select}$(\hat X,\hat k)$.
429: \end{algorithm}
430:
431: A few remarks on the algorithm are in order.
432: %
433: % *** REMARKS 2.2 ***
434: \begin{remarks}
435: \label{r:sel3}
436: \rm
437: (a)
438: The correctness and finiteness of {\sc Select} stem by induction from
439: the following observations. The returns of Steps 1 and 5 deliver the
440: desired element. At Step 6, $\hat X$ and $\hat k$ are chosen so that
441: the $k$th smallest element of $X$ is the $\hat k$th smallest element
442: of $\hat X$, and $\hat n<n$ (since $v\not\in\hat X$). Also $|S|<n$ for
443: the recursive call at Step 3.
444: \par(b)
445: When Step 5 returns $v$, {\sc Select} may also return information about
446: the positions of the elements of $X$ relative to $v$. For instance, if
447: $X$ is stored as an array, its $k$ smallest elements may be placed first
448: via interchanges at Step 4 (cf.\ \S\ref{s:ternpart}). Hence Step 4 need
449: only compare $v$ with the elements of $X\setminus S$.
450: \par(c)
451: The following elementary property is needed in \S\ref{s:average}.
452: Let $c_n$ denote the maximum number of comparisons taken by {\sc Select}
453: on any input of size $n$. Since Step 3 makes at most $c_s$
454: comparisons with $s<n$, Step 4 needs at most $n-s$, and Step 7 takes
455: at most $c_{\hat n}$ with $\hat n<n$, by induction $c_n<\infty$ for
456: all $n$.
457: \end{remarks}
458: %
459: % *** SECTION 3 ***
460: \section{Sampling deviations}
461: \label{s:sample}
462: In this section we analyze general features of sampling used by
463: {\sc Select}.
464: Our analysis hinges on the following bound on the tail of the
465: hypergeometric distribution established in \cite{hoe:pis} and
466: rederived shortly in \cite{chv:thd}.
467: %
468: % *** FACT 3.1 ***
469: \begin{fact}
470: \label{f:balls3}
471: Let\/ $s$ balls be chosen uniformly at random from a set of\/ $n$ balls,
472: of which\/ $r$ are red, and\/ $r'$ be the random variable representing
473: the number of red balls drawn. Let\/ $p:=r/n$. Then
474: \begin{equation}
475: \Prob\left[\,r'\ge ps+g\,\right]\le e^{-2g^2\!/s}\quad\forall g\ge0.
476: \label{Pexpg}
477: \end{equation}
478: \end{fact}
479:
480: Denote by $x_1^*\le\ldots\le x_n^*$ and $y_1^*\le\ldots\le y_s^*$ the
481: sorted elements of the input set $X$ and the sample set $S$,
482: respectively, so that $v=y_{i_v}^*$. The following result will give
483: bounds on the position of $v$ in the sorted input sequence.
484: %
485: % *** LEMMA 3.2 ***
486: \begin{lemma}
487: \label{l:rankgen}
488: Suppose\/ $\bar\imath:=\max\{1,\min(\lceil\kappa s\rceil,s)\}$,
489: $\bar\jmath_l:=\max\{\lceil\kappa n-gn/s\rceil,1\}$, and\/
490: $\bar\jmath_r:=\min\{\lceil\kappa n+gn/s\rceil,n\}$, where\/
491: $-g<\kappa s\le s+g$, $1\le s\le n$ and $g\ge0$. Then\/{\rm:}
492: %
493: \par\indent\rlap{\rm(a)}\hphantom{\rm(a)}
494: $\Prob[y_{\bar\imath}^*<x_{\bar\jmath_l}^*]\le e^{-2g^2\!/s}$ if\/
495: $\bar\imath\ge\lceil\kappa s\rceil$.
496: %
497: \par\indent\rlap{\rm(b)}\hphantom{\rm(a)}
498: $\Prob[x_{\bar\jmath_r}^*<y_{\bar\imath}^*]\le e^{-2g^2\!/s}$ if\/
499: $\bar\imath\le\lceil\kappa s\rceil$.
500: \end{lemma}
501: \begin{proof}
502: Note that $-g<\kappa s\le s+g$ implies that $\bar\jmath_l\le n$ and
503: $\bar\jmath_r\ge1$ are well-defined.
504:
505: (a) If $y_{\bar\imath}^*<x_{\bar\jmath_l}^*$, at least $\bar\imath$
506: samples satisfy $y_i\le x_r^*$, where
507: $r:=\max_{x_j^*<x_{\bar\jmath_l}^*}j$.
508: In the setting of Fact \ref{f:balls3}, we have $r$ red elements
509: $x_j\le x_r^*$, $ps=rs/n$ and $r'\ge\bar\imath$. Now,
510: $1\le r\le\bar\jmath_l-1$ implies
511: $2\le\bar\jmath_l=\lceil\kappa n-gn/s\rceil<\kappa n-gn/s+1$,
512: so $-rs/n>-\kappa s+g$. Hence
513: $\bar\imath-ps-g>\kappa s-\kappa s+g-g=0$, i.e., $r'>ps+g$.
514: Thus $\Prob[y_{\bar\imath}^*<x_{\bar\jmath_l}^*]\le e^{-2g^2\!/s}$
515: by \eqref{Pexpg}.
516:
517: (b) If $x_{\bar\jmath_r}^*<y_{\bar\imath}^*$, $s-\bar\imath+1$ samples
518: are at least $x_{\bar\jmath+1}^*$ with
519: $\bar\jmath:=\max_{x_j^*=x_{\bar\jmath_r}^*}j$. Thus we have
520: $r:=n-\bar\jmath$ red elements $x_j\ge x_{\bar\jmath+1}^*$,
521: $ps=s-\bar\jmath s/n$ and $r'\ge s-\bar\imath+1$. Since
522: $\bar\imath<\kappa s+1$ and
523: $n>\bar\jmath\ge\bar\jmath_r\ge\kappa n+gn/s$,
524: we get $s-\bar\imath+1-ps-g>\bar\jmath s/n-\kappa s-g\ge\kappa s+g-
525: \kappa s-g=0$. Hence $r'>ps+g$ and
526: $\Prob[x_{\bar\jmath_r}^*<y_{\bar\imath}^*]\le
527: \Prob[r'\ge ps+g]\le e^{-2g^2\!/s}$ by \eqref{Pexpg}.
528: \qed
529: \end{proof}
530:
531: We now bound the position of $v$ relative to $x_k^*$, $x_{k_l}^*$ and
532: $x_{k_r}^*$, where
533: \begin{equation}
534: k_l:=\max\left\{\,\lceil k-2gn/s\rceil,1\,\right\}
535: \quad\mbox{and}\quad
536: k_r:=\min\left\{\,\lceil k+2gn/s\rceil,n\,\right\}.
537: \label{klkr3}
538: \end{equation}
539: %
540: % *** COROLLARY 3.3 ***
541: \begin{corollary}
542: \label{c:rankdir3}
543: {\rm(a)}
544: $\Prob[v<x_k^*]\le e^{-2g^2\!/s}$ if\/ $i_v=\lceil ks/n+g\rceil$
545: and\/ $k<n/2$.
546: %
547: \par\indent\rlap{\rm(b)}\hphantom{\rm(a)}
548: $\Prob[x_{k_r}^*<v]\le e^{-2g^2\!/s}$
549: if\/ $k<n/2$.
550: %
551: \par\indent\rlap{\rm(c)}\hphantom{\rm(a)}
552: $\Prob[x_k^*<v]\le e^{-2g^2\!/s}$ if\/ $i_v=\lceil ks/n-g\rceil$
553: and\/ $k\ge n/2$.
554: %
555: \par\indent\rlap{\rm(d)}\hphantom{\rm(a)}
556: $\Prob[v<x_{k_l}^*]\le e^{-2g^2\!/s}$
557: if\/ $k\ge n/2$.
558: %
559: \par\indent\rlap{\rm(e)}\hphantom{\rm(a)}
560: If\/ $k<n/2$, then\/
561: $i_v\ne\lceil ks/n+g\rceil$ iff\/ $n<k+gn/s${\rm;}
562: similarly, if\/ $k\ge n/2$, then\/
563: $i_v\ne\lceil ks/n-g\rceil$ iff\/ $k\le gn/s$.
564: \end{corollary}
565: \begin{proof}
566: Use Lem.\ \ref{l:rankgen} with $\kappa s=ks/n+g$ for (a,b), and
567: $\kappa s=ks/n-g$ for (c,d).
568: \qed
569: \end{proof}
570: %
571: % *** SECTION 4 ***
572: \section{Average case performance}
573: \label{s:average}
574: In this section we analyze the average performance of {\sc Select} for
575: various sample sizes.
576: %
577: % *** SUBSECTION 4.1 ***
578: \subsection{Floyd-Rivest's samples}
579: \label{ss:FRsample}
580: %
581: For positive constants $\alpha$ and $\beta$, consider choosing
582: $s=s(n)$ and $g=g(n)$ as
583: \begin{equation}
584: s:=\min\left\{\lceil\alpha f(n)\rceil,n-1\right\}\ \mbox{and}\
585: g:=(\beta s\ln n)^{1/2}\ \mbox{with}\ f(n):=n^{2/3}\ln^{1/3}n.
586: \label{sgf}
587: \end{equation}
588: This form of $g$ gives a probability bound
589: $e^{-2g^2\!/s}=n^{-2\beta}$ for Cor.\ \ref{c:rankdir3}.
590: To get more feeling, suppose $\alpha=\beta=1$ and $s=f(n)$.
591: Let $\phi(n):=f(n)/n$. Then $s/n=g/s=\phi(n)$ and it will be seen
592: that the recursive call reduces $n$ at least by the factor $4\phi(n)$
593: on average, i.e., $\phi(n)$ is a contraction factor; note that
594: $\phi(n)\approx2.4\%$ for $n=10^6$ (cf.\ Tab.\ \ref{tab:fnphin}).
595: %
596: % *** TABLE 4.1 ***
597: \begin{table}
598: \caption{Sample size $f(n):=n^{2/3}\ln^{1/3}n$ and relative sample size
599: $\phi(n):=f(n)/n$.}
600: \label{tab:fnphin}
601: \footnotesize
602: \begin{center}
603: \begin{tabular}{ccccccccc}
604: \hline
605: \vphantom{$1^{2^3}$} % Need more vertical space!
606: $n$ & $10^3$ & $10^4$ & $10^5$ & $10^6$ & $5\cdot10^6$ & $10^7$
607: & $5\cdot10^7$ & $10^8$ \\
608: \hline
609: $f(n)$ & 190.449& 972.953& 4864.76& 23995.0& 72787.1& 117248
610: & 353885 & 568986 \\
611: $\phi(n)$
612: & .190449& .097295& .048648& .023995& .014557& .011725
613: & .007078& .005690\\
614: \hline
615: \end{tabular}
616: \end{center}
617: \end{table}
618: %
619: % *** THEOREM 4.1 ***
620: \begin{theorem}
621: \label{t:selFR}
622: Let\/ $C_{nk}$ denote the expected number of comparisons made by
623: {\sc Select} for $s$ and\/ $g$ chosen as in\/ \eqref{sgf} with\/
624: $\beta\ge1/6$. There exists a positive constant\/ $\gamma$ such
625: that
626: \begin{equation}
627: C_{nk}\le n+\min\{\,k,n-k\,\}+\gamma f(n)\quad\forall1\le k\le n.
628: \label{CnkFR}
629: \end{equation}
630: \end{theorem}
631: \begin{proof}
632: We need a few preliminary facts.
633: The function $\phi(t):=f(t)/t=(\ln t/t)^{1/3}$ decreases to $0$ on
634: $[e,\infty)$, whereas $f(t)$ grows to infinity on $[2,\infty)$.
635: Let $\delta:=4(\beta/\alpha)^{1/2}$.
636: Pick $\bar n\ge3$ large enough so that
637: $e-1\le\alpha f(\bar n)\le\bar n-1$ and $e\le\delta f(\bar n)$.
638: Let $\bar\alpha:=\alpha+1/f(\bar n)$.
639: Then, by \eqref{sgf} and the monotonicity of $f$ and $\phi$, we have
640: for $n\ge\bar n$
641: \begin{equation}
642: s\le\bar\alpha f(n)\quad\mbox{and}\quad
643: f(s)\le\bar\alpha\phi(\bar\alpha f(\bar n))f(n),
644: \label{sfsFR}
645: \end{equation}
646: \begin{equation}
647: f(\lfloor\delta f(n)\rfloor)\le f(\delta f(n))\le
648: \delta\phi(\delta f(\bar n))f(n).
649: \label{flfloordeltaFR}
650: \end{equation}
651: For instance, the first inequality of \eqref{sfsFR} yields
652: $f(s)\le f(\bar\alpha f(n))$, whereas
653: $$
654: f(\bar\alpha f(n))=\bar\alpha\phi(\bar\alpha f(n))f(n)\le
655: \bar\alpha\phi(\bar\alpha f(\bar n))f(n).
656: $$
657: Also for $n\ge\bar n$,
658: we have $s=\lceil\alpha f(n)\rceil=\alpha f(n)+\epsilon$ with
659: $\epsilon\in[0,1)$ in \eqref{sgf}. Writing $s=\tilde\alpha f(n)$ with
660: $\tilde\alpha:=\alpha+\epsilon/f(n)\in[\alpha,\bar\alpha)$, we deduce
661: from \eqref{sgf} that
662: \begin{equation}
663: gn/s=(\beta/\tilde\alpha)^{1/2}f(n)\le(\beta/\alpha)^{1/2}f(n).
664: \label{gnsboundFR}
665: \end{equation}
666: In particular, $4gn/s\le\delta f(n)$, since
667: $\delta:=4(\beta/\alpha)^{1/2}$. Next, \eqref{sgf} implies
668: \begin{equation}
669: ne^{-2g^2\!/s}\le
670: n^{1-2\beta}=f(n)n^{1/3-2\beta}\ln^{-1/3}n.
671: \label{ne2g2sFR}
672: \end{equation}
673: Using the monotonicity of $f$ and $\phi$, increase $\bar n$ if necessary
674: to get for all $n\ge\bar n$
675: \begin{equation}
676: 2\bar\alpha\phi(\bar\alpha f(\bar n))+
677: \delta\phi(\delta f(\bar n))+2n^{-2\beta}+
678: 2\max\left\{\,[\delta f(n)]^{2/3-2\beta}n^{-2/3},
679: n^{-2\beta}\,\right\}\le0.95.
680: \label{0.95FR}
681: \end{equation}
682: By Rem.\ \ref{r:sel3}(c), there is $\gamma$ such that \eqref{CnkFR}
683: holds for all $n\le\bar n$; increasing $\gamma$ if necessary, and
684: using the monotonicity of $f$ and the assumption $\beta\ge1/6$,
685: we have for all $n\ge\bar n$
686: \begin{equation}
687: 2\bar\alpha+2\delta+5n^{1/3-2\beta}\ln^{-1/3}n+
688: 3\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},
689: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}\le0.05\gamma.
690: \label{0.05FR}
691: \end{equation}
692:
693: Let $n'\ge\bar n$. Assuming \eqref{CnkFR} holds for all $n\le n'$,
694: for induction let $n=n'+1$.
695:
696: We need to consider the following two cases in the first call of
697: {\sc Select}.
698:
699: {\em Left case\/}: $k<n/2$.
700: First, suppose the event
701: ${\cal E}_l:=\{x_k^*\le v\le x_{k_r}^*\}$ occurs. By the rules of
702: Steps 4--6, we have $\hat X=L$ (from $x_k^*\le v$), $\hat k=k$ and
703: $\hat n:=|\hat X|\le k_r-1$ (from $v\le x_{k_r}^*$); since
704: $k_r<k+2gn/s+1$ by \eqref{klkr3}, we get the two (equivalent) bounds
705: \begin{equation}
706: \hat n<k+2gn/s\quad\mbox{and}\quad \hat n-\hat k<2gn/s.
707: \label{hatnleft}
708: \end{equation}
709: Note that if $i_v=\lceil ks/n+g\rceil$ then,
710: by Cor.\ \ref{c:rankdir3}(a,b), the Boole--Bonferroni inequality and the
711: choice \eqref{sgf}, the complement ${\cal E}_l'$ of ${\cal E}_l$ has
712: $\Prob[{\cal E}_l']\le2e^{-2g^2\!/s}=2n^{-2\beta}$.
713: Second, if $i_v\ne\lceil ks/n+g\rceil$, then $n<k+gn/s$
714: (Cor.\ \ref{c:rankdir3}(e)) combined with $k<n/2$ gives $n<2gn/s$;
715: hence $\hat n-\hat k<\hat n<n<2gn/s$ implies \eqref{hatnleft}. Since
716: also ${\cal E}_l$ implies \eqref{hatnleft}, we have
717: \begin{equation}
718: \Prob[{\cal A}_l']\le2n^{-2\beta}\quad\mbox{for}\quad
719: {\cal A}_l:=\left\{\,\hat n-\hat k<2gn/s\,\right\}.
720: \label{Al}
721: \end{equation}
722:
723: {\em Right case\/}: $k\ge n/2$.
724: First, suppose the event
725: ${\cal E}_r:=\{x_{k_l}^*\le v\le x_k^*\}$ occurs. By the rules of
726: Steps 4--6, we have $\hat X=R$ (from $v\le x_k^*$),
727: $\hat n-\hat k=n-k$ and $\hat n:=|\hat X|\le n-k_l$ (from
728: $x_{k_l}^*\le v$); since $k_l\ge k-2gn/s$ by \eqref{klkr3}, we get
729: the two (equivalent) bounds
730: \begin{equation}
731: \hat n\le n-k+2gn/s\quad\mbox{and}\quad\hat k\le2gn/s,
732: \label{hatnright}
733: \end{equation}
734: using $\hat n-\hat k=n-k$.
735: If $i_v=\lceil ks/n-g\rceil$ then, by Cor.\ \ref{c:rankdir3}(c,d),
736: the complement ${\cal E}_r'$ of ${\cal E}_r$ has
737: $\Prob[{\cal E}_r']\le2e^{-2g^2\!/s}=2n^{-2\beta}$.
738: Second, if $i_v\ne\lceil ks/n-g\rceil$, then $k\le gn/s$
739: (Cor.\ \ref{c:rankdir3}(e)) combined with $k\ge n/2$ gives
740: $n\le2gn/s$; hence $\hat k\le\hat n<n\le2gn/s$ implies
741: \eqref{hatnright}. Thus
742: \begin{equation}
743: \Prob[{\cal A}_r']\le2n^{-2\beta}\quad\mbox{for}\quad
744: {\cal A}_r:=\left\{\,\hat k\le2gn/s\,\right\}.
745: \label{Ar}
746: \end{equation}
747:
748: Since $k<n-k$ if $k<n/2$, $n-k\le k$ if $k\ge n/2$, \eqref{hatnleft} and
749: \eqref{hatnright} yield
750: \begin{equation}
751: \Prob[{\cal B}']\le2n^{-2\beta}\quad\mbox{for}\quad
752: {\cal B}:=\left\{\,\hat n\le\min\{\,k,n-k\,\}+2gn/s\,\right\}.
753: \label{B}
754: \end{equation}
755: Note that $\min\{k,n-k\}\le\lfloor n/2\rfloor\le n/2$; this relation
756: will be used implicitly below.
757:
758: For the recursive call of Step 7, let $\hat s$, $\hat g$ and
759: $\hat\imath_v$ denote the quantities generated as in \eqref{sgf} and
760: \eqref{iv} with $n$ and $k$ replaced by $\hat n$ and $\hat k$, let
761: $\hat v$ be the pivot found at Step 3, and let $\check X$, $\check n$
762: and $\check k$ correspond to $\hat X$, $\hat n$ and $\hat k$ at Step 7,
763: so that $\check n:=|\check X|<\hat n$.
764:
765: The cost of selecting $v$ and $\hat v$ at Step 3 may be estimated as
766: \begin{equation}
767: C_{si_v}+C_{\hat s\hat\imath_v}\le
768: 1.5s+\gamma f(s)+1.5\hat s+\gamma f(\hat s)\le 3s+2\gamma f(s),
769: \label{CsivFR}
770: \end{equation}
771: since $f$ is increasing and \eqref{CnkFR} holds for
772: $\hat s\le s\le n-1=n'$ (cf.\ \eqref{sgf}) from $\hat n<n$.
773:
774: Let $c:=n-s$ and $\hat c:=\hat n-\hat s$ denote the costs of Step 4
775: for the two calls. Since $0\le\hat c<n$ and
776: $\Exp\hat c=\Exp[\hat c|{\cal B}]\Prob[{\cal B}]+
777: \Exp[\hat c|{\cal B}']\Prob[{\cal B}']\le
778: \Exp[\hat c|{\cal B}]+n\Prob[{\cal B}']$, by \eqref{B} we have
779: \begin{equation}
780: c+\Exp\hat c\le n-s+\min\{\,k,n-k\,\}+2gn/s+2n^{1-2\beta}.
781: \label{cEhatc}
782: \end{equation}
783:
784: Using \eqref{CnkFR} again with $\check n<n$,
785: the cost of finishing up at Step 7 is at most
786: \begin{equation}
787: \Exp C_{\check n\check k}\le
788: \Exp\left[\,1.5\check n+\gamma f(\check n)\,\right]=
789: 1.5\Exp \check n+\gamma\Exp f(\check n).
790: \label{ECcheckn}
791: \end{equation}
792: Thus we need suitable bounds for $\Exp\check n$ and $\Exp f(\check n)$,
793: which may be derived as follows.
794:
795: To generalize \eqref{B} to the recursive call, consider the events
796: \begin{equation}
797: \hat{\cal B}:=\left\{\,\check n\le\min\{\,\hat k,\hat n-\hat k\,\}+
798: 2\hat g\hat n/\hat s\,\right\}
799: \quad\mbox{and}\quad
800: {\cal C}:=\left\{\,\check n\le\lfloor\delta f(n)\rfloor\,\right\}.
801: \label{hatBC}
802: \end{equation}
803: By \eqref{Al} and \eqref{Ar}, $\hat{\cal B}\cap{\cal A}_l$ and
804: $\hat{\cal B}\cap{\cal A}_r$ imply ${\cal C}$, since
805: $2gn/s+2\hat g\hat n/\hat s\le\delta f(n)$ by \eqref{gnsboundFR} with
806: $\hat n<n$ and $\delta:=4(\beta/\alpha)^{1/2}$. For the recursive
807: call, proceeding as in the derivation of \eqref{B} with $n$ replaced
808: by $\hat n=i$, $k$ by $\hat k$, etc., shows that, due to random
809: sampling,
810: \begin{equation}
811: \Prob[\hat{\cal B}'|{\cal A}_l,\hat n=i]\le2i^{-2\beta}
812: \quad\mbox{and}\quad
813: \Prob[\hat{\cal B}'|{\cal A}_r,\hat n=i]\le2i^{-2\beta}.
814: \label{PB'AlB'Ar}
815: \end{equation}
816:
817: In the left case of $k<n/2$, using $\check n<n$ and
818: $\Prob[{\cal A}_l']\le2n^{-2\beta}$ (cf.\ \eqref{Al}), we get
819: $$
820: \Exp\check n=\Exp[\check n|{\cal A}_l]\Prob[{\cal A}_l]+
821: \Exp[\check n|{\cal A}_l']\Prob[{\cal A}_l']\le
822: \Exp[\check n|{\cal A}_l]+n2n^{-2\beta}.
823: $$
824: Partitioning ${\cal A}_l$ into the events
825: ${\cal D}_i:={\cal A}_l\cap\{\hat n=i\}$, $i=0\colon n-1$
826: ($\hat n<n$ always), we have
827: $$
828: \Exp[\check n|{\cal A}_l]=\sum_{i=0}^{n-1}
829: \Exp[\check n|{\cal D}_i]\Prob[{\cal D}_i|{\cal A}_l]\le
830: \max_{i=0\colon n-1}\Exp[\check n|{\cal D}_i],
831: $$
832: where $\Exp[\check n|{\cal D}_i]\le\lfloor\delta f(n)\rfloor$ if
833: $i\le\lfloor\delta f(n)\rfloor+1$, because $\check n<\hat n$ always.
834: As for the remaining terms,
835: $\hat{\cal B}\cap{\cal A}_l\subset{\cal C}$ implies
836: $\Prob[{\cal C}'|{\cal D}_i]\le\Prob[\hat{\cal B}'|{\cal D}_i]\le
837: 2i^{-2\beta}$ by \eqref{PB'AlB'Ar}, where
838: ${\cal C}:=\{\check n\le\lfloor\delta f(n)\rfloor\}$
839: and $\check n<\hat n=i$ when the event ${\cal D}_i$ occurs, so
840: $\Exp[\check n|{\cal D}_i]\le\lfloor\delta f(n)\rfloor+i2i^{-2\beta}$.
841: Hence
842: $$
843: \max_{i=0\colon n-1}\Exp[\check n|{\cal D}_i]\le
844: \lfloor\delta f(n)\rfloor+
845: \max_{i=\lfloor\delta f(n)\rfloor+2\colon n-1}2i^{1-2\beta},
846: $$
847: where the final term is omitted if $\lfloor\delta f(n)\rfloor>n-3$;
848: otherwise it is at most
849: $$
850: 2\max\left\{\,(\lfloor\delta f(n)\rfloor+1)^{1-2\beta},
851: n^{1-2\beta}\,\right\}\le
852: 2\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},
853: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}f(n),
854: $$
855: since $\max_{i=\lfloor\delta f(n)\rfloor+1\colon n}2i^{1-2\beta}$
856: is bounded as above (consider $\beta\ge1/2$, then $\beta<1/2$ and use
857: $\delta f(n)<\lfloor\delta f(n)\rfloor+1$, the monotonicity of $f$ and
858: \eqref{ne2g2sFR} for the final inequality).
859: Collecting the preceding estimates, we obtain
860: \begin{equation}
861: \Exp\check n\le\lfloor\delta f(n)\rfloor+2n^{1-2\beta}+
862: 2\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},
863: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}f(n).
864: \label{Echeckn}
865: \end{equation}
866: Similarly, replacing $\check n$ by $f(\check n)$ in our derivations
867: and using the monotonicity of $f$ yields
868: \begin{subequations}
869: \label{Efcheckn}
870: \begin{equation}
871: \Exp f(\check n)\le f(\lfloor\delta f(n)\rfloor)+2f(n)n^{-2\beta}+
872: \max_{i=\lfloor\delta f(n)\rfloor+2\colon n-1}2f(i)i^{-2\beta},
873: \label{Efcheckn:a}
874: \end{equation}
875: where the final term is omitted if $\lfloor\delta f(n)\rfloor>n-3$;
876: otherwise it is at most
877: \begin{equation}
878: 2\max\left\{\,
879: \frac{f(\lfloor\delta f(n)\rfloor+1)}
880: {(\lfloor\delta f(n)\rfloor+1)^{2\beta}},
881: \frac{f(n)}{n^{2\beta}}\,\right\}\le
882: 2\max\left\{\,[\delta f(n)]^{2/3-2\beta}n^{-2/3},
883: n^{-2\beta}\,\right\}f(n).
884: \label{Efcheckn:b}
885: \end{equation}
886: \end{subequations}
887: To see this, use the monotonicity of $f$ and the fact that for $i\le n$
888: (cf.\ \eqref{sgf})
889: $$
890: f(i)i^{-2\beta}\!/f(n)=i^{2/3-2\beta}n^{-2/3}(\ln i/\ln n)^{1/3}\le
891: i^{2/3-2\beta}n^{-2/3}.
892: $$
893:
894: For the right case, replace ${\cal A}_l$ by ${\cal A}_r$ in the
895: preceding paragraph to get \eqref{Echeckn}--\eqref{Efcheckn}.
896:
897: Add the costs \eqref{CsivFR}, \eqref{cEhatc} and \eqref{ECcheckn},
898: using \eqref{Echeckn}--\eqref{Efcheckn}, to get
899: \begin{eqnarray*}
900: C_{nk}&\le&3s+2\gamma f(s)+n-s+\min\{\,k,n-k\,\}+2gn/s+2n^{1-2\beta}\\
901: &&{}+1.5\lfloor\delta f(n)\rfloor+3n^{1-2\beta}+
902: 3\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},
903: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}f(n)\\
904: &&{}+\gamma f(\lfloor\delta f(n)\rfloor)+
905: 2\gamma f(n)n^{-2\beta}+
906: 2\gamma\max\left\{\,[\delta f(n)]^{2/3-2\beta}n^{-2/3},
907: n^{-2\beta}\,\right\}f(n).
908: \end{eqnarray*}
909: Now, using the bounds \eqref{sfsFR}--\eqref{flfloordeltaFR},
910: $2gn/s\le\frac12\delta f(n)$ (cf.\ \eqref{gnsboundFR}) and
911: \eqref{ne2g2sFR} gives
912: \begin{eqnarray*}
913: \lefteqn{C_{nk}\le n+\min\{\,k,n-k\,\}}\\
914: &&{}+\Big[2\bar\alpha+2\delta+5n^{1/3-2\beta}\ln^{-1/3}n+
915: 3\max\left\{\,\delta^{1-2\beta}f(n)^{-2\beta},
916: n^{1/3-2\beta}\ln^{-1/3}n\,\right\}\Big]f(n)\\
917: &&{}+\left[2\bar\alpha\phi(\bar\alpha f(\bar n))+
918: \delta\phi(\delta f(\bar n))+2n^{-2\beta}+
919: 2\max\left\{\,[\delta f(n)]^{2/3-2\beta}n^{-2/3},
920: n^{-2\beta}\,\right\}\right]\gamma f(n).
921: \end{eqnarray*}
922: By \eqref{0.95FR}--\eqref{0.05FR}, the two bracketed terms above are
923: at most $0.05\gamma f(n)$ and $0.95\gamma f(n)$, respectively; thus
924: \eqref{CnkFR} holds as required.
925: \qed
926: \end{proof}
927: %
928: % *** SUBSECTION 4.2 ***
929: \subsection{Other sampling strategies}
930: \label{ss:othersample}
931: %
932: We now indicate briefly how to adapt the proof of Thm \ref{t:selFR}
933: to several variations on \eqref{sgf}; a choice similar to
934: \eqref{sgfFRsn2/3} below was used in \cite{flri:asf}.
935: %
936: % *** REMARKS 4.2 ***
937: \begin{remarks}
938: \label{r:selFR}
939: \rm
940: (a)
941: Theorem \ref{t:selFR} remains true for $\beta\ge1/6$ and
942: \eqref{sgf} replaced by
943: \begin{equation}
944: s:=\min\left\{\left\lceil\alpha n^{2/3}\right\rceil,n-1\right\},\
945: g:=(\beta s\ln n)^{1/2}\ \mbox{and}\
946: f(n):=n^{2/3}\ln^{1/2}n.
947: \label{sgfFRsn2/3}
948: \end{equation}
949: Indeed, using $e^{3/2}-1\le\alpha\bar n^{2/3}\le\bar n-1$,
950: $e^{3/2}\le\delta f(\bar n)$, $\bar\alpha:=\alpha+\bar n^{-2/3}$
951: and $s=\tilde\alpha n^{2/3}$ with $\tilde\alpha\in[\alpha,\bar\alpha)$
952: yields \eqref{sfsFR}--\eqref{gnsboundFR} as before, and $\ln^{-1/2}$
953: replaces $\ln^{-1/3}$ in \eqref{ne2g2sFR}, \eqref{0.05FR} and
954: \eqref{Echeckn}.
955: \par(b)
956: Theorem \ref{t:selFR} holds for the following modification of
957: \eqref{sgf} with $\epsilon_l>1$
958: \begin{equation}
959: s:=\min\left\{\lceil\alpha f(n)\rceil,n-1\right\}\ \mbox{and}\
960: g:=(\beta s\ln^{\epsilon_l}n)^{1/2}\ \mbox{with}\
961: f(n):=n^{2/3}\ln^{\epsilon_l/3}n.
962: \label{sgfFRlneps}
963: \end{equation}
964: First, using $e^{\epsilon_l}-1\le\alpha f(\bar n)\le\bar n-1$ and
965: $e^{\epsilon_l}\le\delta f(\bar n)$ gives
966: \eqref{sfsFR}--\eqref{gnsboundFR} as before. Next, fix
967: $\tilde\beta\ge1/6$. Let $\beta_n:=\beta\ln^{\epsilon_l-1}n$.
968: Increase $\bar n$ if necessary so that $\beta_i\ge\tilde\beta$ for
969: all $i\ge\min\{\bar n,\lceil\delta f(\bar n)\rceil\}$; then
970: replace $\beta$ by $\tilde\beta$ and $\ln^{-1/3}$ by
971: $\ln^{-\epsilon_l/3}$ in \eqref{ne2g2sFR} and below.
972: \par(c)
973: Several other replacements for \eqref{sgf} may be analyzed as in
974: \cite[\S\S4.1--4.2]{kiw:rsq}.
975: \par(d)
976: None of these choices gives $f(n)$ better than that in \eqref{sgf} for
977: the bound \eqref{CnkFR}.
978: \end{remarks}
979:
980: We now comment briefly on the possible use of sampling with
981: replacement.
982: %
983: % *** REMARKS 4.3 ***
984: \begin{remarks}
985: \label{r:binsample}
986: \rm
987: (a)
988: Suppose Step 2 of {\sc Select} employs sampling with replacement.
989: Since the tail bound \eqref{Pexpg} remains valid for the binomial
990: distribution \cite{chv:thd,hoe:pis}, Lemma \ref{l:rankgen} is not
991: affected. However, when Step 4 no longer skips comparisons with
992: the elements of $S$, $-s$ in \eqref{cEhatc} is replaced by $0$; the
993: resulting change in the bound on $C_{nk}$ only needs replacing
994: $2\bar\alpha$ in \eqref{0.05FR} by $3\bar\alpha$. Hence the
995: preceding results remain valid.
996: \par(b)
997: Of course, sampling with replacement needs additional storage for
998: $S$. However, the increase in both storage and the number of
999: comparisons may be tolerated because the sample sizes are relatively
1000: small.
1001: \end{remarks}
1002: %
1003: % *** SUBSECTION 4.3 ***
1004: \subsection{Handling small subfiles}
1005: \label{ss:subfile}
1006: %
1007: Since the sampling efficiency decreases when $X$ shrinks, consider the
1008: following modification. For a fixed cut-off parameter
1009: $n_{\rm cut}\ge1$, let sSelect$(X,k)$ be a ``small-select'' routine that
1010: finds the $k$th smallest element of $X$ in at most $C_{\rm cut}<\infty$
1011: comparisons when $|X|\le n_{\rm cut}$ (even bubble sort will do). Then
1012: {\sc Select} is modified to start with the following
1013: \medbreak\noindent{\bf Step 0} ({\em Small file case\/}).
1014: If $n:=|X|\le n_{\rm cut}$, return sSelect$(X,k)$.
1015:
1016: Our preceding results remain valid for this modification. In fact it
1017: suffices if $C_{\rm cut}$ bounds the {\em expected\/} number of
1018: comparisons of sSelect$(X,k)$ for $n\le n_{\rm cut}$. For instance,
1019: \eqref{CnkFR} holds for $n\le n_{\rm cut}$ and $\gamma\ge C_{\rm cut}$,
1020: and by induction as in Rem.\ \ref{r:sel3}(c) we have $C_{nk}<\infty$
1021: for all $n$, which suffices for the proof of Thm \ref{t:selFR}.
1022:
1023: Another advantage is that even a small $n_{\rm cut}$ ($1000$, say) nicely
1024: limits the stack space for recursion. Specifically, the tail
1025: recursion of Step 7 is easily eliminated (set $X:=\hat X$, $k:=\hat k$
1026: and go to Step 0), and the calls of Step 3 deal with subsets whose
1027: sizes quickly reach $n_{\rm cut}$. For example, for the choice of
1028: \eqref{sgf} with $\alpha=1$ and $n_{\rm cut}=600$, at most four
1029: recursive levels occur for $n\le2^{31}\approx2.15\cdot10^9$.
1030: %
1031: % *** SECTION 5 ***
1032: \section{A modified version}
1033: \label{s:modmed}
1034: We now consider a modification inspired by a remark of
1035: \cite{bro:ra489}. For $k$ close to $\lceil n/2\rceil$, by symmetry
1036: it is best to choose $v$ as the sample median with
1037: $i_v=\lceil s/2\rceil$, thus attempting to get $v$ close to $x_k^*$
1038: instead of $x_{\lceil k-gn/s\rceil}^*$ or $x_{\lceil k+gn/s\rceil}^*$;
1039: then more elements are eliminated. Hence we may let
1040: \begin{equation}
1041: i_v:=\left\{\begin{array}{ll}
1042: \lceil ks/n+g\rceil&\mbox{if}\ k<n/2-gn/s,\\
1043: \lceil s/2\rceil&
1044: \mbox{if}\ n/2-gn/s\le k\le n/2+gn/s,\\
1045: \lceil ks/n-g\rceil&\mbox{if}\ k>n/2+gn/s.
1046: \end{array}\right.
1047: \label{iv3}
1048: \end{equation}
1049: Note that \eqref{iv3} coincides with \eqref{iv} in the {\em left\/} case
1050: of $k<n/2-gn/s$ and the {\em right\/} case of $k>n/2+gn/s$, but the
1051: {\em middle\/} case of $n/2-gn/s\le k\le n/2+gn/s$ fixes $i_v$
1052: at the median position $\lceil s/2\rceil$; in fact $i_v$ is the median
1053: of the three values in \eqref{iv3}:
1054: \begin{equation}
1055: i_v:=\max\left\{\,\min\left(\,\lceil ks/n+g\rceil,
1056: \lceil s/2\rceil\,\right),\lceil ks/n-g\rceil\,\right\}.
1057: \label{iv3med}
1058: \end{equation}
1059: Corollary \ref{c:rankdir3} remains valid for the left and right cases.
1060: For the middle case, letting
1061: \begin{equation}
1062: j_l:=\max\left\{\,\lceil n/2-gn/s\rceil,1\,\right\}
1063: \quad\mbox{and}\quad
1064: j_r:=\min\left\{\,\lceil n/2+gn/s\rceil,n\,\right\},
1065: \label{jljr3}
1066: \end{equation}
1067: we obtain from Lemma \ref{l:rankgen} with $\kappa=1/2$ the following
1068: complement of Corollary \ref{c:rankdir3}.
1069: %
1070: % *** COROLLARY 5.1 ***
1071: \begin{corollary}
1072: \label{c:iv3}
1073: $\Prob[v<x_{j_l}^*]\le e^{-2g^2\!/s}$ and\/
1074: $\Prob[x_{j_r}^*<v]\le e^{-2g^2\!/s}$
1075: if\/ $n/2-gn/s\le k\le n/2+gn/s$.
1076: \end{corollary}
1077: %
1078: % *** THEOREM 5.2 ***
1079: \begin{theorem}
1080: \label{t:selFRmed}
1081: Theorem\/ {\rm\ref{t:selFR}}
1082: holds for {\sc Select} with Step\/ $3$ using\/ \eqref{iv3}.
1083: \end{theorem}
1084: \begin{proof}
1085: We only indicate how to adapt the proof of Thm \ref{t:selFR} following
1086: \eqref{0.05FR}. As noted after \eqref{iv3}, the left case now has
1087: $k<n/2-gn/s$ and the right case has $k>n/2+gn/s$, so we only need to
1088: discuss the middle case.
1089:
1090: {\em Middle case\/}:
1091: $n/2-gn/s\le k\le n/2+gn/s$. Suppose
1092: the event ${\cal E}_m:=\{x_{j_l}^*\le v\le x_{j_r}^*\}$ occurs
1093: (note that $\Prob[{\cal E}_m']\le 2e^{-2g^2\!/s}=2n^{-2\beta}$
1094: by Cor.\ \ref{c:iv3}).
1095: If $\hat X=L$ then, by the rules of Steps 4--6, we have $\hat k=k$
1096: and $\hat n\le j_r-1$; since $j_r<n/2+gn/s+1$ by \eqref{jljr3}, we
1097: get $\hat n<n/2+gn/s$. Hence $k\ge n/2-gn/s$ yields
1098: $\hat n<k+2gn/s$ and $\hat n-\hat k<2gn/s$ as in \eqref{hatnleft}.
1099: Next, if $\hat X=R$ then $\hat n-\hat k=n-k$ and $\hat k:=k-|L\cup E|$,
1100: so $L\cup E=\{x\in X:x\le v\}\ni x_{j_l}^*$ gives $\hat k\le k-j_l$.
1101: Since $k\le n/2+gn/s$ and $j_l\ge n/2-gn/s$ by \eqref{jljr3}, we get
1102: $\hat k\le2gn/s$ and $\hat n\le\hat n-\hat k+2gn/s$ as in
1103: \eqref{hatnright}; further, $\hat n\le n-j_l$ yields
1104: $\hat n\le n/2+gn/s$. Noticing that $n/2-gn/s\le k\le n/2+gn/s$ implies
1105: $n/2\le\min\{k,n-k\}+gn/s$, we have
1106: $\hat n\le\min\{k,n-k\}+2gn/s$ in both cases.
1107:
1108: Thus in the middle case we again have \eqref{B} and hence
1109: \eqref{cEhatc}; further, by \eqref{Al} and \eqref{Ar}, the event
1110: ${\cal E}_m\subset{\cal A}_l\cup{\cal A}_r$ is partitioned into
1111: ${\cal E}_m\cap{\cal A}_l$ and
1112: ${\cal E}_m\cap{\cal A}_l'\cap{\cal A}_r$.
1113:
1114: Next, reasoning as before, we see that \eqref{PB'AlB'Ar} and hence
1115: \eqref{Echeckn}--\eqref{Efcheckn} remain valid in the left and right
1116: cases, whereas in the middle case we have
1117: \begin{equation}
1118: \Prob[\hat{\cal B}'|{\cal E}_m,{\cal A}_l,\hat n=i]\le2i^{-2\beta}
1119: \quad\mbox{and}\quad
1120: \Prob[\hat{\cal B}'|{\cal E}_m,{\cal A}_l',{\cal A}_r,\hat n=i]\le
1121: 2i^{-2\beta}.
1122: \label{PB'EmAl}
1123: \end{equation}
1124:
1125: In the middle case,
1126: $\Exp\check n=\Exp[\check n|{\cal E}_m]\Prob[{\cal E}_m]+
1127: \Exp[\check n|{\cal E}_m']\Prob[{\cal E}_m']$ is bounded by
1128: $\Exp[\check n|{\cal E}_m]+2n^{1-2\beta}$, since
1129: $\Prob[{\cal E}_m']\le2n^{-2\beta}$ and $\check n<n$ always. Next,
1130: partitioning ${\cal E}_m$ into ${\cal E}_m\cap{\cal A}_l$ and
1131: ${\cal E}_m\cap{\cal A}_l'\cap{\cal A}_r$, we obtain
1132: $\Exp[\check n|{\cal E}_m]\le
1133: \max\{\Exp[\check n|{\cal E}_m,{\cal A}_l],
1134: \Exp[\check n|{\cal E}_m,{\cal A}_l',{\cal A}_r]\}$, where
1135: $\Exp[\check n|{\cal E}_m,{\cal A}_l]$ and
1136: $\Exp[\check n|{\cal E}_m,{\cal A}_l',{\cal A}_r]$ may be bounded like
1137: $\Exp[\check n|{\cal A}_l]$ and $\Exp[\check n|{\cal A}_r]$ in the left
1138: and right cases to get \eqref{Echeckn}.
1139: Then \eqref{Efcheckn} is obtained similarly, and the conclusion follows
1140: as before.
1141: \qed
1142: \end{proof}
1143: %
1144: % *** SECTION 6 ***
1145: \section{Ternary partitions}
1146: \label{s:ternpart}
1147: In this section we discuss ways of implementing {\sc Select} when
1148: the input set is given as an array $x[1\colon n]$. We employ the
1149: following notation.
1150:
1151: Each stage works with a segment $x[l\colon r]$ of the input array
1152: $x[1\colon n]$, where $1\le l\le r\le n$ are such that $x_i<x_l$ for
1153: $i=1\colon l-1$, $x_r<x_i$ for $i=r+1\colon n$, and the $k$th smallest
1154: element of $x[1\colon n]$ is the $(k-l+1)$th smallest element of
1155: $x[l\colon r]$. The task of {\sc Select} is {\em extended\/}: given
1156: $x[l\colon r]$ and $l\le k\le r$,
1157: {\sc Select}$(x,l,r,k,k_-,k_+)$ permutes $x[l\colon r]$ and finds
1158: $l\le k_-\le k\le k_+\le r$
1159: such that $x_i<x_k$ for all $l\le i<k_-$, $x_i=x_k$ for all
1160: $k_-\le i\le k_+$, $x_i>x_k$ for all $k_+<i\le r$. The initial call
1161: is {\sc Select}$(x,1,n,k,k_-,k_+)$.
1162:
1163: A vector swap denoted by $x[a\colon b]\leftrightarrow x[b+1\colon c]$
1164: means that the first $d:=\min(b+1-a,c-b)$ elements of array
1165: $x[a\colon c]$ are exchanged with its last $d$ elements in arbitrary
1166: order if $d>0$; e.g., we may exchange
1167: $x_{a+i}\leftrightarrow x_{c-i}$ for $0\le i<d$, or
1168: $x_{a+i}\leftrightarrow x_{c-d+1+i}$ for $0\le i<d$.
1169: %
1170: % *** SUBSECTION 6.1 ***
1171: \subsection{Tripartitioning schemes}
1172: \label{ss:tripart}
1173: For a given pivot $v:=x_l$ from the array $x[l\colon r]$, the following
1174: {\em ternary\/} scheme \cite[\S5.1]{kiw:psq} partitions the array into
1175: three blocks, with $x_m<v$ for $l\le m<a$, $x_m=v$ for $a\le m\le b$,
1176: $x_m>v$ for $b<m\le r$.
1177: After comparing the pivot $v$ to $x_r$ to produce the initial setup
1178: \begin{equation}
1179: \begin{tabular}{llrlrlrr}
1180: \hline
1181: \multicolumn{1}{|c|}{$x=v$} &
1182: \multicolumn{2}{|c|}{$x<v$} &
1183: \multicolumn{2}{|c|}{?} &
1184: \multicolumn{2}{|c|}{$x>v$} &
1185: \multicolumn{1}{|c|}{$x=v$} \\
1186: \hline
1187: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!
1188: $l$ & $p$ & $i$ & & & $j$ & $q$ & $r$\\
1189: \end{tabular}
1190: \label{ternini}
1191: \end{equation}
1192: with $i:=l$ and $j:=r$,
1193: we work with the three inner blocks of the array
1194: \begin{equation}
1195: \begin{tabular}{lllrrr}
1196: \hline
1197: \multicolumn{1}{|c|}{$x=v$} &
1198: \multicolumn{1}{|c|}{$x<v$} &
1199: \multicolumn{2}{|c|}{?} &
1200: \multicolumn{1}{|c|}{$x>v$} &
1201: \multicolumn{1}{|c|}{$x=v$} \\
1202: \hline
1203: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!
1204: $l$ & $p$ & $i$ & $j$ & $q$ & $r$\\
1205: \end{tabular}\ ,
1206: \label{ternbeg}
1207: \end{equation}
1208: until the middle part is empty or just contains an element equal to the
1209: pivot
1210: \begin{equation}
1211: \begin{tabular}{llrclrr}
1212: \hline
1213: \multicolumn{1}{|c|}{$x=v$} &
1214: \multicolumn{2}{|c|}{$x<v$} &
1215: \multicolumn{1}{|c|}{$x=v$} &
1216: \multicolumn{2}{|c|}{$x>v$} &
1217: \multicolumn{1}{|c|}{$x=v$} \\
1218: \hline
1219: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!
1220: $l$ & $p$ & $j$ & & $i$ & $q$ & $r$ \\
1221: \end{tabular}
1222: \label{ternmid}
1223: \end{equation}
1224: (i.e., $j=i-1$ or $j=i-2$),
1225: then swap the ends into the middle for the final arrangement
1226: \begin{equation}
1227: \begin{tabular}{llrr}
1228: \hline
1229: \multicolumn{1}{|c|}{$x<v$} &
1230: \multicolumn{2}{|c|}{$x=v$} &
1231: \multicolumn{1}{|c|}{$x>v$} \\
1232: \hline
1233: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!
1234: $l$ & $a$ & $b$ & $r$\\
1235: \end{tabular}\ .
1236: \label{ternend}
1237: \end{equation}
1238: %
1239: % *** SCHEME A ***
1240: \begin{scheme}[Safeguarded ternary partition]
1241: \label{sts}
1242: \rm
1243: \begin{description}
1244: \itemsep0pt
1245: \item[]
1246: \item[\ref{sts}1.] [Initialize.]
1247: Set $i:=l$, $p:=i+1$, $j:=r$ and $q:=j-1$.
1248: If $v>x_j$, exchange $x_i\leftrightarrow x_j$ and set $p:=i$;
1249: else if $v<x_j$, set $q:=j$.
1250: \item[\ref{sts}2.] [Increase $i$ until $x_i\ge v$.]
1251: Increase $i$ by $1$; then if $x_i<v$, repeat this step.
1252: \item[\ref{sts}3.] [Decrease $j$ until $x_j\le v$.]
1253: Decrease $j$ by $1$; then if $x_j>v$, repeat this step.
1254: \item[\ref{sts}4.] [Exchange.]
1255: (Here $x_j\le v\le x_i$.)
1256: If $i<j$, exchange $x_i\leftrightarrow x_j$; then
1257: if $x_i=v$, exchange $x_i\leftrightarrow x_p$ and increase $p$ by $1$;
1258: if $x_j=v$, exchange $x_j\leftrightarrow x_q$ and decrease $q$ by $1$;
1259: return to \ref{sts}2.
1260: If $i=j$ (so that $x_i=x_j=v$), increase $i$ by $1$ and
1261: decrease $j$ by $1$.
1262: \item[\ref{sts}5.] [Cleanup.]
1263: Set $a:=l+j-p+1$ and $b:=r-q+i-1$.
1264: Exchange $x[l\colon p-1]\leftrightarrow x[p\colon j]$ and
1265: $x[i\colon q]\leftrightarrow x[q+1\colon r]$.
1266: \end{description}
1267: \end{scheme}
1268:
1269: Step \ref{sts}1 ensures that $x_l\le v\le x_r$, so steps \ref{sts}2 and
1270: \ref{sts}3 don't need to test whether $i\le j$. This scheme makes two
1271: extraneous comparisons (only one when $i=j$ at \ref{sts}4). Spurious
1272: comparisons are avoided in the following modification
1273: \cite[\S5.3]{kiw:psq} of the scheme of \cite{bemc:esf}
1274: (cf.\ \cite[Ex.\ 5.2.2--41]{knu:acpIII2}),
1275: for which $i=j+1$ in \eqref{ternmid}.
1276: %
1277: % *** SCHEME B ***
1278: \begin{scheme}[Double-index controlled ternary partition]
1279: \label{stind2}
1280: \rm
1281: \begin{description}
1282: \itemsep0pt
1283: \item[]
1284: \item[\ref{stind2}1.] [Initialize.]
1285: Set $i:=p:=l+1$ and $j:=q:=r$.
1286: \item[\ref{stind2}2.] [Increase $i$ until $x_i>v$.]
1287: If $i\le j$ and $x_i<v$, increase $i$ by $1$ and repeat this step.
1288: If $i\le j$ and $x_i=v$, exchange $x_p\leftrightarrow x_i$, increase
1289: $p$ and $i$ by $1$, and repeat this step.
1290: \item[\ref{stind2}3.] [Decrease $j$ until $x_j<v$.]
1291: If $i<j$ and $x_j>v$, decrease $j$ by $1$ and repeat this step.
1292: If $i<j$ and $x_j=v$, exchange $x_j\leftrightarrow x_q$, decrease
1293: $j$ and $q$ by $1$, and repeat this step.
1294: If $i\ge j$, set $j:=i-1$ and go to \ref{stind2}5.
1295: \item[\ref{stind2}4.] [Exchange.]
1296: Exchange $x_i\leftrightarrow x_j$, increase $i$ by $1$,
1297: decrease $j$ by $1$, and return to \ref{stind2}2.
1298: \item[\ref{stind2}5.] [Cleanup.]
1299: Set $a:=l+i-p$ and $b:=r-q+j$.
1300: Swap $x[l\colon p-1]\leftrightarrow x[p\colon j]$ and
1301: $x[i\colon q]\leftrightarrow x[q+1\colon r]$.
1302: \end{description}
1303: \end{scheme}
1304: %
1305: % *** SUBSECTION 6.2 ***
1306: \subsection{Preparing for ternary partitions}
1307: \label{ss:preptern}
1308: At Step 1, $r-l+1$ replaces $n$ in finding $s$ and $g$.
1309: At Step 2, it is convenient to place the sample in the initial part of
1310: $x[l\colon r]$ by exchanging $x_i\leftrightarrow x_{i+{\rm rand}(r-i)}$
1311: for $l\le i\le r_s:=l+s-1$, where ${\rm rand}(r-i)$ denotes a random
1312: integer, uniformly distributed between $0$ and $r-i$.
1313:
1314: Step 3 uses $i:=k-l+1$ and $m:=r-l+1$ instead of $k$ and $n$
1315: to find the pivot position
1316: \begin{equation}
1317: k_v:=\left\{\begin{array}{ll}
1318: \rlap{$\min$}\phantom{\max}
1319: \left\{\,\lceil l-1+is/m+g\rceil,r_s\,\right\}&
1320: \mbox{if}\ i<m/2,\\
1321: \max\left\{\,\lceil l-1+is/m-g\rceil,l\,\right\}&
1322: \mbox{if}\ i\ge m/2,
1323: \end{array}\right.
1324: \label{kv}
1325: \end{equation}
1326: so that the recursive call of {\sc Select}$(x,l,r_s,k_v,k_v^-,k_v^+)$
1327: produces $v:=x_{k_v}$.
1328:
1329: After $v$ has been found, our array looks as follows
1330: \begin{equation}
1331: \begin{tabular}{llrrccr}
1332: \hline
1333: \multicolumn{1}{|c|}{$x<v$} &
1334: \multicolumn{2}{|c|}{$x=v$} &
1335: \multicolumn{1}{|c|}{$x>v$} &
1336: \multicolumn{2}{|c|}{?}\\
1337: \hline
1338: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!
1339: $l$ & $k_v^-$ & $k_v^+$ & $r_s$ & & $r$\\
1340: \end{tabular}\ .
1341: \label{partrec}
1342: \end{equation}
1343: Setting $\bar l:=k_v^-$ and $\bar r:=r-r_s+k_v^+$, we swap
1344: $x[k_v^++1\colon r_s]\leftrightarrow x[r_s+1\colon r]$ in
1345: \eqref{partrec} to get
1346: \begin{equation}
1347: \begin{tabular}{llrlrr}
1348: \hline
1349: \multicolumn{1}{|c|}{$x<v$} &
1350: \multicolumn{2}{|c|}{$x=v$} &
1351: \multicolumn{2}{|c|}{?} &
1352: \multicolumn{1}{|c|}{$x>v$} \\
1353: \hline
1354: \vphantom{$1^{{2^3}^4}$} % Need more vertical space!
1355: $l$ & $\bar l$ & $k_v^+$ & & $\bar r$ & $r$\\
1356: \end{tabular}\ .
1357: \label{partini}
1358: \end{equation}
1359: If $k_v^+=r_s$, we use scheme \ref{sts} with $l$ replaced by $k_v^+$
1360: in \ref{sts}1 (cf.\ \eqref{ternini}) and by $\bar l$ in \ref{sts}5
1361: (cf.\ \eqref{ternmid}); for $k_v^+<r_s$, we set
1362: $i:=k_v^+$, $p:=i+1$, $j:=\bar r+1$, $q:=\bar r$, omit \ref{sts}1
1363: and replace $l$, $r$ by $\bar l$, $\bar r$ in \ref{sts}5.
1364: Similarly, for scheme \ref{stind2}, we replace $l$, $r$ by
1365: $k_v^+$, $\bar r$ in \ref{stind2}1, and by $\bar l$, $\bar r$ in
1366: \ref{stind2}5.
1367:
1368: After partitioning, $l$ and $r$ are updated by setting $l:=b+1$ if
1369: $a\le k$, $r:=a-1$ if $k\le b$. If $l\ge r$,
1370: {\sc Select} may return $k_-:=k_+:=k$ if $l=r$, $k_-:=r+1$ and
1371: $k_+:=l-1$ if $l>r$. Otherwise, instead of calling {\sc Select}
1372: recursively, Step 7 may jump back to Step 1, or to Step 0 if sSelect
1373: is used (cf.\ \S\ref{ss:subfile}).
1374:
1375: A simple version of sSelect is obtained if Steps 2 and 3 choose $v:=x_k$
1376: when $r-l+1\le n_{\rm cut}$ (this choice of \cite{flri:asf} works well
1377: in practice, but more sophisticated pivots could be tried); then the
1378: ternary partitioning code can be used by sSelect as well.
1379: %
1380: % *** SECTION 7 ***
1381: \section{Experimental results}
1382: \label{s:exp}
1383: %
1384: % *** SUBSECTION 7.1 ***
1385: \subsection{Implemented algorithms}
1386: \label{ss:impl}
1387: %
1388: An implementation of {\sc Select} was programmed in Fortran 77 and
1389: run on a notebook PC (Pentium 4M 2 GHz, 768 MB RAM) under MS
1390: Windows XP. The input set $X$ was specified as a double precision
1391: array. For efficiency, the recursion was removed and small arrays with
1392: $n\le n_{\rm cut}$ were handled as if Steps 2 and 3 chose $v:=x_k$;
1393: the resulting version of sSelect (cf.\ \S\S\ref{ss:subfile} and
1394: \ref{ss:preptern}) typically required less than $3.5n$ comparisons.
1395: The choice of \eqref{sgfFRsn2/3} was employed, with the parameters
1396: $\alpha=0.5$, $\beta=0.25$ and $n_{\rm cut}=600$ as proposed in
1397: \cite{flri:asf}; future work should test other sample sizes and
1398: parameters.
1399: %
1400: % *** SUBSECTION 7.2 ***
1401: \subsection{Testing examples}
1402: \label{ss:examp}
1403: %
1404: As in \cite{kiw:rsq}, we used minor modifications of the input sequences
1405: of \cite{val:iss}:
1406: \begin{description}
1407: \itemsep0pt
1408: \item[random]
1409: A random permutation of the integers $1$ through $n$.
1410: \item[onezero]
1411: A random permutation of $\lceil n/2\rceil$ ones and $\lfloor n/2\rfloor$
1412: zeros.
1413: \item[sorted]
1414: The integers $1$ through $n$ in increasing order.
1415: \item[rotated]
1416: A sorted sequence rotated left once; i.e., $(2,3,\ldots,n,1)$.
1417: \item[organpipe]
1418: %The integers $1$ through $n/2$ in increasing order, followed by $n/2$
1419: %through $1$ in decreasing order.
1420: The integers $(1,2,\ldots,n/2,n/2,\ldots,2,1)$.
1421: \item[m3killer]
1422: Musser's ``median-of-3 killer'' sequence with $n=4j$ and $k=n/2$:
1423: $$
1424: \left(\begin{array}{ccccccccccccc}
1425: 1& 2 & 3& 4 & \ldots& k-2& k-1& k& k+1& \ldots& 2k-2& 2k-1& 2k\\
1426: 1& k+1& 3& k+3& \ldots& 2k-3& k-1& 2& 4 & \ldots& 2k-2& 2k-1& 2k
1427: \end{array}\right).
1428: $$
1429: \item[twofaced]
1430: Obtained by randomly permuting the
1431: elements of an m3killer sequence in positions $4\lfloor\log_2n\rfloor$
1432: through $n/2-1$ and $n/2+4\lfloor\log_2n\rfloor-1$ through $n-2$.
1433: \end{description}
1434: For each input sequence, its (lower) median element was selected
1435: for $k:=\lceil n/2\rceil$.
1436: %
1437: % *** SUBSECTION 7.3 ***
1438: \subsection{Computational results}
1439: \label{ss:result}
1440: %
1441: We varied the input size $n$ from $50{,}000$ to $16{,}000{,}000$. For
1442: the random, onezero and twofaced sequences, for each input size,
1443: 20 instances were randomly generated; for the deterministic
1444: sequences, 20 runs were made to measure the solution time.
1445:
1446: The performance of {\sc Select} on randomly generated inputs is
1447: summarized in Table \ref{tab:Selrand},
1448: %
1449: % *** TABLE 7.1 ***
1450: \begin{table}[t!]
1451: \caption{Performance of {\sc Select} on randomly generated inputs.}
1452: \label{tab:Selrand}
1453: \footnotesize
1454: \begin{center}
1455: \begin{tabular}{lrrrrrrrrrrrrr}
1456: \hline
1457: Sequence &\multicolumn{1}{c}{Size}
1458: &\multicolumn{3}{c}{Time $[{\rm msec}]$%
1459: \vphantom{$1^{2^3}$}} % Need more vertical space!
1460: &\multicolumn{3}{c}{Comparisons $[n]$}
1461: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}
1462: &\multicolumn{1}{c}{$L_{\rm avg}$}
1463: &\multicolumn{1}{c}{$P_{\rm avg}$}
1464: &\multicolumn{1}{c}{$N_{\rm avg}$}
1465: &\multicolumn{1}{c}{$p_{\rm avg}$}
1466: &\multicolumn{1}{c}{$s_{\rm avg}$}\\
1467: &\multicolumn{1}{c}{$n$}
1468: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1469: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1470: & &\multicolumn{1}{c}{$[n]$}
1471: &\multicolumn{1}{c}{$[\ln n]$}
1472: &\multicolumn{1}{c}{$[\ln n]$} &
1473: &\multicolumn{1}{c}{$[\%n]$}\\
1474: \hline
1475: %dsel20/dsel20x alpha=0.5 beta=0.25 cutoff=600
1476: random & 50K
1477: & 2& 10& 0& 1.66& 1.77& 1.61& 1.74& 1.65& 0.46& 0.55& 8.33& 2.59\\
1478: & 100K
1479: & 3& 10& 0& 1.63& 1.71& 1.55& 1.76& 1.63& 0.60& 0.69& 7.58& 2.12\\
1480: & 500K
1481: & 13& 20& 10& 1.56& 1.61& 1.54& 1.36& 1.56& 0.67& 0.74& 8.05& 1.19\\
1482: & 1M
1483: & 23& 30& 20& 1.52& 1.58& 1.00& 0.55& 1.52& 0.66& 0.73& 8.32& 0.91\\
1484: & 2M
1485: & 46& 51& 40& 1.54& 1.56& 1.52& 1.22& 1.54& 0.75& 0.82& 8.38& 0.72\\
1486: & 4M
1487: & 88& 91& 80& 1.53& 1.55& 1.52& 1.18& 1.53& 0.86& 0.92& 8.22& 0.57\\
1488: & 8M
1489: & 172& 181& 160& 1.52& 1.53& 1.51& 1.13& 1.52& 0.92& 0.98& 8.54& 0.44\\
1490: & 16M
1491: & 336& 341& 320& 1.52& 1.53& 1.51& 1.06& 1.52& 0.95& 1.01& 8.41& 0.35\\
1492: onezero & 50K
1493: & 2& 10& 0& 1.28& 1.51& 1.00& 0.00& 1.28& 0.24& 0.18& 1.26& 1.91\\
1494: & 100K
1495: & 3& 10& 0& 1.25& 1.51& 1.00& 0.00& 1.25& 0.26& 0.15& 1.20& 1.49\\
1496: & 500K
1497: & 15& 20& 10& 1.33& 1.50& 1.00& 0.00& 1.33& 0.29& 0.17& 1.34& 0.93\\
1498: & 1M
1499: & 30& 41& 20& 1.33& 1.50& 1.00& 0.00& 1.33& 0.27& 0.15& 1.20& 0.73\\
1500: & 2M
1501: & 60& 71& 41& 1.30& 1.50& 1.00& 0.00& 1.30& 0.26& 0.14& 1.29& 0.56\\
1502: & 4M
1503: & 109& 131& 90& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.18& 0.41\\
1504: & 8M
1505: & 219& 261& 190& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.31& 0.32\\
1506: & 16M
1507: & 436& 501& 370& 1.25& 1.50& 1.00& 0.00& 1.25& 0.20& 0.11& 1.21& 0.27\\
1508: twofaced & 50K
1509: & 1& 10& 0& 1.67& 1.77& 1.59& 1.87& 1.67& 0.47& 0.56& 8.24& 2.63\\
1510: & 100K
1511: & 3& 11& 0& 1.62& 1.73& 1.56& 1.67& 1.62& 0.60& 0.69& 7.61& 2.11\\
1512: & 500K
1513: & 12& 20& 10& 1.56& 1.59& 1.53& 1.23& 1.56& 0.63& 0.71& 8.33& 1.18\\
1514: & 1M
1515: & 24& 31& 20& 1.55& 1.57& 1.53& 1.23& 1.55& 0.69& 0.76& 8.22& 0.92\\
1516: & 2M
1517: & 45& 51& 40& 1.54& 1.57& 1.52& 1.23& 1.54& 0.78& 0.85& 8.36& 0.73\\
1518: & 4M
1519: & 88& 91& 80& 1.53& 1.54& 1.52& 1.17& 1.53& 0.88& 0.94& 8.05& 0.57\\
1520: & 8M
1521: & 170& 180& 160& 1.52& 1.53& 1.51& 1.12& 1.52& 0.90& 0.97& 8.51& 0.44\\
1522: & 16M
1523: & 332& 341& 320& 1.52& 1.53& 1.51& 1.04& 1.52& 0.96& 1.02& 8.55& 0.35\\
1524: \hline
1525: \end{tabular}
1526: \end{center}
1527: \end{table}
1528: %
1529: where the average, maximum and minimum solution times are in
1530: milliseconds, and the comparison counts are in multiples of $n$; e.g.,
1531: column six gives $C_{\rm avg}/n$, where $C_{\rm avg}$ is the average
1532: number of comparisons made over all instances. Thus
1533: $\gamma_{\rm avg}:=(C_{\rm avg}-1.5n)_+/f(n)$ estimates the constant
1534: $\gamma$ in the bound \eqref{CnkFR}; moreover, we have
1535: $C_{\rm avg}\approx L_{\rm avg}$, where $L_{\rm avg}$ is the average
1536: sum of sizes of partitioned arrays. Further,
1537: $P_{\rm avg}$ is the average number of {\sc Select} partitions, whereas
1538: $N_{\rm avg}$ is the average number of calls to sSelect and
1539: $p_{\rm avg}$ is the average number of sSelect partitions per call;
1540: both $P_{\rm avg}$ and $N_{\rm avg}$ grow slowly with $\ln n$
1541: (linearly on the onezero inputs).
1542: Finally, $s_{\rm avg}$ is the average sum of sample sizes;
1543: $s_{\rm avg}/n^{2/3}$ drops from $0.95$ for $n=50{\rm K}$ to $0.88$ for
1544: $n=16{\rm M}$ on the random and twofaced inputs, and oscillates about
1545: $0.7$ on the onezero inputs, whereas the initial
1546: $s/n^{2/3}\approx\alpha=0.5$.
1547: The results for the random and twofaced sequences are very similar:
1548: the average solution times grow linearly with $n$ (except for small
inputs whose solution times could not be measured accurately), and the
1550: differences between maximum and minimum times are quite small (and also
1551: partly due to the operating system). Except for the smallest inputs,
1552: the maximum and minimum numbers of comparisons are quite close, and
1553: $C_{\rm avg}$ nicely approaches the theoretical lower bound of $1.5n$;
1554: this is reflected in the values of $\gamma_{\rm avg}$. The results for
1555: the onezero inputs essentially average two cases: the first pass
1556: eliminates either almost all or about half of the elements.
1557:
1558: Table \ref{tab:Seldet} exhibits similar features of {\sc Select} on
1559: the deterministic inputs.
1560: %
1561: % *** TABLE 7.2 ***
1562: \begin{table}[t!]
1563: \caption{Performance of {\sc Select} on deterministic inputs.}
1564: \label{tab:Seldet}
1565: \footnotesize
1566: \begin{center}
1567: \tabcolsep=0.98\tabcolsep
1568: \begin{tabular}{lrrrrrrrrrrrrr}
1569: \hline
1570: Sequence &\multicolumn{1}{c}{Size}
1571: &\multicolumn{3}{c}{Time $[{\rm msec}]$%
1572: \vphantom{$1^{2^3}$}} % Need more vertical space!
1573: &\multicolumn{3}{c}{Comparisons $[n]$}
1574: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}
1575: &\multicolumn{1}{c}{$L_{\rm avg}$}
1576: &\multicolumn{1}{c}{$P_{\rm avg}$}
1577: &\multicolumn{1}{c}{$N_{\rm avg}$}
1578: &\multicolumn{1}{c}{$p_{\rm avg}$}
1579: &\multicolumn{1}{c}{$s_{\rm avg}$}\\
1580: &\multicolumn{1}{c}{$n$}
1581: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1582: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1583: & &\multicolumn{1}{c}{$[n]$}
1584: &\multicolumn{1}{c}{$[\ln n]$}
1585: &\multicolumn{1}{c}{$[\ln n]$} &
1586: &\multicolumn{1}{c}{$[\%n]$}\\
1587: \hline
1588: %dsel10o/dsel10ox alpha=0.5 beta=0.25 cutoff=600
1589: sorted & 50K
1590: & 1& 10& 0& 1.67& 1.76& 1.59& 1.85& 1.66& 0.48& 0.57& 7.24& 2.65\\
1591: & 100K
1592: & 2& 10& 0& 1.62& 1.69& 1.55& 1.70& 1.62& 0.60& 0.69& 6.76& 2.12\\
1593: & 500K
1594: & 8& 10& 0& 1.56& 1.62& 1.53& 1.35& 1.56& 0.67& 0.74& 7.52& 1.19\\
1595: & 1M
1596: & 15& 20& 10& 1.54& 1.58& 1.53& 1.19& 1.54& 0.68& 0.75& 7.87& 0.92\\
1597: & 2M
1598: & 27& 31& 20& 1.54& 1.56& 1.52& 1.23& 1.54& 0.74& 0.81& 7.61& 0.73\\
1599: & 4M
1600: & 51& 61& 40& 1.53& 1.55& 1.52& 1.19& 1.53& 0.87& 0.93& 7.34& 0.57\\
1601: & 8M
1602: & 98& 111& 90& 1.52& 1.53& 1.51& 1.10& 1.52& 0.89& 0.95& 8.03& 0.44\\
1603: & 16M
1604: & 186& 200& 170& 1.52& 1.52& 1.51& 1.04& 1.52& 0.95& 1.01& 7.99& 0.35\\
1605: rotated & 50K
1606: & 1& 10& 0& 1.67& 1.78& 1.59& 1.86& 1.66& 0.48& 0.57& 9.45& 2.64\\
1607: & 100K
1608: & 2& 10& 0& 1.63& 1.73& 1.58& 1.76& 1.63& 0.61& 0.69& 9.12& 2.12\\
1609: & 500K
1610: & 8& 10& 0& 1.56& 1.62& 1.54& 1.39& 1.56& 0.65& 0.73&10.03& 1.18\\
1611: & 1M
1612: & 15& 20& 10& 1.55& 1.58& 1.53& 1.29& 1.55& 0.69& 0.76& 9.56& 0.92\\
1613: & 2M
1614: & 27& 31& 20& 1.54& 1.55& 1.52& 1.19& 1.54& 0.78& 0.84& 8.69& 0.72\\
1615: & 4M
1616: & 51& 60& 50& 1.53& 1.54& 1.52& 1.18& 1.53& 0.87& 0.94& 8.92& 0.57\\
1617: & 8M
1618: & 98& 111& 90& 1.52& 1.53& 1.51& 1.12& 1.52& 0.89& 0.96& 9.29& 0.44\\
1619: & 16M
1620: & 185& 210& 170& 1.52& 1.53& 1.51& 1.04& 1.52& 0.93& 0.99& 8.96& 0.35\\
1621: organpipe & 50K
1622: & 1& 10& 0& 1.67& 1.78& 1.59& 1.94& 1.67& 0.45& 0.55& 8.21& 2.62\\
1623: & 100K
1624: & 3& 10& 0& 1.62& 1.69& 1.57& 1.68& 1.62& 0.60& 0.69& 7.61& 2.11\\
1625: & 500K
1626: & 10& 10& 10& 1.57& 1.60& 1.54& 1.43& 1.56& 0.67& 0.75& 8.18& 1.19\\
1627: & 1M
1628: & 20& 20& 10& 1.55& 1.58& 1.52& 1.24& 1.55& 0.70& 0.77& 8.21& 0.93\\
1629: & 2M
1630: & 37& 41& 30& 1.53& 1.55& 1.52& 1.15& 1.53& 0.78& 0.85& 8.48& 0.72\\
1631: & 4M
1632: & 68& 80& 60& 1.53& 1.54& 1.52& 1.13& 1.53& 0.84& 0.91& 8.21& 0.57\\
1633: & 8M
1634: & 130& 150& 120& 1.52& 1.54& 1.51& 1.07& 1.52& 0.88& 0.94& 8.64& 0.44\\
1635: & 16M
1636: & 240& 260& 230& 1.52& 1.53& 1.51& 1.02& 1.52& 0.94& 1.00& 8.44& 0.35\\
1637: m3killer & 50K
1638: & 1& 10& 0& 1.67& 1.76& 1.60& 1.89& 1.67& 0.47& 0.55& 8.82& 2.62\\
1639: & 100K
1640: & 4& 10& 0& 1.63& 1.71& 1.57& 1.80& 1.63& 0.60& 0.69& 7.69& 2.13\\
1641: & 500K
1642: & 11& 20& 10& 1.57& 1.62& 1.53& 1.44& 1.57& 0.66& 0.73& 8.61& 1.19\\
1643: & 1M
1644: & 20& 20& 20& 1.55& 1.59& 1.52& 1.40& 1.55& 0.72& 0.79& 8.33& 0.93\\
1645: & 2M
1646: & 38& 41& 30& 1.54& 1.56& 1.52& 1.25& 1.54& 0.78& 0.85& 8.30& 0.73\\
1647: & 4M
1648: & 73& 81& 70& 1.53& 1.54& 1.52& 1.28& 1.53& 0.87& 0.94& 8.22& 0.57\\
1649: & 8M
1650: & 137& 150& 130& 1.52& 1.53& 1.51& 1.05& 1.52& 0.91& 0.97& 8.37& 0.44\\
1651: & 16M
1652: & 248& 260& 230& 1.52& 1.52& 1.51& 0.96& 1.52& 0.92& 0.97& 8.42& 0.35\\
1653: \hline
1654: \end{tabular}
1655: \end{center}
1656: \end{table}
1657: %
1658: The results for the sorted and rotated sequences are very similar,
1659: whereas the solution times on the organpipe and m3killer sequences
1660: are between those for the sorted and random sequences.
1661:
1662: The results of Tabs.\ \ref{tab:Selrand}--\ref{tab:Seldet} were obtained
1663: with scheme \ref{sts} of \S\ref{ss:preptern}; to save space,
1664: Table \ref{tab:SelpartB} gives only selected results for scheme
1665: \ref{stind2},
1666: %
1667: % *** TABLE 7.3 ***
1668: \begin{table}[t!]
1669: \caption{Performance of {\sc Select} with ternary scheme \ref{stind2}.}
1670: \label{tab:SelpartB}
1671: \footnotesize
1672: \begin{center}
1673: \begin{tabular}{lrrrrrrrrrrrrr}
1674: \hline
1675: Sequence &\multicolumn{1}{c}{Size}
1676: &\multicolumn{3}{c}{Time $[{\rm msec}]$%
1677: \vphantom{$1^{2^3}$}} % Need more vertical space!
1678: &\multicolumn{3}{c}{Comparisons $[n]$}
1679: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}
1680: &\multicolumn{1}{c}{$L_{\rm avg}$}
1681: &\multicolumn{1}{c}{$P_{\rm avg}$}
1682: &\multicolumn{1}{c}{$N_{\rm avg}$}
1683: &\multicolumn{1}{c}{$p_{\rm avg}$}
1684: &\multicolumn{1}{c}{$s_{\rm avg}$}\\
1685: &\multicolumn{1}{c}{$n$}
1686: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1687: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1688: & &\multicolumn{1}{c}{$[n]$}
1689: &\multicolumn{1}{c}{$[\ln n]$}
1690: &\multicolumn{1}{c}{$[\ln n]$} &
1691: &\multicolumn{1}{c}{$[\%n]$}\\
1692: \hline
1693: %dsel20b/dsel20bx alpha=0.5 beta=0.25 cutoff=600
1694: random & 2M
1695: & 43& 51& 40& 1.53& 1.54& 1.52& 1.02& 1.53& 0.76& 0.83& 8.31& 0.72\\
1696: & 4M
1697: & 93& 101& 90& 1.53& 1.55& 1.52& 1.09& 1.53& 0.85& 0.92& 8.42& 0.57\\
1698: & 8M
1699: & 177& 190& 170& 1.52& 1.54& 1.51& 1.03& 1.52& 0.87& 0.93& 8.15& 0.44\\
1700: & 16M
1701: & 343& 350& 340& 1.51& 1.53& 1.51& 0.88& 1.51& 0.91& 0.97& 8.50& 0.35\\
1702: onezero & 2M
1703: & 82& 91& 70& 1.30& 1.50& 1.00& 0.00& 1.30& 0.26& 0.14& 1.29& 0.56\\
1704: & 4M
1705: & 149& 180& 130& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.18& 0.41\\
1706: & 8M
1707: & 304& 351& 270& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.31& 0.32\\
1708: & 16M
1709: & 621& 711& 531& 1.25& 1.50& 1.00& 0.00& 1.25& 0.20& 0.11& 1.21& 0.27\\
1710: sorted & 2M
1711: & 23& 30& 20& 1.54& 1.55& 1.52& 1.18& 1.54& 0.78& 0.85& 7.61& 0.72\\
1712: & 4M
1713: & 43& 50& 40& 1.53& 1.54& 1.51& 1.18& 1.53& 0.86& 0.92& 7.76& 0.57\\
1714: & 8M
1715: & 82& 90& 80& 1.52& 1.53& 1.51& 1.10& 1.52& 0.89& 0.95& 8.01& 0.44\\
1716: & 16M
1717: & 156& 160& 150& 1.52& 1.53& 1.51& 1.04& 1.52& 0.97& 1.03& 8.12& 0.35\\
1718: \hline
1719: \end{tabular}
1720: \end{center}
1721: \end{table}
1722: %
whereas Table \ref{tab:SelpartI}
1724: %
1725: % *** TABLE 7.4 ***
1726: \begin{table}%[t!]
1727: \caption{Performance of {\sc Select} with the hybrid scheme of
1728: \cite[\S5.6]{kiw:psq}.}
1729: \label{tab:SelpartI}
1730: \footnotesize
1731: \begin{center}
1732: \begin{tabular}{lrrrrrrrrrrrrr}
1733: \hline
1734: Sequence &\multicolumn{1}{c}{Size}
1735: &\multicolumn{3}{c}{Time $[{\rm msec}]$%
1736: \vphantom{$1^{2^3}$}} % Need more vertical space!
1737: &\multicolumn{3}{c}{Comparisons $[n]$}
1738: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}
1739: &\multicolumn{1}{c}{$L_{\rm avg}$}
1740: &\multicolumn{1}{c}{$P_{\rm avg}$}
1741: &\multicolumn{1}{c}{$N_{\rm avg}$}
1742: &\multicolumn{1}{c}{$p_{\rm avg}$}
1743: &\multicolumn{1}{c}{$s_{\rm avg}$}\\
1744: &\multicolumn{1}{c}{$n$}
1745: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1746: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1747: & &\multicolumn{1}{c}{$[n]$}
1748: &\multicolumn{1}{c}{$[\ln n]$}
1749: &\multicolumn{1}{c}{$[\ln n]$} &
1750: &\multicolumn{1}{c}{$[\%n]$}\\
1751: \hline
1752: %dsel20d/dsel20dx alpha=0.5 beta=0.25 cutoff=600
1753: random & 2M
1754: & 44& 50& 40& 1.53& 1.54& 1.52& 1.03& 1.53& 0.76& 0.83& 8.31& 0.72\\
1755: & 4M
1756: & 86& 100& 80& 1.53& 1.55& 1.52& 1.10& 1.53& 0.85& 0.92& 8.42& 0.57\\
1757: & 8M
1758: & 163& 171& 160& 1.52& 1.54& 1.51& 1.03& 1.52& 0.87& 0.93& 8.15& 0.44\\
1759: & 16M
1760: & 317& 321& 310& 1.51& 1.53& 1.51& 0.88& 1.51& 0.91& 0.97& 8.50& 0.35\\
1761: onezero & 2M
1762: & 74& 80& 70& 1.30& 1.50& 1.00& 0.00& 1.30& 0.26& 0.14& 1.29& 0.56\\
1763: & 4M
1764: & 141& 151& 130& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.18& 0.41\\
1765: & 8M
1766: & 285& 301& 270& 1.20& 1.50& 1.00& 0.00& 1.20& 0.22& 0.13& 1.31& 0.32\\
1767: & 16M
1768: & 578& 621& 541& 1.25& 1.50& 1.00& 0.00& 1.25& 0.20& 0.11& 1.21& 0.27\\
1769: sorted & 2M
1770: & 23& 30& 20& 1.54& 1.55& 1.52& 1.18& 1.54& 0.78& 0.85& 7.61& 0.72\\
1771: & 4M
1772: & 42& 50& 40& 1.53& 1.54& 1.51& 1.19& 1.53& 0.86& 0.92& 7.76& 0.57\\
1773: & 8M
1774: & 80& 80& 80& 1.52& 1.53& 1.51& 1.11& 1.52& 0.89& 0.95& 8.01& 0.44\\
1775: & 16M
1776: & 153& 170& 150& 1.52& 1.53& 1.51& 1.04& 1.52& 0.97& 1.03& 8.12& 0.35\\
1777: \hline
1778: \end{tabular}
1779: \end{center}
1780: \end{table}
1781: %
1782: presents results for the hybrid scheme I of \cite[\S5.6]{kiw:psq},
1783: which combines some features of schemes \ref{sts} and \ref{stind2}.
1784: The hybrid scheme is quite competitive, although slower than scheme
1785: \ref{sts} on the onezero inputs.
1786:
1787: The preceding results were obtained with the modified choice \eqref{iv3}
1788: of $i_v$. For brevity, Table \ref{tab:Seliv} gives results for
1789: {\sc Select} with scheme \ref{sts} and the standard choice \eqref{iv}
1790: of $i_v$ on the random inputs only, since these inputs are most
1791: frequently used in theory and practice for evaluating sorting and
1792: selection methods.
1793: %
1794: % *** TABLE 7.5 ***
1795: \begin{table}%[t!]
1796: \caption{Performance of {\sc Select} with the standard choice of $i_v$.}
1797: \label{tab:Seliv}
1798: \footnotesize
1799: \begin{center}
1800: \begin{tabular}{lrrrrrrrrrrrrr}
1801: \hline
1802: Sequence &\multicolumn{1}{c}{Size}
1803: &\multicolumn{3}{c}{Time $[{\rm msec}]$%
1804: \vphantom{$1^{2^3}$}} % Need more vertical space!
1805: &\multicolumn{3}{c}{Comparisons $[n]$}
1806: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}
1807: &\multicolumn{1}{c}{$L_{\rm avg}$}
1808: &\multicolumn{1}{c}{$P_{\rm avg}$}
1809: &\multicolumn{1}{c}{$N_{\rm avg}$}
1810: &\multicolumn{1}{c}{$p_{\rm avg}$}
1811: &\multicolumn{1}{c}{$s_{\rm avg}$}\\
1812: &\multicolumn{1}{c}{$n$}
1813: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1814: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1815: & &\multicolumn{1}{c}{$[n]$}
1816: &\multicolumn{1}{c}{$[\ln n]$}
1817: &\multicolumn{1}{c}{$[\ln n]$} &
1818: &\multicolumn{1}{c}{$[\%n]$}\\
1819: \hline
1820: %dsel20/dsel20x alpha=0.5 beta=0.25 cutoff=600
1821: random & 50K
1822: & 4& 10& 0& 1.83& 1.97& 1.74& 3.73& 1.83& 0.57& 0.67& 8.49& 2.96\\
1823: & 100K
1824: & 4& 10& 0& 1.73& 1.83& 1.61& 3.13& 1.73& 0.73& 0.82& 7.80& 2.32\\
1825: & 500K
1826: & 14& 20& 10& 1.65& 1.69& 1.61& 3.25& 1.65& 0.82& 0.90& 8.40& 1.30\\
1827: & 1M
1828: & 25& 30& 20& 1.61& 1.65& 1.58& 2.83& 1.60& 0.89& 0.97& 8.28& 0.99\\
1829: & 2M
1830: & 46& 50& 40& 1.59& 1.61& 1.56& 2.92& 1.59& 0.99& 1.06& 8.01& 0.77\\
1831: & 4M
1832: & 90& 100& 80& 1.56& 1.58& 1.54& 2.61& 1.56& 1.15& 1.22& 8.34& 0.60\\
1833: & 8M
1834: & 174& 181& 170& 1.55& 1.57& 1.54& 2.70& 1.55& 1.21& 1.27& 8.09& 0.47\\
1835: & 16M
1836: & 341& 351& 330& 1.54& 1.56& 1.53& 2.68& 1.54& 1.21& 1.28& 8.33& 0.36\\
1837: \hline
1838: \end{tabular}
1839: \end{center}
1840: \end{table}
1841: %
1842: The modified choice typically requires fewer comparisons for small
1843: inputs, but its advantages are less pronounced for larger inputs.
1844: A similar behavior was observed for {\sc Select} with scheme
1845: \ref{stind2}. % and for {\sc bSelect}.
1846:
1847: For comparison, Table \ref{tab:qSel} extracts from \cite{kiw:rsq}
1848: some results of {\sc qSelect} for the samples \eqref{sgf}.
1849: %
1850: % *** TABLE 7.6 ***
1851: \begin{table}
1852: \caption{Performance of quintary {\sc qSelect} on random inputs.}
1853: \label{tab:qSel}
1854: \footnotesize
1855: \begin{center}
1856: \begin{tabular}{lrrrrrrrrrrrrr}
1857: \hline
1858: Sequence &\multicolumn{1}{c}{Size}
1859: &\multicolumn{3}{c}{Time $[{\rm msec}]$%
1860: \vphantom{$1^{2^3}$}} % Need more vertical space!
1861: &\multicolumn{3}{c}{Comparisons $[n]$}
1862: &\multicolumn{1}{c}{$\gamma_{\rm avg}$}
1863: &\multicolumn{1}{c}{$L_{\rm avg}$}
1864: &\multicolumn{1}{c}{$P_{\rm avg}$}
1865: &\multicolumn{1}{c}{$N_{\rm avg}$}
1866: &\multicolumn{1}{c}{$p_{\rm avg}$}
1867: &\multicolumn{1}{c}{$s_{\rm avg}$}\\
1868: &\multicolumn{1}{c}{$n$}
1869: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1870: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1871: & &\multicolumn{1}{c}{$[n]$}
1872: &\multicolumn{1}{c}{$[\ln n]$}
1873: &\multicolumn{1}{c}{$[\ln n]$} &
1874: &\multicolumn{1}{c}{$[\%n]$}\\
1875: \hline
1876: %dsel10o/dsel10ox alpha=0.5 beta=0.25 cutoff=600
1877: random & 50K
1878: & 3& 10& 0& 1.81& 1.85& 1.77& 5.23& 1.22& 0.46& 1.01& 7.62& 4.11\\
1879: & 100K
1880: & 4& 10& 0& 1.72& 1.76& 1.65& 4.50& 1.15& 0.45& 0.99& 8.05& 3.20\\
1881: & 500K
1882: & 13& 20& 10& 1.62& 1.63& 1.60& 4.14& 1.08& 0.59& 1.27& 7.59& 1.86\\
1883: & 1M
1884: & 24& 30& 20& 1.59& 1.60& 1.57& 3.93& 1.06& 0.64& 1.35& 8.18& 1.47\\
1885: & 2M
1886: & 46& 50& 40& 1.57& 1.58& 1.56& 3.73& 1.04& 0.76& 1.59& 7.67& 1.16\\
1887: & 4M
1888: & 86& 91& 80& 1.56& 1.56& 1.55& 3.61& 1.03& 0.94& 1.94& 7.21& 0.91\\
1889: & 8M
1890: & 163& 171& 160& 1.54& 1.55& 1.54& 3.45& 1.03& 0.98& 1.99& 7.45& 0.72\\
1891: & 16M
1892: & 316& 321& 310& 1.53& 1.54& 1.53& 3.44& 1.02& 0.99& 2.02& 7.55& 0.57\\
1893: \hline
1894: \end{tabular}
1895: \end{center}
1896: \end{table}
1897: %
1898: As noted in \S\ref{s:intro}, {\sc qSelect} is slightly faster than
1899: {\sc Select} on larger inputs because most of its work occurs on the
1900: first partition (cf.\ $L_{\rm avg}$ in Tabs.\ \ref{tab:Selrand} and
1901: \ref{tab:qSel}). In Table \ref{tab:riSel}
1902: %
1903: % *** TABLE 7.7 ***
1904: \begin{table}[t!]
1905: \caption{Performance of {\sc riSelect} on random inputs.}
1906: \label{tab:riSel}
1907: \footnotesize
1908: \begin{center}
1909: \begin{tabular}{lrrrrrrrrrr}
1910: \hline
1911: Sequence &\multicolumn{1}{c}{Size}
1912: &\multicolumn{3}{c}{Time $[{\rm msec}]$%
1913: \vphantom{$1^{2^3}$}} % Need more vertical space!
1914: &\multicolumn{3}{c}{Comparisons $[n]$}
1915: &\multicolumn{1}{c}{$L_{\rm avg}$}
1916: &\multicolumn{1}{c}{$P_{\rm avg}$}
1917: &\multicolumn{1}{c}{$N_{\rm rnd}$}\\
1918: &\multicolumn{1}{c}{$n$}
1919: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
1920: &\multicolumn{1}{c}{avg}&\multicolumn{1}{c}{max}&\multicolumn{1}{c}{min}
&\multicolumn{1}{c}{$[n]$}
&\multicolumn{1}{c}{$[\ln n]$}&\\
1923: \hline
1924: %dsel08
1925: random & 50K
1926: & 2& 10& 0& 3.10& 4.32& 1.88& 3.10& 1.63& 0.45\\
1927: & 100K
1928: & 4& 10& 0& 2.61& 4.19& 1.77& 2.61& 1.60& 0.20\\
1929: & 500K
1930: & 17& 20& 10& 2.91& 4.45& 1.69& 2.91& 1.57& 0.25\\
1931: & 1M
1932: & 33& 41& 20& 2.81& 3.79& 1.84& 2.81& 1.57& 0.40\\
1933: & 2M
1934: & 62& 90& 40& 2.60& 3.57& 1.83& 2.60& 1.61& 0.35\\
1935: & 4M
1936: & 135& 191& 90& 2.86& 4.38& 1.83& 2.86& 1.65& 0.55\\
1937: & 8M
1938: & 249& 321& 190& 2.60& 3.48& 1.80& 2.60& 1.58& 0.40\\
1939: & 16M
1940: & 553& 762& 331& 2.99& 4.49& 1.73& 2.99& 1.58& 0.40\\
1941: \hline
1942: \end{tabular}
1943: \end{center}
1944: \end{table}
1945: %
1946: we give corresponding results for {\sc riSelect}, a Fortran version of
1947: the algorithm of \cite{val:iss}. For these inputs, {\sc riSelect}
1948: behaves like {\sc Find} with median-of-3 pivots (because the
1949: average numbers of randomization steps, $N_{\rm rnd}$, are negligible);
1950: hence the expected value of $C_{\rm avg}$ is of order $2.75n$
1951: \cite{kimapr:ahf}.
1952:
1953: Our final Table \ref{tab:comp_small}
1954: %
1955: % *** TABLE 7.8 ***
1956: \begin{table}
1957: \caption{Numbers of comparisons per element made on small random
1958: inputs.}
1959: \label{tab:comp_small}
1960: \footnotesize
1961: \begin{center}
1962: \begin{tabular}{lccccccccccc}
1963: \hline
1964: %dsel20x, dsel10x, dsel08x alpha=0.5 beta=0.25 cutoff=600
1965: Size%
1966: \vphantom{$1^{2^3}$} % Need more vertical space!
1967: &
1968: & 1000& 2500& 5000& 7500& 10000& 12500& 15000& 17500& 20000& 25000\\
1969: \hline
1970: &avg
1971: & 2.48& 2.06& 1.93& 1.87& 1.81& 1.79& 1.77& 1.76& 1.74& 1.71\\
1972: {\sc Select}
1973: &max
1974: & 4.25& 3.03& 2.28& 2.22& 2.09& 2.05& 1.95& 1.93& 1.93& 1.93\\
1975: &min
1976: & 1.55& 1.06& 1.03& 1.64& 1.62& 1.61& 1.64& 1.63& 1.59& 1.60\\
1977: \hline
1978: &avg
1979: & 2.86& 2.55& 2.24& 2.16& 2.07& 2.03& 1.98& 1.98& 1.94& 1.90\\
1980: {\sc qSelect}
1981: &max
1982: & 3.97& 3.55& 2.57& 2.38& 2.28& 2.21& 2.16& 2.13& 2.11& 2.31\\
1983: &min
1984: & 2.29& 1.97& 1.98& 1.95& 1.87& 1.86& 1.82& 1.83& 1.82& 1.75\\
1985: \hline
1986: &avg
1987: & 2.72& 2.85& 2.66& 2.71& 2.72& 2.83& 2.78& 2.75& 2.75& 2.84\\
1988: {\sc riSelect}
1989: &max
1990: & 4.40& 4.51& 4.69& 4.43& 4.62& 4.76& 4.64& 4.40& 5.10& 4.77\\
1991: &min
1992: & 1.68& 1.83& 1.75& 1.59& 1.70& 1.77& 1.78& 1.67& 1.90& 1.71\\
1993: \hline
1994: \end{tabular}
1995: \end{center}
1996: \end{table}
1997: %
1998: shows that {\sc Select} beats its competitors with respect to the
1999: numbers of comparisons made on small random inputs (100 instances for
2000: each input size $n$).
2001:
2002: Our computational results, combined with those in
2003: \cite{kiw:psq,kiw:rsq},
2004: suggest that both {\sc Select} and {\sc qSelect} may compete with
2005: {\sc Find} in practice.
2006:
2007: %{\bf Acknowledgment}. I would like to thank the Associate Editor and
2008: %the two anonymous referees for their helpful comments.
2009: {\bf Acknowledgment}. I would like to thank Olgierd Hryniewicz,
2010: Roger Koenker, Ronald L. Rivest and John D. Valois for useful
2011: discussions.
2012:
2013: %\clearpage
2014:
2015: %
2016: % *** REFERENCES ***
2017: \footnotesize
2018: %\bibliography{kckabbr,kalg,kbk,kck,kint,kth}
2019: %\bibliographystyle{kck}
2020: \newcommand{\etalchar}[1]{$^{#1}$}
2021: \newcommand{\noopsort}[1]{} \newcommand{\printfirst}[2]{#1}
2022: \newcommand{\singleletter}[1]{#1} \newcommand{\switchargs}[2]{#2#1}
2023: \ifx\undefined\bysame
2024: \newcommand{\bysame}{\leavevmode\hbox to3em{\hrulefill}\,}
2025: \fi
2026: \begin{thebibliography}{PRKT83}
2027:
2028: \bibitem[BeM93]{bemc:esf}
2029: J.~L. Bentley and M.~D. McIlroy, {\em Engineering a sort function},
2030: Software--Practice and Experience {\bf 23} (1993) 1249--1265.
2031:
2032: \bibitem[BFP{\etalchar{+}}72]{blflprrita:tbs}
2033: M.~R. Blum, R.~W. Floyd, V.~R. Pratt, R.~L. Rivest and R.~E. Tarjan, {\em Time
2034: bounds for selection}, J. Comput. System Sci. {\bf 7} (1972) 448--461.
2035:
2036: \bibitem[Bro76]{bro:ra489}
T.~Brown, {\em Remark on {A}lgorithm 489}, ACM Trans. Math. Software {\bf 2}
2038: (1976) 301--304.
2039:
2040: \bibitem[Chv79]{chv:thd}
2041: V.~Chv{\'a}tal, {\em The tail of the hypergeometric distribution}, Discrete
2042: Math. {\bf 25} (1979) 285--287.
2043:
2044: \bibitem[CuM89]{cumu:acs}
2045: W.~Cunto and J.~I. Munro, {\em Average case selection}, J. of the ACM {\bf 36}
2046: (1989) 270--279.
2047:
2048: \bibitem[DHUZ01]{dohaulzw:lbs}
2049: D.~Dor, J.~H{\aa}stad, S.~Ulfberg and U.~Zwick, {\em On lower bounds for
2050: selecting the median}, SIAM J. Discrete Math. {\bf 14} (2001) 299--311.
2051:
2052: \bibitem[DoZ99]{dozw:sm}
2053: D.~Dor and U.~Zwick, {\em Selecting the median}, SIAM J. Comput. {\bf 28}
2054: (1999) 1722--1758.
2055:
2056: \bibitem[DoZ01]{dozw:msr}
2057: \bysame, {\em Median selection requires $(2+\epsilon){N}$ comparisons}, SIAM J.
2058: Discrete Math. {\bf 14} (2001) 312--325.
2059:
2060: \bibitem[FlR75a]{flri:asf}
2061: R.~W. Floyd and R.~L. Rivest, {\em The algorithm {SELECT}---for finding the
2062: $i$th smallest of $n$ elements ({A}lgorithm 489)}, Comm. ACM {\bf 18} (1975)
2063: 173.
2064:
2065: \bibitem[FlR75b]{flri:etb}
2066: \bysame, {\em Expected time bounds for selection}, Comm. ACM {\bf 18} (1975)
2067: 165--172.
2068:
2069: \bibitem[Gr{\"u}99]{gru:mvh}
2070: R.~Gr{\"u}bel, {\em On the median-of-$k$ version of {H}oare's selection
2071: algorithm}, Theor. Inform. Appl. {\bf 33} (1999) 177--192.
2072:
2073: \bibitem[Hoa61]{hoa:a65}
2074: C.~A.~R. Hoare, {\em Algorithm 65: {\sc Find}}, Comm. ACM {\bf 4} (1961)
2075: 321--322.
2076:
2077: \bibitem[Hoe63]{hoe:pis}
2078: W.~Hoeffding, {\em Probability inequalities for sums of bounded random
2079: variables}, J. Amer. Statist. Assoc. {\bf 58} (1963) 13--30.
2080:
2081: \bibitem[Kiw03a]{kiw:psq}
2082: K.~C. Kiwiel, {\em Partitioning schemes for quicksort and quickselect}, Tech.
2083: report, Systems Research Institute, Warsaw, 2003.
2084: \newblock Available at the URL http://arxiv.org/abs/cs.DS/0312054.
2085:
2086: \bibitem[Kiw03b]{kiw:rsq}
2087: \bysame, {\em Randomized selection with quintary partitions}, Tech. report,
2088: Systems Research Institute, Warsaw, 2003.
2089: \newblock Available at the URL http://arxiv.org/abs/cs.DS/0312055.
2090:
2091: \bibitem[KMP97]{kimapr:ahf}
2092: P.~Kirschenhofer, C.~Mart{\'\i}nez and H.~Prodinger, {\em Analysis of {H}oare's
{\sc find} algorithm with median-of-three partition}, Random Structures and
2094: Algorithms {\bf 10} (1997) 143--156.
2095:
2096: \bibitem[Knu98]{knu:acpIII2}
2097: D.~E. Knuth, {\em The Art of Computer Programming. Volume III: Sorting and
2098: Searching}, second ed., Addison-Wesley, Reading, MA, 1998.
2099:
2100: \bibitem[MaR01]{maro:oss}
2101: C.~Mart{\'\i}nez and S.~Roura, {\em Optimal sampling strategies in quicksort
2102: and quickselect}, SIAM J. Comput. {\bf 31} (2001) 683--705.
2103:
2104: \bibitem[Mus97]{mus:iss}
2105: D.~R. Musser, {\em Introspective sorting and selection algorithms},
2106: Software--Practice and Experience {\bf 27} (1997) 983--993.
2107:
2108: \bibitem[PRKT83]{poriti:eds}
2109: J.~T. Postmus, A.~H.~G. Rinnooy~Kan and G.~T. Timmer, {\em An efficient dynamic
2110: selection method}, Comm. ACM {\bf 26} (1983) 878--881.
2111:
2112: \bibitem[SPP76]{scpapi:fm}
2113: A.~Sch{\"o}nhage, M.~Paterson and N.~Pippenger, {\em Finding the median}, J.
2114: Comput. System Sci. {\bf 13} (1976) 184--199.
2115:
2116: \bibitem[Val00]{val:iss}
2117: J.~D. Valois, {\em Introspective sorting and selection revisited},
2118: Software--Practice and Experience {\bf 30} (2000) 617--638.
2119:
2120: \end{thebibliography}
2121: \normalsize
2122: % *** END OF REFERENCES ***
2123: %
2124: \end{document} % End of document.
2125: