cs0107025/corr.tex
1: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2: %
3: % File:         Expansions/OnLine/Texte/jacm.tex
4: %
5: % Author(s):    Marc Daumas
6: %               <Marc.Daumas@Lip.Ens-Lyon.Fr>
7: %               Laboratoire de l'Informatique du Parallelisme
8: %
9: % Last modified on Fri May 05 15:10:03 GMT MET DST 2000 by daumas 
10: %
11: 
12: %%%%%%%%%%%%%%%%
13: %%%%% TODO %%%%%
14: %%%%%%%%%%%%%%%%
15: % Rechercher les TODO
16: 
17: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
18: 
19: \documentclass[a4paper]{article} 
20: %\documentclass[a4paper,twoside]{lipreport} 
21: 
22: %\usepackage{RR} 
23: 
24: \usepackage[latin1]{inputenc} 
25: \usepackage[T1]{fontenc} 
26: \usepackage[english]{babel} 
27: 
28: %\usepackage{styles}
29: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
30: %
31: % File:         styles.sty
32: %
33: % Author(s):    Marc Daumas
34: %               <Marc.Daumas@Lip.Ens-Lyon.Fr>
35: %               Laboratoire de l'Informatique du Parallelisme
36: %
37: % Last modified on Thu Jan 28 14:33:02 MET 1999 by daumas 
38: %      modified on Wed Feb 26 09:02:03 MET 1997 by mdaumas
39: %
40: 
41: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
42: %
43: % Detection LaTeX vs. LaTeX2e
44: %
45: 
46: \ifx\usepackage\undefined
47: \def\inca#1{\input #1.sty}
48: \def\incb#1#2{\input #1.sty}
49: \else 
50: \newcommand{\inca}[1]{\usepackage{#1}}
51: \newcommand{\incb}[2]{\usepackage[#2]{#1}}
52: \fi
53: 
54: %\if@compatibility
55: %\def\inca#1{\input #1.sty}
56: %\def\incb#1#2{\input #1.sty}
57: %\fi
58: 
59: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
60: %
61: % Styles usuels dans un ordre que LaTeX accepte
62: %
63: 
64: \ifx\pdfoutput\undefined
65:   \incb{graphics}{dvips}
66:   \incb{epsfig}{dvips}
67:   \def\dvipsselector#1#2{#1}
68:   \def\dvipsoption{dvips}
69: \else
70:   \incb{graphics}{pdftex}
71:   \def\dvipsselector#1#2{#2}
72:   \def\dvipsoption{pdftex}
73: \fi
74: 
75: %\inca{tgrind}
76: \inca{boxedminipage,url}
77: \inca{amstext,amsfonts,latexsym}
78: \ifx\usepackage\undefined\else\inca{latexsym}\fi
79: \inca{epic}
80: % \inca{eepic}
81: 
82: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
83: %
84: % Macro personnelles
85: %
86: 
87: %\inca{macro}
88: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
89: %
90: % File:         macro.sty
91: %
92: % Author(s):    Marc Daumas
93: %               <Marc.Daumas@Lip.Ens-Lyon.Fr>
94: %               Laboratoire de l'Informatique du Parallelisme
95: %
96: % Last modified on Wed Mar 01 12:44:17 GMT MET 2000 by daumas  
97: %      modified on Fri Feb 28 16:22:11 MET 1997 by mdaumas 
98: %
99: 
100: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
101: %
102: % Quelques alias utiles
103: %
104: 
105: \def\ov#1{\overline{#1}}
106: \def\mc#1#2#3{\multicolumn{#1}{#2}{#3}}
107: \def\ita{\vspace{-\itemsep}\item}
108: 
109: % \newcommand{\interligne}[1]{\baselineskip = #1pt}
110: % \renewcommand{\baselinestretch}{1.5}
111: 
112: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
113: %
114: % Environnements
115: %
116: 
117: \ifx\frenchname\undefined
118: \newtheorem{defin}{Definition}
119: \newtheorem{prop}{Proposition}
120: \newtheorem{lem}{Lemma}
121: \newtheorem{theo}{Theorem}
122: \newtheorem{corol}{Corollary}
123: \newtheorem{conj}{Conjecture}
124: \newtheorem{observ}{Observation}
125: \newtheorem{exemple}{Example}
126: \else
127: \newtheorem{defin}{Définition}
128: \newtheorem{prop}{Proposition}
129: \newtheorem{lem}{Lemme}
130: \newtheorem{theo}{Théorème}
131: \newtheorem{corol}{Corollaire}
132: \newtheorem{conj}{Conjecture}
133: \newtheorem{observ}{Observation}
134: \newtheorem{exemple}{Exemple}
135: \fi
136: 
137: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
138: %
139: % Ensembles mathématiques usuels
140: %
141: 
142: \def\R{\mathbb R}
143: \def\Q{\mathbb Q}
144: \def\Z{\mathbb Z}
145: \def\N{\mathbb N}
146: \def\F{\mathbb F}
147: \def\M{\mathbb M}
148: \def\K{\mathbb K}
149: \def\C{\mathbb C}
150: \def\Fb{\overline{\mathbb F}}
151: \def\Rb{\overline{\mathbb R}}
152: 
153: \ifx\mathbb\undefined
154: \def\mathbb{\Bbb}
155: \fi
156: 
157: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
158: %
159: % Fonction mathématiques utilisées en informatique
160: %
161: 
162: \def\mod{\mathop{\rm mod}\nolimits}
163: \def\ulp{\mathop{\rm ulp}\nolimits}
164: \def\sign{\mathop{\rm sign}\nolimits}
165: \def\trunc{\mathop{\rm Trunc}\nolimits}
166: \def\tronc{\mathop{\rm Tronc}\nolimits}
167: 
168: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
169: %
170: % Expressions mathématiques
171: %
172: 
173: \def\ceil#1{ \left\lceil  #1 \right\rceil }
174: \def\floor#1{\left\lfloor #1 \right\rfloor}
175: \def\fl#1{\left\lfloor \log #1 \right\rfloor}
176: \def\cl#1{\left\lceil  \log #1 \right\rceil }
177: 
178: \def\CG{\left[}   \def\CD{\right]}
179: \def\PG{\left(}   \def\PD{\right)}
180: \def\AG{\left\{}  \def\AD{\right\}}
181: 
182: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
183: %
184: % Modes d'arrondi normalisés IEEE
185: %
186: 
187: \def\Rx{\mathop{\Box}\nolimits}
188: 
189: \def\RO{\mathop{\bowtie}\nolimits}
190: \def\RU{\mathop{\triangle}\nolimits}
191: \def\RN{\mathop{\circ}\nolimits}
192: \def\RD{\mathop{\nabla}\nolimits}
193: 
194: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
195: %
196: % Inclusion de figures
197: %
198: 
199: \newenvironment{pfigure}{\begin{figure}}{\end{figure}}
200: \def\figdir{Fig} \def\figepsdir{\figdir}
201: \def\figext{\dvipsselector{pstex_t}{pdftex_t}}
202: \def\epsdir{../EPS}
203: 
204: \newcommand{\figtex}[2]{%
205:   \begin{pfigure}
206:     \begin{center}  
207:       \input \figdir/#1.\figext
208:     \end{center}
209:     \caption{#2}
210:     \label{fig/#1}
211:     \label{fig:#1}
212:   \end{pfigure}
213: }
214: 
215: \newcommand{\figeps}[2]{%
216:   \begin{pfigure}
217:     \centerline{\epsfig{file=\epsdir/#1.eps}}
218:     \caption{#2}
219:     \label{fig/#1}
220:     \label{fig:#1}
221:   \end{pfigure}
222: }
223: 
224: \newcommand{\figpic}[2]{%
225:   \begin{pfigure}
226:     \begin{center}  
227:       \input \figdir/#1.eepic
228:     \end{center}
229:     \caption{#2}
230:     \label{fig/#1}
231:     \label{fig:#1}
232:   \end{pfigure}
233: }
234: 
235: \newcommand{\figinc}[2]{%
236:   \begin{pfigure}
237:     \begin{center}  
238:       \input \figdir/#1.tex
239:     \end{center}
240:     \caption{#2}
241:     \label{fig/#1}
242:     \label{fig:#1}
243:   \end{pfigure}
244: }
245: 
246: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
247: %
248: % Dessins
249: %
250: 
251: \newcommand{\pictex}[1]{%
252:   \begin{center}
253:     \input \figdir/#1.\figext
254:   \end{center}
255:   }
256: 
257: \newcommand{\piceps}[1]{\centerline{\epsfig{file=\epsdir/#1.eps}}}
258: 
259: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
260: %
261: % Encarts, listings
262: %
263: 
264: \newcommand{\boxlabel}{}
265: \newcommand{\boxcaption}{}
266: 
267: \newenvironment{encart}[2]{%
268:   \fboxsep=5mm
269:   \renewcommand{\boxlabel}{#1}
270:   \renewcommand{\boxcaption}{#2}
271:   \begin{pfigure}
272:     \begin{center}
273:       \begin{boxedminipage}{.9\textwidth}
274: }{%
275:         \vspace{-1.5\baselineskip}
276:         \hspace*{\fill}
277:       \end{boxedminipage}
278:     \end{center}
279:     \caption{\boxcaption}
280:     \label{enc/\boxlabel}
281:   \end{pfigure}
282: }
283: 
284: \newenvironment{encart2}[2]{%
285:   \fboxsep=5mm
286:   \renewcommand{\boxlabel}{#1}
287:   \renewcommand{\boxcaption}{#2}
288:   \begin{pfigure}
289:     \begin{center}
290:       \begin{boxedminipage}{.475\textwidth}
291: }{%
292:         \vspace{-1.5\baselineskip}
293:         \hspace*{\fill}
294:       \end{boxedminipage}
295:     \end{center}
296:     \caption{\boxcaption}
297:     \label{enc/\boxlabel}
298:   \end{pfigure}
299: }
300: 
301: \usepackage{times,palatino}
302: %\usepackage{fullpage}
303: %\usepackage{rri}
304: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
305: %
306: % File:         rri.sty
307: %
308: % Author(s):    Marc Daumas
309: %               <Marc.Daumas@Lip.Ens-Lyon.Fr>
310: %               Laboratoire de l'Informatique du Parallelisme
311: %
312: % Last modified on Wed Mar 01 12:44:17 GMT MET 2000 by daumas  
313: %      modified on Fri Feb 28 16:22:11 MET 1997 by mdaumas 
314: %
315: 
316: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
317: %
318: % Environnements
319: %
320: 
321: \ifx\titleLIP\undefined
322:   \ifx\RRetitle\undefined
323:     \newcommand{\RRItitle}[1]{}
324:     \newcommand{\RRItitre}[1]{}
325:     \newcommand{\RRIdate}[1]{}
326:     \newcommand{\RRIauthor}[1]{}
327:     \newcommand{\RRIthead}[1]{}
328:     \newcommand{\RRIahead}[1]{}
329:     \newcommand{\RRIabstract}[1]{\newcommand{\RRIpre}{\begin{abstract}#1\end{abstract}}}
330:     \newcommand{\RRIresume}[1]{}
331:     \newcommand{\RRIkeywords}[1]{}
332:     \newcommand{\RRImotscles}[1]{}
333:     \newcommand{\RRIbegin}{}
334:     \newcommand{\RRInumber}[1]{}
335:     \newcommand{\RRItheme}[1]{}
336:     \newcommand{\RRIprojet}[1]{}
337:     \newcommand{\RRIno}[1]{#1}
338:     \newcommand{\RRIyes}[1]{}
339:     \newcommand{\RRIinria}[1]{}
340:     \newcommand{\RRIlip}[1]{}
341:   \else
342:     \newcommand{\RRItitle}[1]{\RRetitle{#1}}
343:     \newcommand{\RRItitre}[1]{\RRtitle{#1}}
344:     \newcommand{\RRIdate}[1]{\RRdate{#1}}
345:     \newcommand{\RRIauthor}[1]{\RRauthor{#1}}
346:     \newcommand{\RRIthead}[1]{\titlehead{#1}}
347:     \newcommand{\RRIahead}[1]{\authorhead{#1}}
348:     \newcommand{\RRIabstract}[1]{\RRabstract{#1}}
349:     \newcommand{\RRIresume}[1]{\RRresume{#1}}
350:     \newcommand{\RRIkeywords}[1]{\RRkeyword{#1}}
351:     \newcommand{\RRImotscles}[1]{\RRmotcle{#1}}
352:     \newcommand{\RRIbegin}{\URRhoneAlpes \makeRR}
353:     \newcommand{\RRIpre}{}
354:     \newcommand{\RRInumber}[1]{}
355:     \newcommand{\RRItheme}[1]{\RRtheme{#1}}
356:     \newcommand{\RRIprojet}[1]{\RRprojet{#1}}
357:     \newcommand{\RRIno}[1]{}
358:     \newcommand{\RRIyes}[1]{#1}
359:     \newcommand{\RRIinria}[1]{#1}
360:     \newcommand{\RRIlip}[1]{}
361:     \newcommand{\french}{} 
362:     \newcommand{\english}{} 
363:   \fi
364: \else
365:     \usepackage{fancyheadings}
366:     \newcommand{\RRItitle}[1]{\titleLIP{#1}}
367:     \newcommand{\RRItitre}[1]{}
368:     \newcommand{\RRIdate}[1]{\dateLIP{#1}}
369:     \newcommand{\RRIauthor}[1]{\authorLIP{#1}}
370:     \newcommand{\RRIthead}[1]{\newcommand{\RRItheadv}{#1}}
371:     \newcommand{\RRIahead}[1]{\newcommand{\RRIaheadv}{#1}}
372:     \newcommand{\RRIabstract}[1]{\abstractLIP{#1}}
373:     \newcommand{\RRIresume}[1]{\resumeLIP{#1}}
374:     \newcommand{\RRIkeywords}[1]{\keywordsLIP{#1}}
375:     \newcommand{\RRImotscles}[1]{\motsclesLIP{#1}}
376:     \newcommand{\RRIbegin}{\rapportLIP\pagenumbering{arabic}\pagestyle{fancy}\rhead[\RRIaheadv]{\thepage}\lhead[\thepage]{\RRItheadv}\cfoot{}}
377:     \newcommand{\RRIpre}{}
378:     \newcommand{\RRInumber}[1]{\numberLIP{#1}}
379:     \newcommand{\RRItheme}[1]{}
380:     \newcommand{\RRIprojet}[1]{}
381:     \newcommand{\RRIno}[1]{}
382:     \newcommand{\RRIyes}[1]{#1}
383:     \newcommand{\RRIinria}[1]{}
384:     \newcommand{\RRIlip}[1]{#1}
385: \fi
386: 
387: 
388: \usepackage{color,amssymb}
389: 
390: \bibliographystyle{plain} 
391: 
392: \RRItitle{Computer validated proofs of \\ a toolset for adaptable arithmetic}
393: 
394: \RRItitre{Boîte à outils pour l'arithmétique numérique \\ validée par un preuve sur ordinateur}
395: 
396: \RRIdate{Janvier 2001} 
397: 
398: \RRIauthor{%
399:   Sylvie Boldo\RRIinria{, LIP} \\
400:   Marc Daumas\RRIinria{, CNRS} \\
401:   Claire Moreau-Finot\RRIinria{, LIP} \\
402:   Laurent Théry\RRIinria{, INRIA \\~}
403: }%
404: 
405: \RRIthead{Computer validated proofs of a toolset for adaptable arithmetic} 
406: \RRIahead{Sylvie Boldo, Marc Daumas, Claire Moreau-Finot and Laurent Théry} 
407: 
408: \RRInumber{2001-01} 
409: 
410: \RRIabstract{%
411:   Most  existing  implementations  of  multiple  precision  arithmetic
412:   demand  that the  user  sets  the precision  {\em  a priori}.   Some
413:   libraries  are said  adaptable in  the sense  that  they dynamically
414:   change the precision of  each intermediate operation individually to
415:   deliver  the target  accuracy according  to the  actual  inputs.  We
416:   present in this text a new adaptable numeric core inspired both from
417:   floating point expansions and from on-line arithmetic.
418:   
419:   The numeric core is cut down  to four tools.  The tool that contains
420:   arithmetic operations is proved to  be correct. The proofs have been
421:   formally checked  by the Coq  assistant.  Developing the  proofs, we
422:   have formally proved many results published in the literature and we
423:   have extended  a few of  them. This work  may let users  (i) develop
424:   application specific adaptable libraries  based on the toolset and /
425:   or (ii) write new formal proofs based on the set of validated facts.
426: }%
427: 
428: \RRIresume{%
429:   La plupart  des codes à  précision multiple existants  demandent que
430:   l'utilisateur spécifie la précision des calculs {\em a priori}.  Une
431:   solution est d'utiliser pour les calculs intermédiaires la précision
432:   la  plus  élevée  pour  atteindre  la  précision  demandée  pour  le
433:   résultat. On  dit que certaines bibliothèques  sont adaptatives dans
434:   le sens  qu'elles changent dynamiquement la précision  de chacun des
435:   calculs  intermédiaires  en  fonction  des  valeurs  effectives  des
436:   entrées pour atteindre la précision demandée pour le résultat.  Nous
437:   présentons  dans  ce  texte  un nouveau  noyau  numérique  adaptatif
438:   inspiré  à  la  fois  des  expansions de  nombres  flottants  et  de
439:   l'arithmétique en-ligne.
440:   
441:   Le noyau  numérique se décompose  en cinq outils.  Le  premier outil
442:   qui contient de nombreuses opérations arithmétiques est prouvé.  Les
443:   preuves  ont  été validées  formellement  par  l'assistant Coq.   En
444:   développant  les  preuves, nous  avons  redéveloppé formellement  de
445:   nombreux résultats déjà publiés dans la littérature et nous en avons
446:   étendu  certains.   Ce travail  permettra  aux  utilisateurs de  (i)
447:   développer   des  bibliothèques   adaptatives   spécifiques  à   une
448:   application basées  sur la  boîte à  outils et /  ou (ii)  écrire de
449:   nouvelles preuves formelles basées sur notre socle de faits validés.
450: }%
451: 
452: \RRIkeywords{Multiple  precision,  expansion,  on-line, formal  proof, Coq.}
453: 
454: \RRImotscles{Précision multiple, expansion, en-ligne, preuve formelle, Coq.}
455: 
456: \RRItheme{2}
457: 
458: \RRIprojet{Arénaire}
459: 
460: \RRIinria{\RRnote{%
461:     This  text  is  also  available   as  a  research  report  of  the
462:     Laboratoire    de     l'Informatique    du    Parallélisme    {\tt
463:       http://www.ens-lyon.fr/LIP}.
464: }}%
465: 
466: \begin{document} 
467: 
468: \RRIbegin
469: 
470: \newtheorem{proc}{Lemma}  
471: \newcommand{\ptys}[2]{{\noindent \bf{#1}}{\it{#2}}}
472: 
473: \title{%
474:   {\bf
475:     Computer validated proofs of \\
476:     a toolset for adaptable arithmetic}\thanks{%
477:     Authors  may be  reached via  e-mail  at Sylvie.Boldo@ENS-Lyon.Fr,
478:     Marc.Daumas@ENS-Lyon.Fr,       Claire.Moreau@ENS-Lyon.Fr       and
479:     Laurent.Thery@INRIA.Fr.  \RRIlip{This text  is also available as a
480:       research  report  of  the  Institut  National  de  Recherche  en
481:       Informatique et en Automatique {\tt http://www.inria.fr}.}
482:     }%
483: }%
484: 
485: \author{
486:   {\bf Sylvie Boldo, Marc Daumas, Claire Moreau-Finot} \\
487:   Laboratoire de l'Informatique du Parallélisme        \\
488:   UMR 5668 - CNRS - ENS de Lyon - INRIA                \\[24pt]
489:   {\bf Laurent Théry}                                  \\
490:   INRIA Nice---Sophia-Antipolis
491: }%
492: \date{}
493: 
494: \maketitle
495: \RRIpre
496: 
497: \section*{Introduction}
498: 
499: We will  first present examples of numerical  computations coming from
500: four  fields  of application.   With  each  example  we introduce  new
501: properties, new goals and some needed references.  As the subject will
502: become clear, we  will present the contributions of  this text and its
503: summary.
504: 
505: \subsection*{Motivation}
506: 
507: As an undergraduate student, we  have all learnt to compute a quantity
508: with a few significant digits first  as 5 miles is about 8 kilometers. 
509: If the question is ``Are 4.995 miles more than 8 kilometers?'', we are
510: close to  a threshold. To answer  the question, we  would compute more
511: (less significant) digits  to find that 4.995 miles  are a little over
512: 38  meters   longer  than   8  kilometers.   We   might  first   do  a
513: multiplication of 5 by 1.6  and then the exact multiplication of 4.995
514: by 1.609344.  However, we could save that 5 times 1.6 is exactly 8 and
515: afterward  just multiply  5 by  9  to obtain  45 meters  with a  small
516: additional  value  that is  less  than  5  meters.  That  answers  the
517: question since 0.005 miles is less than 10 meters.
518: 
519: Numerical  analysts have long  designed algorithms  that react  to the
520: accuracy aimed by the  user.  Countless specific algorithms reduce the
521: error on the final result by iterative refinement in solving algebraic
522: equations,  linear systems, ODEs,  PDEs and  so on  \cite{Hig96}. Such
523: methods are  so robust  that the final  solution can be  very accurate
524: even if the first attempt before any refinement is not close.
525: 
526: All  the  above-mentioned   solutions  assume  that  the  intermediate
527: arithmetic operations are performed up  to a fixed level of precision. 
528: This  level,  usually  the  one  given  by  machine  double  precision
529: arithmetic, is used for all  the operations and is sufficient to store
530: the result to  the accuracy aimed by the  user.  Recent work presented
531: in \cite{Hig96,SorTan91,Bar93}  proposes new enhanced  routines if two
532: levels of precision are available on the machine. The second level can
533: be obtained  by a careful use  of the tools presented  in this article
534: \cite{BolDau01}.  All  these algorithms use the  machine arithmetic as
535: an  error  prone  tool  that  must  be  circumvented  by  mathematical
536: analysis.
537: 
538: % For the solutions presented above, adaptability is defined at the {\em
539: %   application level}, we will see  in the remaining of this text, {\em
540: %   arithmetic level} adaptability.
541: 
542: \subsection*{Adaptability {\em versus} multiple precision}
543: 
544: A  numerical  algorithm  is   only  slightly  modified  to  accommodate
545: adaptable arithmetic or  multiple precision arithmetic.  With multiple
546: precision arithmetic, each operator is  fired only once and the result
547: is computed  with the precision  fixed at compile time.   Static error
548: analysis is  used to guarantee that  the final result is  known to the
549: accuracy specified by  the user.  Being static, the  error analysis is
550: often too pessimistic.  As it is readily available, multiple precision
551: arithmetic is often chosen by users.
552: 
553: When  a code  has  been  modified to  become  adaptable, every  single
554: operator is started with a very limited precision.  The runtime system
555: freezes the operator but it is able to resume its work when necessary.
556: The  program is  not ended  unless the  final result  is known  to the
557: accuracy specified by the user. The evaluation continues until this is
558: the  case,  with  the  system  increasing  automatically  the  working
559: precision of each operator independently.
560: 
561: 
562: Coherency  is key  to the  field of  computational geometry  where one
563: single tiny error  may change a convex hull  to a non-convex, possibly
564: non-planar,  graph.   Numerical  routines  are  used  to  answer  such
565: predicates as ``Is  a point in a circle defined by  3 other points?''. 
566: A Boolean answer cannot be  approximated, it is either right or wrong,
567: and one single wrong answer might  lead a program to an abstract state
568: that cannot  occur in Euclidian geometry.  Recent  works have proposed
569: clever    implementations    of    multiple    precision    arithmetic
570: \cite{ForVan93,KarLiPecYap99}  to  answer  correctly such  questions.  
571: They reinforce the existing  set of general purpose multiple precision
572: libraries \cite{Bre78a,SerVuiHer89,Bai93,Bai95,Gra2Kb}.  Some marginal
573: work does implement iterative  refinements of the geometric predicates
574: \cite{AvnBoiDevPreYvi97}  or change  the algorithm  to  work correctly
575: with limited predicates \cite{BoiPre2K}.
576: 
577: Adaptability is used for our last example. It is the central scheme of
578: Ziv's paper \cite{Ziv91} where the author implements correctly rounded
579: elementary functions (circular and exponential).  Correct rounding for
580: the  floating   point  numbers   is  mathematically  defined   by  two
581: international      standards      (ANSI-IEEE-ISO~754      and      854
582: \cite{Ste.81,Ste.87,Gol91,CodKar.84}).  It  is a monotonous projection
583: from the  real set to the set  of the floating point  numbers.  If all
584: the  numbers of  a given  interval round  to the  same  floating point
585: value,  you can  safely  round this  interval  to the  common result.  
586: Otherwise, you cannot round the interval.
587: 
588: An approximated algorithm such as  the ones used for the evaluation of
589: the elementary  functions \cite{Mul97}  does not produce  directly the
590: rounded result  but it  computes an approximated  result and  an error
591: bound.  This pair defines an interval that contains the exact result.
592: \begin{itemize}
593: \item  If the interval  is narrow,  it rounds  to one  single floating
594:   point  value  that is  the  correctly  rounded  value of  the  exact
595:   elementary  function.
596: \item If we discover from  the current approximation that the interval
597:   contains a  discriminating point, the approximation  must be refined
598:   to  reduce its  width.  This  process continues  until  a satisfying
599:   approximation is obtained to round the interval to one single value.
600:   The set of the discriminating  point is the set of the representable
601:   numbers for the directed roundings and the set of the midpoints when
602:   rounding to the nearest.
603: \end{itemize}
604: 
605: With the first  iteration of Ziv's scheme, machine  precision is used. 
606: For the  result interval to  get sharper, the approximation  scheme is
607: only slightly modified  but the target precision is  increased and the
608: arithmetic  operations become  increasingly slow.  Cases where  a high
609: precision result  is needed are  very infrequent and the  running time
610: seen by  a user will most  probably be the  time for the first  or the
611: second iteration.
612: 
613: Adaptable behaviour  is sometime recreated with Mathematica  or Maple. 
614: These two softwares let the user change the working precision and they
615: provide some criteria to estimate the final accuracy of many numerical
616: routines \cite{ChaGedGonMonWat91}.  Users set  the number of digits of
617: the intermediate precision after a few educated random attempts.
618: 
619: \subsection*{Prior art}
620: 
621: \paragraph{Adaptability}
622: 
623: Some  few  authors  already  presented  a  general  purpose  adaptable
624: arithmetic                                                      library
625: \cite{Boe87,Men94,Men95,BerDau97,MicMor97,BurFleMehSch99},   but  some
626: others stopped with one or two adaptable attempts followed by an exact
627: evaluation   \cite{ForVan93,She97}.     These   last   solutions   are
628: appropriate in many cases because  the first few attempts are fast and
629: they succeed  by far in the  most frequent case  for many applications
630: \cite{DevPre98}. These authors where  forced to stop because they used
631: a  data structure  that  is not  appropriate  for adaptability.   They
632: forced  one  or  two  rounds  of  adaptability  with  great  efforts.  
633: 
634: \paragraph{Floating point expansion}
635: 
636: This  data structure was  introduced by  Priest \cite{Pri91}  based on
637: some earlier  operations \cite{Knu69,Knu73}.  Actually,  M{\o}ller and
638: Dekker were the first in the past to propose such techniques to extend
639: precision  on  the  floating  point unit  \cite{Mol65a,Mol65b,Dek71}.  
640: Kahan  and  Pichat  applied  similar  techniques  for  other  purposes
641: \cite{Kah65,Pic72}.
642: 
643: 
644: Fast algorithms, tightly connected  to the machines, are possible with
645: IEEE-754 compatible commercial floating point units.  Shewchuk was the
646: first one  to present a working  library available on the  net with an
647: actual  application  to  computational  geometry  \cite{She96,She97}.  
648: Assuming that  the floating point unit  is IEEE compatible  gives us a
649: powerful set of axiomatic properties on floating point operations.  We
650: have proposed in the past some algorithms and their implementation for
651: multiplication        and         division        on        expansions
652: \cite{Dau99a,DauFin98,DauFin99a}.  Developments are still undergone as
653: new research teams get  interested by this approach \cite{Sei2K}.  Our
654: contribution forces  us to define a  new kind of  expansions.  We call
655: them pseudo-expansions.
656: 
657: % This     definition    yields     a     more    redundant     notation
658: % \cite{Mat82,NieKor99}.  Although  the proof of  \cite{Maz93a} does not
659: % apply here, it is sensible  that more redundancy in the notation eases
660: % the implementation  of most significant digit  first (MSDF) arithmetic
661: % \cite{Avi61,Mul94a,Fro91}.
662: 
663: \paragraph{On-line arithmetic}
664: 
665: We will  present in  this text that  we can  reduce the growth  of the
666: running time by setting up an appropriate data structure that will not
667: restart  an  adaptable evaluation  from  scratch  as the  intermediate
668: precision is modified.
669: 
670: This  data   structure  is   inspired  by  on-line   arithmetic  where
671: adaptability is natural.  On line  arithmetic was targeted in the past
672: to   hardware  design   with  small   radices  (typically   2   or  4)
673: \cite{TriErc77,CorDupHocMul91,LouErc93,DauMulVui94,DauMulTis97} but it
674: has    proved    to   be    suitable    for   software    applications
675: \cite{MazMer93,BerDau97}.
676: 
677: \subsection*{Contributions}
678: 
679: The first contribution  of this work is a  set of primitives available
680: soon  on   the  Internet  through  our   home  web  site\footnote{{\tt
681:     http://www.ens-lyon.fr/LIP/Arenaire/}.}   and   through  the  {\sc
682:   netlib} repository\footnote{{\tt  http://www.netlib.org/} among many
683:   access protocols.}.  A toy example shows that high level programming
684: with  object  interface  is  too  slow for  the  kernel  of  numerical
685: softwares \cite{Gog2K}.   We ran the  same FLOPS routines  three times
686: \cite{Don92,Don99}.  The first run was performed with object automatic
687: allocation and destruction.  The second run used in-line function calls
688: and no object allocation.  The final code used explicit code in-lining.
689: Removing  object programming  increased the  performance of  133\% and
690: using code  in-lining yielded  another 56\%.  Our  contributed building
691: blocks are pieces  of code assembled by the  designer according to its
692: needs.   They present a  straight interface  to get  a good  trade off
693: between simplicity and speed.
694: 
695: The second  contribution is a  set of validated proofs.   To mechanise
696: our    proofs,     we    have    been    using     the    Coq    proof
697: assistant~\cite{HueKahPau97}.   Systems  like Coq  allow  the user  to
698: define  new objects and  to derive  consequences of  these definitions
699: formally. The language of Coq  is based on a higher-order logic.  With
700: such an expressive logic, it  is possible to state properties in their
701: most  general form.   For example,  universal quantification  has been
702: used to state  properties that are true for an  arbitrary format or an
703: arbitrary  rounding  mode.    Proofs  are  built  interactively  using
704: high-level  tactics. At the  end of  each proof,  Coq records  a proof
705: object that  contains all  the details of  the derivation  and ensures
706: that  the  theorem  is   valid.  Theorem  provers  have  already  been
707: successfully   used   to  mechanically   check   the  correctness   of
708: floating-point algorithms~\cite{Rus98,Har97a}.
709: 
710: Our    formal    development    is    freely    available    on    the
711: Internet\footnote{{\tt  http://www-sop.inria.fr/lemme/AOC/coq/}.}  and
712: it  will  soon  be   available  as  a  Coq  contribution\footnote{{\tt
713:     http://coq.inria.fr/}.}.  A clickage map of the hierarchy makes it
714: possible to browse  into the different components.  At  the moment, it
715: is 22000 line  long and it contains 106 Definitions  and 545 Theorems. 
716: No proof is presented in detail in this manuscript, but we outline the
717: main results  used for the  proofs. Since the proofs  are mechanically
718: validated, the  reader is  sure that the  annoying special  cases have
719: been checked and that all the conditions of the properties used in the
720: proof have been met.  \RRIyes{All the Definitions and Theorems of this
721:   development  are  presented  in  annexes  A through  W.}   For  each
722: mentioned  theorem  we give  its  name as  it  appears  in the  formal
723: development and the file where it is proved.
724: 
725: We first present definitions and validated properties for the floating
726: point numbers,  the expansions  and the pseudo-expansions.   Section 2
727: presents  the  addition, multiplication  and  division toolset.   This
728: presentation ends with concluding remarks in the last Section.
729: 
730: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
731: 
732: \newcommand{\rr}{\mathbb{R}}
733: \newcommand{\nn}{\mathbb{N}}
734: \newcommand{\ideal}[1]{<\!\!#1\!\!>}
735: \newcommand{\lcm}[2]{#1\!\hbox{\^{}}#2}
736: 
737: \newlength{\ppwd}
738: \newlength{\ppht}
739: \newcommand{\rpp}[1]{
740: \settowidth{\ppwd}{#1}
741: \settoheight{\ppht}{#1}
742: \raisebox{\ppht}[0pt]{\makebox[0pt][l]{
743: \hspace{.15\ppwd}{\tiny \tt >}}}#1}
744: \newcommand\pp{\rpp{+}}
745: \newcommand{\rppa}[1]{
746: \settowidth{\ppwd}{#1}
747: \settoheight{\ppht}{#1}
748: \raisebox{\ppht}[0pt]{\makebox[0pt][r]{
749: {\tiny \tt >}\hspace{-5pt}}}#1}
750: \newcommand\ppa{\rppa{+}}
751: %\newcommand\pp{\hbox{\raise6pt\hbox{${\hbox{\raise-5pt\hbox{\tiny \tt >}}}\atop+$}}}
752: \newcommand{\limp}{\Rightarrow}
753: \newcommand{\reduce}{\rightarrow^{\ }_S}
754: \newcommand{\reducep}{\rightarrow^+_S}
755: \newcommand{\reduces}{\rightarrow^*_S}
756: \newcommand{\mon}{M_n}
757: \newcommand{\lm}{\le_{M_n}}
758: \newlength{\hsbw}
759: \newcommand\MSpacing{13pt}
760: \newenvironment{cboxed}{\begin{flushleft}
761: \setlength{\hsbw}{\textwidth}
762: \addtolength{\hsbw}{-\arrayrulewidth}
763: \addtolength{\hsbw}{-\tabcolsep}
764:  \begin{tabular}{@{}|c@{}|@{}}\hline 
765:  \begin{minipage}[b]{\hsbw}
766:  \vspace*{.06in}
767:  \begingroup\small\baselineskip\MSpacing}{\endgroup\end{minipage}\\ \hline 
768:  \end{tabular}
769:  \end{flushleft}}
770: \newcommand{\term}{{\it term}}
771: \newcommand{\pol}{{\it poly}}
772: \newcommand{\grob}{\hbox{\it Gröbner}}
773: \newcommand{\cset}{{\it Set}}
774: \newcommand{\cprop}{{\it Prop}}
775: \newcommand{\cfun}{\rightarrow}
776: \newcommand{\zerop}[1]{{\it zeroP}(#1)}
777: \newcommand{\lpol}{({\it list}\, \pol)}
778: \newcommand{\lterm}{({\it list}\, \term)}
779: \newcommand{\cano}{\mathcal{C}}
780: \newcommand{\olist}{\mathcal{O}}
781: \newcommand{\cforall}[3]{\forall #1\!\!:#2.\, #3}
782: \newcommand{\cforalln}[2]{\forall #1\!\!:#2}
783: \newcommand{\clambda}[3]{\lambda #1\!\!:#2.\, #3}
784: \newcommand{\cexists}[3]{\exists #1\!\!:#2.\, #3}
785: \newcommand{\cin}[2]{#1\,\,{\it in}\,#2}
786: \newcommand{\cdef}[3]{\hbox{{\bf Definition}$\,\, #1{\bf :}\, #2\,{\bf :=}\, \,#3$.}}
787: \newcommand{\cdefq}[2]{\hbox{{\bf Definition}$\,\, #1\,{\bf :=}\, \,#2$.}}
788: \newcommand{\cdefqn}[2]{\hbox{{\bf Definition}$\,\, #1\,{\bf :=}\, \,#2$}}
789: \newcommand{\cdeffn}[3]{\hbox{{\bf Definition}$\,\, #1{\bf :}\, #2\,{\bf :=}\,\,  #3$}}
790: \newcommand{\cdefp}[2]{\hbox{{\bf Definition}$\,\, #1\, {\bf :}\, \, #2$.}}
791: \newcommand{\cdefpn}[2]{\hbox{{\bf Definition}$\,\, #1\, {\bf :}\, \, #2$}}
792: \newcommand{\cdefrn}[2]{\hbox{{\bf Definition}$\,\, #1\, {\bf :}\, \, #2\,{\bf :=}$}}
793: \newcommand{\cthm}[2]{\hbox{{\bf Theorem}$\,\,  #1 {\bf :}\, #2$.}}
794: \newcommand{\cthmn}[2]{\hbox{{\bf Theorem}$\,\,  #1 {\bf :}\, #2$}}
795: \newcommand{\cthmrn}[1]{\hbox{{\bf Theorem}$\,\,  #1 {\bf :}$}}
796: \newcommand{\clem}[2]{\hbox{{\bf Lemma}$\,\, #1{\bf :}\quad  #2$.}}
797: \newcommand{\clemn}[2]{\hbox{{\bf Lemma}$\,\, #1{\bf :}\quad #2$}}
798: \newcommand{\cdefn}[2]{\hbox{{\bf Definition}$\,\, #1{\bf :}\quad  #2\, {\bf :=}$}}
799: \newcommand{\cfix}[3]{\hbox{{\bf Fixpoint}$\,\, #1\,{\bf [}#2{\bf ]:}\quad  #3\, {\bf :=}$}
800: }
801: \newcommand{\cfixn}[4]{\hbox{{\bf Fixpoint}$\,\, #1\,{\bf [}#2{\bf ]:}\quad  #3\, {\bf :=} #4$}
802: }
803: \newcommand{\caxiom}[2]{\hbox{{\bf Axiom}$\,\, #1{\bf :}\quad  #2$.}}
804: \newcommand{\caxiomn}[2]{\hbox{{\bf Axiom}$\,\, #1{\bf :}\quad  #2$}}
805: \newcommand{\cparam}[2]{\hbox{{\bf Parameter}$\,\, #1{\bf :}\quad  #2$.}}
806: \newcommand{\cparamn}[2]{\hbox{{\bf Parameter}$\,\, #1{\bf :}\quad  #2$}}
807: \newcommand{\ccas}[1]{\hbox{$\quad${\bf Cases}$\,\, #1\,\,{\bf of}$}}
808: \newcommand{\cend}{\hbox{$\quad${\bf end}}}
809: \newcommand{\cp}[1]{\hbox{#1.}}
810: \newcommand{\ccasf}[2]{\hbox{$\quad \ \, #1\,\, {\bf \Longrightarrow}\,\, #2$}}
811: \newcommand{\ccasi}[2]{\hbox{$\quad |\, #1\,\, {\bf \Longrightarrow}\,\, #2$}}
812: \newcommand{\clet}[2]{\hbox{$\quad${\bf let }$\, #1=#2\, {\bf in}$}}
813: \newcommand{\clin}[1]{\hbox{$\quad #1$}}
814: \newcommand{\cvar}[2]{\hbox{{\bf Variable}$\,\, #1{\bf :}\, #2$.}}
815: \newcommand{\chyp}[2]{\hbox{{\bf Hypothesis}$\,\, #1{\bf :}\, #2$.}}
816: \newcommand{\chypn}[2]{\hbox{{\bf Hypothesis}$\,\, #1{\bf :}\, #2$}}
817: \newcommand{\cind}[2]{\hbox{{\bf Inductive}$\,\, #1{\bf :}\, #2 :=$}}
818: \newcommand{\cindu}[3]{\hbox{{\bf Inductive}$\,\, #1[#2]{\bf :}\, #3 :=$}}
819: \newcommand{\cicn}[2]{\hbox{${\bf |}\quad #1\,{\bf :}\,#2$}}
820: \newcommand{\cicnf}[2]{\hbox{$\ \quad #1\,{\bf :}\,#2$}}
821: \newcommand{\cicnl}[2]{\hbox{${\bf |}\quad #1\,{\bf :}\,#2$.}}
822: \newcommand{\cicnfl}[2]{\hbox{$\ \quad #1\,{\bf :}\,#2$}}
823: \newcommand{\cic}[1]{\hbox{${\bf |}\quad #1$}}
824: \newcommand{\cicf}[1]{\hbox{$\ \quad #1$}}
825: \newcommand{\cicl}[1]{\hbox{${\bf |}\quad #1$.}}
826: \newcommand{\cicfl}[1]{\hbox{$\ \quad #1$.}}
827: 
828: \section{Definitions and validated properties}
829: 
830: \subsection{Floating point numbers}
831: 
832: An IEEE double precision floating  point number is built from 3 binary
833: fields:  the sign  (1  bit), the  fraction  (52 bits)  and the  biased
834: exponent  (11 bits).   Its  normal interpretation  $x$  as a  rational
835: number is given below.
836: $$
837: \begin{array}{r c l}
838:   \text{mantissa} & = & 1.\text{fraction} \\
839:   \text{exponent} & = & \text{biased exponent} - \text{bias} \\
840:                x  & = & (-1)^{\text{sign}} \times \text{mantissa} \times 2^{\text{exponent}}
841: \end{array}
842: $$
843: 
844: %\figtex{fp}{Binary fields of a floating point number.}
845: 
846: We  will use  in this  text  the more  general notation  $x= n  \times
847: \beta^{e}$ to define a floating point number of radix $\beta$ with two
848: integers, the  significand $n$ and  the amplitude $e$.   This relation
849: gives a  signification to  any pair  $(n, e)$.  We  will use  the name
850: floating point  number or {\bf float}  to represent such  a pair.  Two
851: floats $(n,  e)$ and $(n', e')$  are equivalent if they  have the same
852: value as real  numbers, we write $(n, e) \equiv  (n', e')$.  We extend
853: this notation to the represented real value and we may write $n \times
854: \beta^e  \equiv  (n  ,e)$.   We  will also  use  the  equivalence  for
855: expression whenever there is no ambiguity.  No order is defined on the
856: floats and inequalities are given implicitly on their interpretation.
857: 
858: Given a  pair $(n_{\max}, e_{\min}) \in  \N_*^2$, we say  that a float
859: $(n, e)$ is {\bf bounded} if and only if it satisfies
860: \begin{equation}
861: \label{eqn/bounded}
862:   |n| \le n_{\max}
863: ~~~~~ \text{and} ~~~~~
864:   - e_{\min} \le e.
865: \end{equation}
866: 
867: We do  not use  the overflow bound  of $e_{\max}$.  The  IEEE standard
868: defines an overflow  when the rounded value of  the result exceeds the
869: largest  representable  number.   It  implies that  the  rounding  is
870: defined  on a data  type that  does not  have an  upper bound  for the
871: exponent.   All the  results of  this text  are true  provided neither
872: overflow  nor   exact  infinity  (division  by  0)   occurred  in  the
873: computation.  If this is not the case, infinity and {\em not a number}
874: quantities will proliferate in the results.
875: 
876: The bound for an IEEE standard inspired representation with $p$ digits
877: of mantissa and $r$ digits  of exponent is given Table~\ref{tab/ieee}. 
878: We will use the common fraction notation for the examples although the
879: integer  notation  is  used  for   the  proofs.   We  use  only  radix
880: independent generic IEEE standard inspired  bounds in the rest of this
881: text.   A  bounded  float is  {\bf  normal}  if  $|n| \times  \beta  >
882: n_{\max}$.  It is {\bf de-normal} if $|n| \times \beta \le n_{\max}$. A
883: de-normal float such that $e = -e_{\min}$ is called {\bf subnormal}. We
884: define the  {\bf ulp} value as  one unit in  the last place of  1 that
885: equals $\beta / (n_{\max} + 1)$.
886: 
887: \begin{table}
888: \caption{IEEE standard inspired bounds}
889: \label{tab/ieee}
890: \begin{center}
891: \begin{tabular}{| c | c | c | c |}\hline
892:            & Radix independent                            & \mc{2}{| c |}{$\beta = 2$}                     \\
893:            & (Generic)                                    & Single                & Double                 \\ \hline   
894: $p$        & Mantissa width                               & 24                    & 53                     \\
895: $n_{\max}$ & $\beta^p - 1$                                & 16~777~215            & 9~007~199~254~740~991  \\
896: $r$        & Exponent width                               & 8                     & 11                     \\
897: bias      & $\lceil  \beta^r / 2 \rceil  - 1$            & 127                   & 1023                   \\
898: $e_{\min}$ & $\lceil  \beta^r / 2 \rceil  + p - 3$        & 149                   & 1074                   \\
899: $e_{\max}$ & $\lfloor \beta^r / 2 \rfloor - p$            & 124                   & 971                    \\
900: ulp        & $\beta^{1 - p}$                              & $1.19 \times 10^{-7}$ & $2.22 \times 10^{-16}$ \\ \hline
901: \end{tabular}
902: \end{center}
903: \end{table}
904: 
905: \begin{theo}[FnormalizeCanonic and FcanonicUnique in Form]
906:   ~ Any bounded float has one unique equivalent float $(n, e)$ that is
907:   normal or subnormal.  This float  is the direct transcription of the
908:   number in machine with
909:   $$
910:   \begin{array}{r c l}
911:     n & = &  \text{mantissa} \times \beta^{p - 1}            \\
912:     e & = & (\text{biased exponent} - \text{biais}) - p + 1. \\
913:   \end{array}
914:   $$%
915:   It  is  later   referred  in  this  text  as   the  {\bf  canonical}
916:   representation. 
917: \end{theo}
918: 
919: The IEEE  standard describes four  rounding modes but the  rounding to
920: the nearest floating point number is the rounding mode used by default
921: in most  computers.  A  rounding is generally  defined as  a monotonous
922: projection onto the set of  the canonical floats.  Considering all the
923: floats rather than just the  canonical ones, we would rather define it
924: as a projection onto subsets of bounded floats.
925: 
926: For  any real  number  $x$, we  define  its bounded  floor $\lfloor  x
927: \rfloor$ as the class of the  maximum bounded floats smaller than $x$. 
928: It means  that a bounded float $(n,  e)$ is in $\lfloor  x \rfloor$ if
929: and only if, for all the bounded floats $(n', e')$ 
930: $$
931: (n', e') \le x ~~~~~ \Longrightarrow ~~~~~ (n', e') \le (n, e)
932: $$%
933: We define identically its bounded  ceil $\lceil x \rceil$ as the class
934: of  the  minimum  bounded  floats  larger than  $x$.   We  define  the
935: truncated subset $F(x)$ as follows.
936: $$
937: F(x) = \left\{\begin{array}{l c}
938:   \lfloor x \rfloor                     & x > 0 \\
939:   \lceil  x \rceil                      & x < 0 \\
940:   \{ (0, e), ~~ -e_{\min} \le e \}      & x = 0 \\
941: \end{array}\right.
942: $$
943: 
944: We also define the class of the bounded floats nearest to $x$ as $\Box
945: (x)$  with $(n,  e)$ is  in $\Box  (x)$ if  and only  if, for  all the
946: bounded floats $(n', e')$
947: $$
948: | (n, e) - x | \le | (n', e') - x|
949: $$
950: 
951: When $\Box  (x)$ contains two distinct equivalence  classes of bounded
952: floats,  we  define $\circ(x)$  as  the class  of  $\Box  (x)$ of  the
953: elements where  the canonical representation has an  even significand. 
954: In other cases,  $\circ(x) = \Box (x)$.  The  following theorem states
955: that this definition is appropriate.
956: 
957: \begin{theo}[*RoundedModeP in Fround and Closest]
958:   ~ The  relations rounding down  $\lfloor \cdot \rfloor$,  up $\lceil
959:   \cdot \rceil$, to zero $F  (\cdot)$ and to the nearest (even) $\circ
960:   (\cdot)$ define  each a total  monotonous projection from  the reals
961:   onto the  bounded floats compatible  with the interpretation  of the
962:   floats.
963: \end{theo}
964: 
965: We now give a few results on the rounding to the nearest.  The theorem
966: could be defined  on the canonical representation but  it is better to
967: prove it for any bounded float.
968: 
969: \begin{theo}[ClosestErrorBound in ClosestProp]
970:   \label{theo/Round/Error}% 
971:   Let $x$ be any real number and $(n, e)$ any float of $\Box (x)$,
972:   $$
973:   |x - n \beta^e| \le \beta^e  / 2.
974:   $$
975:   That relation  is independent of the rounding being  even or not. 
976:   Even rounding provides that if both  $(n, e) \in \circ(x)$ and $|x -
977:   (n, e)| \equiv \beta^e / 2$ then $n$ is even.
978: \end{theo}
979: 
980: The following theorem gives a relation for every representation of the
981: rounded value and the error provided that the error is non zero and it
982: does have  a bounded representation.  The universal  quantifier is the
983: key to prove the theorems~\ref{theo/TS/Exp} and \ref{theo/M/Exp}.
984: 
985: \begin{theo}[RoundedModeErrorExpStrict in FRoundProp]
986:   \label{theo/Round/Error/Strict}
987:   Given an arbitrary rounding mode, let  $x$ be a real number and $(n,
988:   e)$  a  bounded  float  rounded  from  $x$, such  that  $x$  is  not
989:   represented by  $(n, e)$  ({\em ie.}  $x  \neq n \beta^e$).  For all
990:   bounded  float  $(n',  e')$ that  is  a  representation  of $x  -  n
991:   \beta^e$,
992:   $$
993:   e' < e.
994:   $$
995:   The bound  is tight as $(n', e')  = (1, e - 1)$  is an acceptable
996:   round-off error.
997: \end{theo}
998: 
999: The  result of  any implemented  operation, namely  the  addition, the
1000: multiplication, the  division and the  square root extraction,  is the
1001: rounded result  of the exact mathematical operation.   For example, if
1002: $a \oplus b$ is  the machine addition then $a \oplus b  \in \circ (a +
1003: b)$.  The machine  operation could  have  been defined  to return  the
1004: canonical  float  in  $\circ(a  +  b)$.  We  preferred  to  define  an
1005: acceptable implementation as a  function that return any bounded float
1006: in $\circ(a + b)$.
1007: 
1008: When a theorem is independent of the tie breaking rule implemented, we
1009: use the boxed symbols  $\boxplus$, $\boxminus$ and $\boxtimes$ for the
1010: addition, the  subtraction and the  multiplication in the  hypothesis. 
1011: Figure~\ref{fig:fpop}  presents  the   symbol  of  the  four  standard
1012: floating point operators used in  this work.  All the figures use even
1013: rounding as no other implementation exists.
1014: 
1015: \figtex{fpop}{Standard  floating  point   operations  rounded  to  the
1016:   nearest (even) value.}
1017: 
1018: % Given  a single  or  double precision,  we  can define  the {\bf  ulp}
1019: % function that stands  for the weight of one unit in  the last place of
1020: % $x$ and that is the weight of the last bit of the mantissa of $x$. Any
1021: % non  zero  normalized  bounded  floating point  number  $x$  satisfies
1022: % $\omega (x)  \ge ulp  (x) = \alpha  (x) ulp  (1)$.  We will  later use
1023: % these equations  and the fact  that $\alpha$ and $ulp$  are monotonous
1024: % functions.  The last equality  does not hold for denormalized numbers,
1025: % yet treating these  numbers is beyond our goal in  this work.  For any
1026: % non zero floating point number, we define $m(x)=\frac{x}{\alpha(x)}$ ;
1027: % $m(x)$ is the mantissa of $x$. We know that $|m(x)| \in [1,2-ulp]$.
1028: 
1029: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1030: 
1031: \subsection{Exact operations}
1032: 
1033: We used for our proofs of the exact operation a result first published
1034: in \cite{Ste74}. It was later  presented in \cite{Gol91} for the radix
1035: 2 notation.  This fact is true for any bounded representation with any
1036: radix.
1037: 
1038: \begin{theo}[Sterbenz in Fprop]
1039:   Given two  bounded floats $x$ and $y$  such that ${y \over  2} \le x
1040:     \le 2  y$, the rational  $x - y$  can be represented by  a bounded
1041:     float.
1042: \end{theo}
1043: 
1044: We immediately prove  the next result from its  negation after setting
1045: $\beta$ to be 2. We use it later in our proofs.
1046: 
1047: \begin{theo}[plusClosestLowerBound in Closest2Plus]
1048:   Given two bounded floats radix 2, $x$ and $y$, such that $x + y \neq
1049:   x \oplus y$, we know that $|x \oplus y| \ge \max (|x|, |y|) / 2$.
1050: \end{theo}
1051: 
1052: M{\o}ller  first,  then  Knuth   proposed  the  common  exact  sum  of
1053: Figure~\ref{fig:badd}-(a) involving  4 floating point  additions and 2
1054: floating point subtractions on an IEEE compliant computer.  The result
1055: is a pair of  floats $(a', b')$ such that $a' = a  \oplus b$ and $a' +
1056: b' \equiv a  + b$.
1057: 
1058: \figtex{badd}{Exact sum}
1059: 
1060: We have proved this algorithm  correct (Theorem {\bf Knuth in TwoSum})
1061: using  the proof given  by Priest  in~\cite{Pri91}.  Actually  we will
1062: never use  directly the algorithm but  only the property  that the sum
1063: and the  rounding error  can be represented.   This is  proved picking
1064: bounded representations that have special properties:
1065: 
1066: % We did not  validate this long published algorithm, but  we set up its
1067: % specification  and   we  deduced  the  following   properties  on  its
1068: % outputs\footnote{In  fact, the  correct behavior  of the  algorithm is
1069: %   proved for radix 2 in file {\bf TwoSum} although it is not necessary
1070: %   in  the course  of this  work \cite{DauRidThe01}.}.   Since  we only
1071: % prove that there exists  one bounded representation, the theorem gives
1072: % some properties of this special representation.
1073: 
1074: % \begin{theo}[errorBoundedPlus in ClosestPlus]
1075: %   \label{theo/TS/exists}
1076: %   Given two bounded floats $a =  (n_a, e_a)$ and $b = (n_b, e_b)$, for
1077: %   any $a' \in a \boxplus b$, there exists $n'$ such that $(n', e')$ is
1078: %   a bounded float with
1079: %   $$
1080: %     a' + (n', e') \equiv a + b
1081: %     ~~~~~ \text{and} ~~~~~
1082: %     e' = \min (e_a, e_b).
1083: %   $$
1084: % \end{theo}
1085: 
1086: %The rounded  sum $a \oplus  b$ and  the additional error  $a + b  - (a
1087: %\oplus b)$  are both  integer multiple of  $\beta^l$ where $l$  is the
1088: %lowest of  the ulps  of $a$  and $b$.  No  significant bit  is created
1089: %right of the rightmost non zero  significant bit of the inputs and the
1090: %ulp of the sum is limited as we can see in the next fact.
1091: 
1092: \begin{theo}[plusExactExp in ClosestPlus]
1093:   \label{theo/TS/Exp}
1094:   Given two bounded floats $a =  (n_a, e_a)$ and $b = (n_b, e_b)$, for
1095:   all $a' \in a \boxplus b$, we  can define $(n, e) \equiv a'$ and $b'
1096:   = (n', e')$, bounded floats, such that
1097:   $$
1098:     a' + b' \equiv a + b
1099:     ~~~~~ \text{and} ~~~~~
1100:     \min (e_a, e_b) = e' \le e \le \max (e_a, e_b) + 1.
1101:   $$
1102: \end{theo}
1103: 
1104: This theorem uses the Theorem~\ref{theo/Round/Error} and Theorems {\bf
1105:   errorBoundedPlus}, {\bf  plusExpBound} not presented  here to define
1106: an    existing    representation    of    $a'$    and    $b'$.     The
1107: theorem~\ref{theo/Round/Error/Strict} is used  to connect both results
1108: since it is true for any  representation of $a'$ and $b'$. The special
1109: case where $b' = 0$ is handled independently.
1110: 
1111: % \begin{theo}[plusExact1 and plusExact1 in FroundPlus]
1112: %   Given two  bounded representations $(n_a, e_a)$ and  $(n_b, e_b)$ of
1113: %   the floating  point numbers $a$ and  $b$. Let $(n, e)$  be a bounded
1114: %   representation of $\circ  (a + b)$.  The rounded  sum is exact, {\em
1115: %     id}  $\circ (a  + b)  = a  +  b$, when  any one  of the  following
1116: %   conditions is satisfied:
1117: %   \begin{itemize}
1118: %   \item $e \le  \min (e_a, e_b)$.
1119: %   \item $(n_a, e_a)$ is canonical and $e < e_a - 1$
1120: %   \end{itemize}
1121: % \end{theo}
1122: 
1123: The last Theorem prompts the fact  that the addition is accurate up to
1124: one unit  in the last  place. Such result  is needed to show  that the
1125: numerical tools of the next Section actually do some work.
1126: 
1127: \begin{theo}[plusErrorBoundUlp in ClosestPlus]
1128:   \label{theo/work}
1129:   Given two bounded floats $a$ and  $b$, for all $a' \in a \boxplus b$
1130:   such that $a' \not\equiv 0$, we know that
1131:   $$
1132:   |a + b - a'| \le       |a'|      {\ulp  \over 2}
1133:                \le \max (|a|, |b|)  \ulp.
1134:   $$%
1135:   The last  inequality is true only  working radix 2. It  is proved in
1136:   theorem plusErrorBound2 in file Closest2Plus.
1137: \end{theo}
1138: 
1139: It has also  been proved by M{\o}ller and  presented in Knuth's second
1140: edition that one can get a correct pair $(a', b')$ by an early exit of
1141: the  exact  sum  provided  $|b|  \le |a|$.   The  conditional  sum  of
1142: Figure~\ref{fig:badd}-(b) returns an exact pair $(a', b'') \equiv (a',
1143: b')$  with only  2 floating  point  additions and  one floating  point
1144: subtraction.
1145: 
1146: Knuth suggested that the early exit  is still valid if $a$ and $b$ are
1147: both canonical  and share the same  exponent.  We prove  here a result
1148: rephrased from \cite{Dau99a} that $a$  and $b$ just have to be bounded
1149: and the exponent be in order.
1150: 
1151: \begin{theo}[ExtDekker in EFast2Sum]
1152:   On a  radix 2 IEEE inspired  floating point unit,  given two bounded
1153:   floats $a  = (n_a, e_a)$ and  $b = (n_b, e_b)$,  the conditional sum
1154:   presented Figure~\ref{fig:badd}-(b) returns an exact pair $(a \oplus
1155:   b, a + b - a \oplus b)$ provided
1156:   $$e_b \le e_a.$$
1157: \end{theo}
1158: 
1159: This  theorem is  proved only  for radix  two notations  as it  is not
1160: necessarily  correct for other  radices.  An  example where  the early
1161: exit is  not correct radix 10 is  given for $n_{\max} =  99$ with both
1162: inputs  being  $9.9$.   The theorem  is  correct  for  radix 2  and  3
1163: \cite{Knu73}. One can  check one last example radix  4: $n_{\max} = 3$
1164: and both inputs are 3.
1165: 
1166: An exact multiplication  is also available.  It computes  a pair $(a',
1167: b')$ such that  $a' = a \otimes b$ and  $a' + b' = a  \times b$ with 7
1168: floating  point  multiplications, 5  floating  point  additions and  5
1169: floating  point   subtractions.   These  operators   are  surveyed  in
1170: \cite{Dau99a}.   Here are  some  properties obtained  from the  desired
1171: specification of $a'$ and $b'$, not from the algorithm.  The condition
1172: on the exponent is not necessary but it is sufficient to discard cases
1173: of underflow.
1174: 
1175: \begin{theo}[multExactExpCan in ClosestMult]
1176:   \label{theo/M/Exp}
1177:   Given two bounded floats $a = (n_a, e_a)$ and $b = (n_b, e_b)$, with
1178:   $e_a +  e_b \ge -e_{\min}  + p$, for  all $a'$ rounded to  a nearest
1179:   float of $a \times b$, we  can define $(n, e) \equiv a'$ canonic and
1180:   $b' = (n', e')$ bounded float, such that
1181:   $$
1182:     a' + b' \equiv a \times b
1183:     ~~~~~ \text{and} ~~~~~
1184:     e' = e - p.$$
1185: \end{theo}
1186: 
1187: It was proved  that comparable error quantities, that  always fit in a
1188: common  floating point  number, may  be defined  for the  division and
1189: square root \cite{BohWalKorMat91}.
1190: 
1191: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1192: 
1193: \subsection{Floating point expansions}
1194: 
1195: A floating point {\bf expansion} is defined as a finite sequence ${\bf
1196:   x} = (x_0, x_1, \cdots , x_{n - 1})$ of floating point numbers.  The
1197: value represented  is the  exact, not rounded,  sum of  its components
1198: $\sum  x_i$.   The  length of  the  expansion  is  the number  of  its
1199: components.
1200: 
1201: %We say that  $\widehat{x}$ is a {\bf fair  most significant component}
1202: %of ${\bf  x}$ if  $\widehat{x}$ equals to  the rounded to  the nearest
1203: %value of $x'$ with $|x' -  \sum x_i| \leq \beta^e / n_{\max}$ and $(n,
1204: %e)$ is  the canonical representation of  $\widehat{x}$.  This includes
1205: %the case where  $\widehat{x}$ equals the rounded to  the nearest value
1206: %of the sum of the component of ${\bf x}$.
1207: 
1208: Any  component  of  an  expansion  may  be  equal  to  zero,  but  the
1209: subsequence of  the non-zero components $x_i$ must  be non overlapping
1210: and ordered  by magnitude.  The  non overlapping condition  means that
1211: two components cannot have significant  bits with the same weight that
1212: is specified for each $i > 0$ there exists a bounded float $(n, e)$
1213: such that
1214: $$
1215: x_i \equiv (n, e) ~~~~~ \text{and} ~~~~~ |x_{i + 1}| < \beta^e.
1216: $$
1217: 
1218: %\figtex{superp}{Floating point numbers A and B do not overlap whereas B and C do.}
1219: 
1220: %A {\bf  non-adjacent expansion}  is an expansion  ${\bf x}$  where the
1221: %non-zero  subsequence has non-adjacent  components.  The  condition is
1222: %specified below.  There is at least one  bit set to 0  between the two
1223: %nearest non-zero (signed) bits of two consecutive components.
1224: %$$\exists (n, e) \in \F_{\cal  R}
1225: %%    ~~~ \text{such that} ~~~ (n, e) \equiv x_i
1226: %    ~~~ \text{and}       ~~~ |x_{i - 1}| < \beta^e / 2
1227: %$$
1228: 
1229: We have  already presented two examples in  \cite{Dau99a} showing that
1230: any float  $(n, e)$  where $e  \ge -e_{\min}$ can  be expressed  as an
1231: expansion.
1232: 
1233: As noted in the introduction,  previous works by Priest, Shewchuck and
1234: ourselves have produced arithmetic  operators on expansions and useful
1235: primitives for computational geometry.  In this process, we recognized
1236: that the  operators for the addition and  the multiplication computing
1237: the  least significant  digits  first do  not  produce length  optimal
1238: results  but  tend  to  break  the  expansion in  a  large  number  of
1239: components with  small significands. A compression routine  is used to
1240: group  together the  components and  reduce the  length of  the result
1241: expansion.
1242: 
1243: % (see figure \ref{fig:lsdf}(a))
1244: % The
1245: % program has to compress the result to reduce the memory occupation and
1246: % the length of  the expansion.  The exact floating  point addition puts
1247: % the 52 most  significant bits in the first word of  the result and any
1248: % remaining  bit   in  the  correcting  word.    Thereafter,  using  the
1249: % correcting  word as the  output of  a process  will definitely  have a
1250: % tendency to produce long expansions  of components with a small number
1251: % of significant bits.
1252: 
1253: % \begin{figure}[ht]
1254: % \begin{center}
1255: % \input{lsdf.pstex_t}
1256: % \caption{(a) LSDF operators : the result is a not optimal expansion (b) MSDF operator : the result is a pseudo-expansion}
1257: % \label{fig:lsdf}
1258: % \end{center}
1259: % \end{figure}
1260: 
1261: It is  possible to compute the arithmetic  operations most significant
1262: digits first and compute directly components with a large significand.
1263: The  common,  both restoring  and  non  restoring, division  operators
1264: compute the components of the quotient like this.  As a drawback, some
1265: of  the   components  may  overlap  slightly.   In   our  former  work
1266: \cite{DauFin99a}, the sequence of the  quotient digits is cleaned by a
1267: compression routine that produces an acceptable expansion.
1268: 
1269: % (see figure \ref{fig:lsdf}(b))
1270: 
1271: We define {\bf pseudo-expansions}  as slightly overlapping expansions.
1272: The condition on the subsequence of non zero components as that
1273: $$|x_{i  + 1}|  \le \epsilon |x_i|.$$%
1274: We do not give the value of $\epsilon < {1 \over 2}$ now as it will be
1275: fixed  in the next  section considering  the numerical  algorithms. We
1276: deduce that
1277: $$|\sum_{j > i} x_j| \le {\epsilon \over 1 - \epsilon} |x_i|.$$
1278: Conversely, given a sequence where 
1279: $$|\sum_{j > i} x_j| \le \lambda |x_i|,$$
1280: for all $i$, we deduce that
1281: $$|x_{i  + 1}|  \le {\lambda \over 1 - \lambda} |x_i|.$$%
1282: 
1283: % The  division of  pseudo-expansions is  copied from  the  existing one
1284: % except  that  it does  not  require  the  final cleaning  loops.   The
1285: % addition  and  the  multiplication   are  inspired  from  the  on-line
1286: % operators  to  produce the  result  most  significant  digit first  as
1287: % components are available  and safe.  As we do  for on-line arithmetic,
1288: % we recognize  that for the  three operators, least  significant digits
1289: % are not  necessary at the beginning  of the process  and the operators
1290: % are  transformed  from most  significant  digit  first operators  that
1291: % require its  inputs to  be exactly known  to adaptable  operators that
1292: % need only a  limited number of most significant  components to deliver
1293: % the first components of the output.
1294: 
1295: % \begin{theo}[errorBoundedMult in FroundMult]
1296: %   \label{theo/seq}
1297: %   Given  a sequence $(x_i)_{i \in \N}$ and provided $\epsilon < 1$. We know that
1298: % \begin{itemize}
1299: % \item $
1300: %   \displaystyle \text{If} ~~~~~
1301: %   \forall i \in \N, ~~~~~ |x_{i + 1}| \le \epsilon |x_i|,
1302: %   ~~~~~ \text{then} ~~~~~
1303: %   \forall i \in \N, ~~~~~ |\sum_{j \ge i} x_j| \le \epsilon' |x_i|$.
1304: % \item $
1305: %   \displaystyle \text{If} ~~~~~
1306: %   \forall i \in \N, ~~~~~ |\sum_{j \ge i} x_j| \le \epsilon |x_i|,
1307: %   ~~~~~ \text{then} ~~~~~
1308: %   \forall i \in \N, ~~~~~ |x_{i + 1}| \le \epsilon' |x_i|$.
1309: % \end{itemize}
1310: % \end{theo}
1311: 
1312: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1313: 
1314: \section{Toolset for the addition, the multiplication and the division}
1315: 
1316: We combine in this Section  tools to create the arithmetic operations:
1317: MQ,  PP, PQ  and $\Sigma_3$.  The tools  have been  implemented in  C. 
1318: Among them, the numerical behavior of $\Sigma_3$ alone is difficult to
1319: analyze as the  three other tools only sort  and produce components as
1320: they are needed.
1321: 
1322: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1323: 
1324: \subsection{Addition}
1325: 
1326: \figtex{addition}{Representation of the addition.}
1327: \figtex{insert3}{Definition of the $\Sigma_3$ operator.}
1328: 
1329: Figure~\ref{fig/addition}  represents  the  algorithm  implemented  to
1330: compute  the  sum  of  pseudo-expansions  $A$ and  $B$.   We  use  two
1331: primitive operators MQ (merge queue) and $\Sigma_3$ (insert and sum of
1332: Figure~\ref{fig/insert3}).  Arrows represent streams of floating point
1333: number flowing  most significant digits first.  The  components of the
1334: result are in the  stream named $C$ in Figure~\ref{fig/addition}.  The
1335: output  is  only a  pseudo-expansion  since  the  $a'$ are  sorted  by
1336: decreasing  order of  magnitude but  they may  partially  overlap each
1337: other.
1338: 
1339: MQ is an  operator fired only if it  has a value on both  inputs or if
1340: the flow  of one input  is finished  and it has  a value on  the other
1341: input. The  largest input,  in magnitude, is  then transmitted  to the
1342: output.  Cases of tie  are not  critical. The  MQ operator  merges two
1343: flows sorted by magnitude and produces a single flow once again sorted
1344: by magnitude.  The following theorem  states that this flow  of values
1345: can be represented  by a flow of floats to be  fed into the $\Sigma_3$
1346: operator.
1347: 
1348: \begin{theo}[IsRleExpRevIsExp in FexpAdd]
1349:   Given a list of floats $(x_i)_{i \in L}$ sorted by magnitude, we can
1350:   define the bounded floats $(n_i, e_i)$ equivalent to the $x_i$s such
1351:   that the list $(e_i)_{i \in L}$ is sorted.
1352: \end{theo}
1353: 
1354: We deduce the correct behavior  of the operation by induction from the
1355: following Theorem.   The property on  the input floating  point number
1356: $c$ is inherited from the fact  that the two input streams $A$ and $B$
1357: are pseudo-expansions merged by the MQ operator by order of magnitude.
1358: 
1359: \begin{theo}[bound3Sum and exp3Sum in ThreeSum2]
1360:   \label{theo/sum3}
1361:   Let $a = (n_a,  e_a)$, $b = (n_b, e_b)$ and $c  = (n_c, e_c)$ be the
1362:   bounded     inputs    of     the     $\Sigma_3$    operator     (see
1363:   Figure~\ref{fig/insert3}).  Provided that $e_a  \ge e_b \ge e_c$ the
1364:   $\Sigma_3$   operator  returns  three   numbers  $a'$,   $b'$,  $c'$
1365:   represented  by $(n_a',  e_a')$, $(n_b',  e_b')$ and  $(n_c', e_c')$
1366:   such that $e_a' \ge e_b' \ge e_c'  = e_c$ and finaly either $c' = 0$
1367:   or
1368:   $$|b' + c'|   \le  3 \ulp                   |a'|,$$
1369:   $$\beta^{e_c} \le {3 \ulp^2 \over 2 - \ulp} |a'|.$$
1370:   
1371:   If $a = 0$ or $|b| + n_{\max} \beta^{e_c} \le n_{\max} \beta^{e_a}$,
1372:   all the exact additions can use the early exit.
1373: \end{theo}
1374: 
1375: If $c'$  equals to zero, $a'$  and $b'$ are not  relevant enough. They
1376: remain in the operator as $a$  and $b$ for the next iteration.  On the
1377: opposite, if $c'$  is not equal to zero, $a'$ is  relevant.  It is one
1378: component of  the result, $b'$ and  $c'$ are kept in  the operator for
1379: the next iteration.
1380: 
1381: \paragraph{\em Proof sketch:}
1382: 
1383: We first define $u$, $v$, $a'$,  $b'$ and $c'$ as they are computed in
1384: Figure~\ref{fig/insert3}. We are interested in the case where $c' \neq
1385: 0$. It implies that $w \neq 0$. We bound
1386: $$
1387: \begin{array}{r c l}
1388:   |w| & \le & {\ulp \over 2} |a'| \\
1389:   |v| & \le & \ulp \max (|u|, |a|)
1390: \end{array}
1391: $$
1392: Since $w \neq 0$, we know that 
1393: $$|a'| \ge {\max (|u|, |a|) \over 2}.$$
1394: The first bound follows since
1395: $$|b' + c'| = |v + w| \le |v| + |w|.$$
1396: 
1397: We continue for the second bound
1398: $$
1399: 3 \ulp |a'| \ge |b' + c'|
1400:             \ge |b'| - |c'|
1401:             \ge \left( 1 - {\ulp \over 2} \right)|b'|.
1402: $$
1403: We conclude with  the representation  of Theorem~\ref{theo/TS/Exp}
1404: $$
1405: \beta^{e_c} = \beta^{e_c'}
1406:             \le |c'|
1407:             \le {\ulp \over 2} |b'|
1408:             \le {3 \ulp^2 \over 2 - \ulp} |a'|.
1409: $$
1410: 
1411: \begin{flushright}
1412: $\Box$
1413: \end{flushright}
1414: 
1415: The last theorem does the induction over time. It is written from both
1416: statement and proof of the theorem in Coq.
1417: 
1418: \begin{theo}[FexpAdd in FexpAdd]
1419:   Given a list $(n_i, e_i)_{i \in  L}$ of $\#L$ bounded floats so that
1420:   $(e_i)_{i   \in   L}$  is   sorted   the   addition  operator   (see
1421:   Figure~\ref{fig/addition}) produces  a list $(c_i)_{i \in  L}$ of at
1422:   most $(\#L + 1)$ floats with
1423:   $$
1424:   |c_{i + 1}| \le \epsilon |c_i|
1425:   ~~~~~ \text{with} ~~~~~
1426:   \epsilon \le {6 \#L + 6 \over n_{\max} - 1 - 6\#L}
1427:   $$
1428:   under the condition that $\epsilon < 1$.
1429: \end{theo}
1430: 
1431: \paragraph{\em Proof sketch:}
1432: 
1433: We have  to show that the  input conditions are met  at each iteration
1434: using  pre-conditions  and  post-conditions.   In the  same  time,  we
1435: establish that
1436: $$|\sum_{j > i} c_j| \le 3(1 + 2 \#L) \ulp |c_i|,$$
1437: and
1438: $$|c_{i  + 1}|       \le 6(1 + 2 \#L) \ulp |c_i|.$$%
1439: Being more careful, we proved the tighter bound in Coq.
1440: 
1441: \begin{flushright}
1442: $\Box$
1443: \end{flushright}
1444: 
1445: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1446: 
1447: \subsection{Multiplication}
1448: 
1449: \figtex{multiplication3}{Representation of the multiplication.}
1450: 
1451: The  multiplication  of  the   expansions  $A$  (size  $n_A$)  by  the
1452: expansions  $B$ (size $n_B$)  generates the  $n_A \times  n_B$ partial
1453: products and  computes their sum. The  problem is to  sort the partial
1454: products  $a_{i_j} \times  b_j$  knowing that  the  lists $(a_i)$  and
1455: $(b_j)$ are  sorted by  magnitude. It  is related to  $X +  Y$ sorting
1456: \cite{HarPaySavStr75,SteStr94}.        Figure~\ref{fig/multiplication3}
1457: represents  the  algorithm  implemented.   We  use  new  and  extended
1458: primitives: PP, a  partial product generator, PQ and  a priority queue
1459: extending the MQ.
1460: 
1461: % \figtex{prods}{Generation of partial product and priority queue.}
1462: 
1463: Working with pseudo-expansions, we have  to make sure that the list of
1464: the  partial products  is sorted  by magnitude.   As $b_j$  arrives at
1465: position  $j$,  the PP  tool  initializes the  $i_j$  index  to 0  and
1466: computes $a_{i_j} \otimes b_j$.  Its  magnitude is inserted at the end
1467: of PQ that is updated in $\lfloor \log j \rfloor$ operations. When the
1468: partial  product $a_{i_j}  \otimes  b_j$  is output  by  PQ, the  tool
1469: increments  $i_j$.  The  generation  is frozen  until both  components
1470: $a_{i_j}$  and  $b_{j +  1}$  are  known.   Then the  cell  containing
1471: $|a_{i_j} \otimes b_j|$  in PQ is updated in  $\lfloor \log j \rfloor$
1472: operation and the tools can generate the next partial product.
1473: 
1474: % \figtex{somme5}{Definition of the $\Sigma_5$ operator.}
1475: 
1476: Numbers  flow by  pair since  $a_i \times  b_j$ produces  two floating
1477: point  numbers. The  upper part  $p_{ij} =  a_i \otimes  b_j$  is used
1478: immediately. The  lower part $a_i \times  b_j - p_{ij}$ is  put into a
1479: waiting  queue.   The  two  streams  are merged  together  by  the  MQ
1480: operators. It is fired as long as it has an input on both entries.  It
1481: does not compare the values but  the keys that are associated with the
1482: floats.   The operator does  not produce  a list  of floats  sorted by
1483: magnitude but directly a list sorted by amplitude as we use the second
1484: part of theorem~\ref{theo/M/Exp}.
1485: 
1486: As $a_i \times  b_j - p_{ij}$ cannot be  selected before $p_{ij}$, its
1487: insertion  into  the  waiting queue  is  not  in  the critical  path.  
1488: Instruction reordering will allow these operation to be hidden in dead
1489: cycles of the $\Sigma_3$ operator.   The waiting list cannot hold more
1490: than $2n_B$ elements since the bound on $\epsilon$ is a few ulps.
1491: 
1492: % $(A_i \times  B_j)_{High}$ that  is inserted in  $c$ and  $(A_i \times
1493: % B_j)_{Low}$    inserted    in   $e$.     A    formal   extension    of
1494: % Theorem~\ref{theo/sum3} will guarantee that this operator produces the
1495: % desired output.
1496: 
1497: % If $d'$ equals to zero, then $e'  = 0$ and $a'$, $b'$ and $c'$ are not
1498: % relevant enough. They  remain in the operator for  the next iteration. 
1499: % If $d' \neq 0$, $a'$ is relevant.   It is one component of the result. 
1500: % If  $e'$ is  also  non zero,  $b'$  is immediately  used  as the  next
1501: % component of  the result. The  three remaining floating  point numbers
1502: % are kept in the operator for the next iteration.
1503: 
1504: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1505: 
1506: \subsection{Division}
1507: 
1508: % \subsubsection{Common division}
1509: 
1510: \figtex{division3}{Representation of the non-restoring division.}
1511: 
1512: Figure~\ref{fig/division3}  represents  the division  of  $R$ by  $D$,
1513: pseudo-expansions. We want to compute $Q$  such that $Q = R / D$, {\em
1514:   ie.}  $R - QD = 0$. Each  component $q_i$ of $Q$ is deduced from the
1515: previous ones by computing
1516: $$R_i = R - \sum_{j < i} q_j \times D,$$%
1517: before evaluation $q_i \approx R_i /  D$. We will see later that $d_0$
1518: is handled separately, so our scheme computes $Q \times (d_0 - D)$.
1519: 
1520: The  goal is  to ensure  that the  values $c$  used as  inputs  of the
1521: $\Sigma_3$ operator  are sorted by  amplitude.  The PP-PQ-MQ  chain is
1522: close   to   the   one    used   for   the   multiplication   in   the
1523: Figure~\ref{fig/multiplication3}. The  generation is frozen  only when
1524: $d_{j + 1}$ is not known.
1525: 
1526: The test  on $q_{i_j}$ is replaced  by a new  condition: the operation
1527: stops before it may insert a value $c$  such that $c < 4 |a + b| \ulp$. 
1528: A new  approximate quotient  digit $q_i$ is  then guessed from  a fair
1529: most significant  component $d_0$ of the  divisor $D$ and  a fair most
1530: significant component $a' \oplus b'$.
1531: 
1532: %  of the remainder $R_i$: $q_i =
1533: % \circ(a' / d_l)$.
1534: % The less  significant components of the dividend  are stored unchanged
1535: % with all the components of the terms $-q_i \cdot D$ that have not been
1536: % reduced so far.   A priority queue (PQ) extracts  the most significant
1537: % components  among this  set.  These  numbers are  accumulated  until a
1538: % relevant $a'$ can be estimated again.
1539: 
1540: % Since  $R_i$  is  at  least  divided  by a  constant  factor  at  each
1541: % iteration, the algorithm is converging.
1542: % $$
1543: % \displaystyle \frac{R_0}{D}=\sum_{i=0}^{n-1}{q_i}+\frac{R_n}{D}
1544: % $$
1545: 
1546: % We  use four  primitive operators.   The partial  product PP,  and the
1547: % priority  queue PQ,  sort  by  magnitude the  pairs  generated by  the
1548: % product of $q_i$  by $D - d_l$.  The merging queue  MQ, takes the most
1549: % significant numbers between the  components of the numerator that have
1550: % not been used so far and the numbers given by the priority queue.  The
1551: % $\Sigma_5$  operator   adds  the   new  component  to   the  left-over
1552: % components.  The number  $a'$ is significant enough when  $d' \neq 0$. 
1553: % This  process guarantees  that  $a'$ is  a  relevant approximation  of
1554: % $R_i$. We will prove that $e' = 0$.
1555: 
1556: The following  theorem states that one  division step is  very robust. 
1557: For a reasonable value of $\epsilon$, it guarantees that the remainder
1558: decreases almost linearly.
1559: 
1560: \begin{theo}[DivConv in FexpDiv]
1561:   \label{theo/Div/Converge}
1562:   Let $W$ and $D$ be two non zero real numbers approximated by $w$ and
1563:   $d$ with  a relative error  bound $\epsilon <  1$ and let $q$  be an
1564:   approximation of $w / d$ with the same relative error bound.
1565:   $$
1566:   |W - q D | \le \epsilon {3 + \epsilon \over 1 - \epsilon} |W|
1567:   $$
1568: \end{theo}
1569: 
1570: To reduce $\epsilon$, we replace the initial values $d_0$ and $d_1$ by
1571: $d_0 \oplus d_1$  and $d_0 + d_1 - d_0 \oplus  d_1$. As a consequence,
1572: the worst  error on the Theorem~\ref{theo/Div/Converge} is  the one on
1573: the  remainder and  it  is bounded  by  construction by  4 ulps.  This
1574: guarantees with the  loop condition that any term  $q_i \times d_j$ is
1575: smaller than  all the  terms already produced  by the  partial product
1576: generation chain except  $q_i \times d_0$. This last  value is handled
1577: separately as $a'$ and  $b'$ are replaced by $a' + b'  - q_i d_0$. The
1578: loop condition also ensures that $c' = 0$.
1579: 
1580: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1581: 
1582: % \subsubsection{Pre-scaled division}
1583: 
1584: % In  modern computers,  the  floating  point division  is  a very  slow
1585: % operator.   For  example,  Intel  Pentium processors  need  39  cycles
1586: % \cite{Int95a}  whereas  multiplications   and  additions  may  have  a
1587: % throughput of 1 cycle.  Prescaled  division, can be used to reduce the
1588: % number of floating point division to 1.
1589: 
1590: % As a  price, we have to  exactly prescale the  numerator $R$ involving
1591: % $n_R$ multiplications.  As  a matter of fact, the  prescaling fits the
1592: % size of the second input of the MQ operator that is waiting for a pair
1593: % of floating point digits from the PQ and now also from the numerator.
1594: 
1595: % \begin{figure}[ht]
1596: % \begin{center}
1597: % \input{division2.pstex_t}
1598: % \caption{Representation of the pre-scaled division}
1599: % \label{fig:div2}
1600: % \end{center}
1601: % \end{figure}
1602: 
1603: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1604: 
1605: \section*{Conclusion}
1606: 
1607: We  have presented  a new  adaptable numeric  core inspired  both from
1608: floating point expansions and from on-line arithmetic.  Our choice was
1609: to  present only  the arithmetic  core as  we have  presented  in past
1610: publications  how a  core can  be used  efficiently to  compute matrix
1611: determinants for example \cite{DauFin99a}.  Building a general purpose
1612: runtime environment raises question  in areas like parallelism (demand
1613: driven {\em vs.}  data flow...)  and in compiler techniques (efficiency
1614: of  object oriented  code generated  by existing  compilers...)  among
1615: others.
1616:   
1617: The numeric core  is cut down to four  tools.  The $\Sigma_3$ operator
1618: that  contains  many arithmetic  operations  is  proved correct.   The
1619: proofs  have been  validated  by the  Coq  assistant.  Developing  the
1620: proofs,  we have  formally proved  many result  long published  in the
1621: literature and  we have  extended a  few of them.   This work  may let
1622: users
1623: \begin{itemize}
1624: \item Develop  application specific  adaptable libraries based  on the
1625:   toolset.
1626: \item  Easily write  new formal  proofs based  on the  growing  set of
1627:   sensible validated facts.
1628: \end{itemize}
1629: 
1630: The set  of the validated facts  is now sufficient to  shield the user
1631: from   the  difficult   implementation  details   of   floating  point
1632: arithmetic.  The proof  of the key Theorem on  the $\Sigma_3$ operator
1633: does not  use any low  level result. It  just uses the many  lower and
1634: upper bounds defined in the Theorems of Section~1.
1635: 
1636: \small
1637: 
1638: %\bibliography{/home/arenaire/public_html/Bibliographies/daumas_ref}
1639: %\bibliography{p:/Arenaire/Bibliographies/daumas_ref}
1640: 
1641: \begin{thebibliography}{10}
1642: 
1643: \bibitem{AvnBoiDevPreYvi97}
1644: Francis Avnaim, Jean-Daniel Boissonnat, Olivier Devillers, Franco Preparata,
1645:   and Mariette Yvinec.
1646: \newblock Evaluating signs of determinants using single precision arithmetic.
1647: \newblock {\em Algorithmica}, 17(2):111--132, 1997.
1648: 
1649: \bibitem{Bai93}
1650: David~H. Bailey.
1651: \newblock Algorithm 719, multiprecision translation and execution of fortran
1652:   programs.
1653: \newblock {\em ACM Transactions on Mathematical Software}, 19(3):288--319,
1654:   1993.
1655: 
1656: \bibitem{Bai95}
1657: David~H. Bailey.
1658: \newblock A {F}ortran-90 based multiprecision system.
1659: \newblock {\em ACM Transactions on Mathematical Software}, 21(4):379--387,
1660:   1995.
1661: 
1662: \bibitem{Bar93}
1663: Jesse~L. Barlow.
1664: \newblock Error analysis of update methods for the symmetric eigenvalue
1665:   problem.
1666: \newblock {\em SIAM Journal of Matrix Analysis and Applications},
1667:   14(2):598--618, 1993.
1668: 
1669: \bibitem{BerDau97}
1670: David Berthelot and Marc Daumas.
1671: \newblock Computing on sequences of embedded intervals.
1672: \newblock {\em Reliable Computing}, 3(3):219--227, 1997.
1673: 
1674: \bibitem{Boe87}
1675: Hans-Juergen Boehm.
1676: \newblock Constructive real interpretation of numerical programs.
1677: \newblock {\em ACM SIGPLAN Notices}, 22(7):214--221, 1987.
1678: 
1679: \bibitem{BohWalKorMat91}
1680: Gerd Bohlender, Wolfgang Walter, Peter Kornerup, and David~W. Matula.
1681: \newblock Semantics for exact floating point operations.
1682: \newblock In Peter Kornerup and David Matula, editors, {\em Proceedings of the
1683:   10th Symposium on Computer Arithmetic}, pages 22--26, Grenoble, France, 1991.
1684: 
1685: \bibitem{BoiPre2K}
1686: Jean-Daniel Boissonnat and Franco~P. Preparata.
1687: \newblock Robust plane sweep for intersecting segments.
1688: \newblock {\em SIAM Journal on Computing}, 29(5):1401--1421, 2000.
1689: 
1690: \bibitem{BolDau01}
1691: Sylvie Boldo and Marc Daumas.
1692: \newblock Performances d'implantations de l'addition en pr{\'e}cision
1693:   quad-double sur diff{\'e}rentes machines.
1694: \newblock In {\em 7{\`e}me Symposium sur les Architectures Nouvelles de
1695:   Machines}, Paris, France, 2001.
1696: 
1697: \bibitem{Bre78a}
1698: Richard~P. Brent.
1699: \newblock A fortran multiple-precision arithmetic package.
1700: \newblock {\em ACM Transactions on Mathematical Software}, 4(1):57--70, 1978.
1701: 
1702: \bibitem{BurFleMehSch99}
1703: C.~Burnikel, R.~Fleischer, K.~Mehlhorn, and S.~Schirra.
1704: \newblock Efficient exact geometric computation made easy.
1705: \newblock In {\em Proceedings of the 15th Annual ACM Symposium on Computational
1706:   Geometry}, pages 341--350, Miami, Florida, 1999.
1707: 
1708: \bibitem{ChaGedGonMonWat91}
1709: Bruce~W. Char, Keith~O. Geddes, Gaston~H. Gonnet, Michael~B. Monagan, and
1710:   Stephen~M. Watt.
1711: \newblock {\em Maple {V}: langage reference manual}.
1712: \newblock Springer Verlag, 1991.
1713: 
1714: \bibitem{CodKar.84}
1715: William~J. Cody, Richard Karpinski, et~al.
1716: \newblock A proposed radix and word-length independent standard for floating
1717:   point arithmetic.
1718: \newblock {\em IEEE Micro}, 4(4):86--100, 1984.
1719: 
1720: \bibitem{CorDupHocMul91}
1721: G.~Corbaz, Jean Duprat, Bertrand Hochet, and Jean-Michel Muller.
1722: \newblock Implementation of a {VLSI} polynomial evaluator for real-time
1723:   applications.
1724: \newblock In {\em International Conference on Application-Specific Array
1725:   Processors}, 1991.
1726: 
1727: \bibitem{Dau99a}
1728: Marc Daumas.
1729: \newblock Multiplications of floating point expansions.
1730: \newblock In Israel Koren and Peter Kornerup, editors, {\em Proceedings of the
1731:   14th Symposium on Computer Arithmetic}, pages 250--257, Adelaide, Australia,
1732:   1999.
1733: 
1734: \bibitem{DauFin98}
1735: Marc Daumas and Claire Finot.
1736: \newblock Division of floating point expansions.
1737: \newblock In {\em IMACS-GAMMM International Symposium on Scientific Computing,
1738:   Computer Arithmetic and Validated Numerics}, pages 45--46, Budapest,
1739:   Hungaria, 1998.
1740: 
1741: \bibitem{DauFin99a}
1742: Marc Daumas and Claire Finot.
1743: \newblock Division of floating point expansions with an application to the
1744:   computation of a determinant.
1745: \newblock {\em Journal of Universal Computer Science}, 5(6):323--338, 1999.
1746: 
1747: \bibitem{DauMulTis97}
1748: Marc Daumas, Jean-Michel Muller, and Arnaud Tisserand.
1749: \newblock Very high radix on-line arithmetic for accurate computations.
1750: \newblock In {\em Proceedings of the 15th IMACS World Congress on Computational
1751:   and Applied Mathematics}, volume~2, pages 347--352, Berlin, Germany, 1997.
1752: 
1753: \bibitem{DauMulVui94}
1754: Marc Daumas, Jean-Michel Muller, and Jean Vuillemin.
1755: \newblock Implementing on-line arithmetic on {PAM}.
1756: \newblock In Reiner~W. Hartenstein and Michael~Z. Serv{\'\i}t, editors, {\em
1757:   Field-Programmable Logic: Architectures Synthesis and Applications}, pages
1758:   196--207, Prague, Czech Republic, 1994. Springer Verlag.
1759: 
1760: \bibitem{Dek71}
1761: Theodorus~J. Dekker.
1762: \newblock A floating point technique for extending the available precision.
1763: \newblock {\em Numerische Mathematik}, 18(3):224--242, 1971.
1764: 
1765: \bibitem{DevPre98}
1766: Olivier Devillers and Franco Preparata.
1767: \newblock A probabilistic analysis of the power of arithmetic filters.
1768: \newblock {\em Discrete and Computational Geometry}, 20(4):523--547, 1998.
1769: 
1770: \bibitem{Don92}
1771: Jack~J. Dongarra.
1772: \newblock Performance of various computers using standard linear equations
1773:   software.
1774: \newblock {\em Computer architecture news}, 20(3):22--44, 1992.
1775: 
1776: \bibitem{Don99}
1777: Jack~J. Dongarra.
1778: \newblock Performance of various computers using standard linear equations
1779:   software.
1780: \newblock Report CS-89-85, University of Tennessee, 1999.
1781: 
1782: \bibitem{ForVan93}
1783: Steven Fortune and Christopher~J. {Van Wyk}.
1784: \newblock Efficient exact arithmetic for computational geometry.
1785: \newblock In {\em Proceedings of the 9th ACM Symposium on Computational
1786:   Geometry}, pages 163--172, 1993.
1787: 
1788: \bibitem{Gog2K}
1789: Brice Goglin.
1790: \newblock Calcul sur des expansions de taille fixe.
1791: \newblock Technical report, {\'E}cole Normale Sup{\'e}rieure de Lyon, 2000.
1792: 
1793: \bibitem{Gol91}
1794: David Goldberg.
1795: \newblock What every computer scientist should know about floating point
1796:   arithmetic.
1797: \newblock {\em ACM Computing Surveys}, 23(1):5--47, 1991.
1798: 
1799: \bibitem{Gra2Kb}
1800: Tobj\"orn Granlund.
1801: \newblock {\em The {GNU} multiple precision arithmetic library}, 2000.
1802: \newblock Version 3.1.
1803: 
1804: \bibitem{HarPaySavStr75}
1805: L.~H. Harper, Terry~H. Payne, John~E. Savage, and E.~Straus.
1806: \newblock Sorting $x + y$.
1807: \newblock {\em Communications of the ACM}, 18(6):347--349, 1975.
1808: 
1809: \bibitem{Har97a}
1810: John Harrison.
1811: \newblock Floating point verification in {HOL} light: the exponential function.
1812: \newblock Technical Report 428, University of Cambridge Computer Laboratory,
1813:   1997.
1814: 
1815: \bibitem{Hig96}
1816: Nicholas~J. Higham.
1817: \newblock {\em Accuracy and stability of numerical algorithms}.
1818: \newblock SIAM, 1996.
1819: 
1820: \bibitem{HueKahPau97}
1821: G{\'e}rard Huet, Gilles Kahn, and Christine Paulin-Mohring.
1822: \newblock The {Coq} {P}roof {A}ssistant: {A} {T}utorial: Version 6.1.
1823: \newblock Technical Report 204, Institut National de Recherche en Informatique
1824:   et en Automatique, Le Chesnay, France, 1997.
1825: 
1826: \bibitem{Kah65}
1827: William Kahan.
1828: \newblock Further remarks on reducing truncation errors.
1829: \newblock {\em Communications of the ACM}, 8(1):40, 1965.
1830: 
1831: \bibitem{KarLiPecYap99}
1832: V.~Karamcheti, C.~Li, I.~Pechtchanski, and Chee Yap.
1833: \newblock A core library for robust numeric and geometric computation.
1834: \newblock In {\em Proceedings of the 15th Annual ACM Symposium on Computational
1835:   Geometry}, pages 351--359, Miami, Florida, 1999.
1836: 
1837: \bibitem{Knu69}
1838: Donald~E. Knuth.
1839: \newblock {\em The art of computer programming: Seminumerical Algorithms}.
1840: \newblock Addison Wesley, 1969.
1841: \newblock First edition.
1842: 
1843: \bibitem{Knu73}
1844: Donald~E. Knuth.
1845: \newblock {\em The art of computer programming: Seminumerical Algorithms}.
1846: \newblock Addison Wesley, 1973.
1847: \newblock Second edition.
1848: 
1849: \bibitem{LouErc93}
1850: Marianne~E. Louie and Milo\v{s}~D. Ercegovac.
1851: \newblock On digit recurrence division implementation for field programmable
1852:   gate arrays.
1853: \newblock In Earl Swartzlander, Mary~Jane Irwin, and Graham Jullien, editors,
1854:   {\em Proceedings of the 11th Symposium on Computer Arithmetic}, pages
1855:   202--209, Windsor, Ontario, 1993.
1856: 
1857: \bibitem{MazMer93}
1858: Christophe Mazenc and Xavier Merrheim.
1859: \newblock Handling numbers in high precision and controlling the numerical
1860:   precision in a serial system on a parallel {MIMD} machine.
1861: \newblock In {\em 26th International Conference on System Sciences}, pages
1862:   1196--1198, Honolulu, Hawaii, 1993.
1863: 
1864: \bibitem{Men94}
1865: Val{\'e}rie M{\'e}nissier.
1866: \newblock {\em Arithm{\'e}tique Exacte~: Conception, Algorithmique et
1867:   Performances d'une Implantation Informatique en Pr{\'e}cision Arbitraire}.
1868: \newblock PhD thesis, Universit{\'e} Paris VI, Paris, France, 1994.
1869: 
1870: \bibitem{Men95}
1871: Val\'erie M\'enissier-Morain.
1872: \newblock Conception et algorithmique d'une repr\'esentation d'arithm\'etique
1873:   r\'eelle en pr\'ecision arbitraire.
1874: \newblock In {\em 1st Real Numbers and Computers Conference}, pages 47--64,
1875:   St-\'Etienne, France, 1995.
1876: 
1877: \bibitem{MicMor97}
1878: Dominique Michelucci and Jean-Michel Moreau.
1879: \newblock Lazy arithmetic.
1880: \newblock {\em IEEE Transactions on Computers}, 46(9):961--975, 1997.
1881: 
1882: \bibitem{Mol65b}
1883: Ole M{\o}ller.
1884: \newblock Note on quasi double-precision.
1885: \newblock {\em BIT}, 5(4):251--255, 1965.
1886: 
1887: \bibitem{Mol65a}
1888: Ole M{\o}ller.
1889: \newblock Quasi double-precision in floating point addition.
1890: \newblock {\em BIT}, 5(1):37--50, 1965.
1891: 
1892: \bibitem{Mul97}
1893: Jean-Michel Muller.
1894: \newblock {\em Elementary functions, algorithms and implementation}.
1895: \newblock Birkhauser, 1997.
1896: 
1897: \bibitem{Pic72}
1898: Mich\`ele Pichat.
1899: \newblock Correction d'une somme en arithm\'etique \`a virgule flottante.
1900: \newblock {\em Numerische Mathematik}, 19:400--406, 1972.
1901: 
1902: \bibitem{Pri91}
1903: Douglas~M. Priest.
1904: \newblock Algorithms for arbitrary precision floating point arithmetic.
1905: \newblock In Peter Kornerup and David Matula, editors, {\em Proceedings of the
1906:   10th Symposium on Computer Arithmetic}, pages 132--144, Grenoble, France,
1907:   1991.
1908: 
1909: \bibitem{Rus98}
1910: David~M. Russinoff.
1911: \newblock A mechanically checked proof of {IEEE} compliance of the floating
1912:   point multiplication, division and square root algorithms of the {AMD-K7}
1913:   processor.
1914: \newblock {\em LMS Journal of Computation and Mathematics}, 1:148--200, 1998.
1915: 
1916: \bibitem{Sei2K}
1917: Peter-Michael Seidel.
1918: \newblock Exact arithmetic based on floating-point numbers.
1919: \newblock In {\em IMACS-GAMMM International Symposium on Scientific Computing,
1920:   Computer Arithmetic and Validated Numerics}, page 123, Karlsruhe, Germany,
1921:   2000.
1922: 
1923: \bibitem{SerVuiHer89}
1924: Bernard Serpette, Jean Vuillemin, and Jean-Claude Herv\'{e}.
1925: \newblock {BigNum}: a portable and efficient package for arbitrary-precision
1926:   arithmetic.
1927: \newblock Technical Report~2, Digital Equipment Corporation, Paris Research
1928:   Laboratory, 1989.
1929: 
1930: \bibitem{She96}
1931: Jonathan~R. Shewchuk.
1932: \newblock Robust adaptative floating point geometric predicates.
1933: \newblock In {\em Proceedings of the 12th Annual ACM Symposium on Computational
1934:   Geometry}, pages 141--150, Philadelphia, Pensylvania, 1996.
1935: 
1936: \bibitem{She97}
1937: Jonathan~R. Shewchuk.
1938: \newblock Adaptive precision floating-point arithmetic and fast robust
1939:   geometric predicates.
1940: \newblock In {\em Discrete and Computational Geometry}, volume~18, pages
1941:   305--363, 1997.
1942: 
1943: \bibitem{SorTan91}
1944: Danny~C. S{\o}rensen and Ping Tak~Peter Tang.
1945: \newblock On the orthogonality of eigenvectors computed by divide and conquer
1946:   methods techniques.
1947: \newblock {\em SIAM Journal of Numerical Analysis}, 28(6):1752--1775, 1991.
1948: 
1949: \bibitem{SteStr94}
1950: William Steiger and Ileana Streinu.
1951: \newblock A pseudo-algorithmic separation of line from pseudo-lines.
1952: \newblock In {\em Proceedings of the Sixth Canadian Conference on Computational
1953:   Geometry}, Saskatoon, Saskatchewan, 1994.
1954: 
1955: \bibitem{Ste74}
1956: Pat~H. Sterbenz.
1957: \newblock {\em Floating point computation}.
1958: \newblock Prentice Hall, 1974.
1959: 
1960: \bibitem{Ste.81}
1961: David Stevenson et~al.
1962: \newblock A proposed standard for binary floating point arithmetic.
1963: \newblock {\em IEEE Computer}, 14(3):51--62, 1981.
1964: 
1965: \bibitem{Ste.87}
1966: David Stevenson et~al.
1967: \newblock An american national standard: {IEEE} standard for binary floating
1968:   point arithmetic.
1969: \newblock {\em ACM SIGPLAN Notices}, 22(2):9--25, 1987.
1970: 
1971: \bibitem{TriErc77}
1972: Kishor~S. Trivedi and Milo\v{s}~D. Ercegovac.
1973: \newblock On-line algorithms for division and multiplication.
1974: \newblock {\em IEEE Transactions on Computers}, 26(7):681--687, 1977.
1975: 
1976: \bibitem{Ziv91}
1977: Abraham Ziv.
1978: \newblock Fast evaluation of elementary mathematical functions with correctly
1979:   rounded last bit.
1980: \newblock {\em ACM Transactions on Mathematical Software}, 17(3):410--423,
1981:   1991.
1982: 
1983: \end{thebibliography}
1984: 
1985: 
1986: \appendix
1987: \section{A brief overview of Coq}
1988: 
1989: In this appendix,  we give a quick overview of the  Coq system.  For a
1990: more complete introduction, we refer the reader to~\cite{HueKahPau97}.
1991: 
1992: Coq is a generic prover based on type-theory. In this system,
1993: users can define new objects {\it and} prove properties that
1994: derive logically from these definitions. Objects in Coq are
1995: typed and functions are first-class objects using a Lisp-like
1996: notation. The system is distributed with standard libraires that
1997: define types like {\tt nat}, {\tt Z} and {\tt R} which correspond to
1998: the natural numbers, the relative numbers and the reals respectively. 
1999: To give an example, the addition for natural number is represented by 
2000: the function {\tt plus}
2001: whose type is {\tt nat $\rightarrow$ nat $\rightarrow$ nat}.
2002: A function {\tt plus3} that does the sum of 3 natural numbers is
2003: defined by the following command:
2004: {\small \begin{verbatim}
2005: Definition plus3 := [a,b,c:nat] (plus a (plus b c)).
2006: \end{verbatim}}
2007: \noindent 
2008: Arguments between square brackets provide the parameters of the function.
2009: The type of {\tt plus3} is the expected one: {\tt nat $\rightarrow$  nat $\rightarrow$ 
2010: nat $\rightarrow$ nat}.
2011: 
2012: In our formalization, we first define the type {\tt float} that
2013: represents the record composed of the mantissa and the exponent
2014: by the command:
2015: {\small \begin{verbatim}
2016: Record float: Set := Float {Fnum: Z; Fexp: Z}.
2017: \end{verbatim}}
2018: \noindent
2019: This command creates a new type {\tt float}, a constructor {\tt Float}
2020: of type {\tt Z $\rightarrow$  Z $\rightarrow$ float} and two
2021: destructors {\tt Fnum} of type {\tt  float $\rightarrow$ Z} and
2022: {\tt Fexp} of type {\tt  float $\rightarrow$ Z}. So for example,
2023: we can define the function {\tt Fzero} that takes an object of
2024: type {\tt float} and returns an object of same exponent but
2025: with the mantissa set to zero:
2026: {\small \begin{verbatim}
2027: Definition Fzero := [p:float](Float 0 (Fexp p)).
2028: \end{verbatim}}
2029: \noindent
2030: In a similar way, we define the notion of bound. It contains
2031: two integers: one for the mantissa and one for the exponent:
2032: {\small \begin{verbatim}
2033: Record Fbound: Set := Bound {vNum: nat;dExp: nat}.
2034: \end{verbatim}}
2035: \noindent
2036: In order to prove properties that are independent of a particular
2037: bound, we add the command:
2038: {\small \begin{verbatim}
2039: Variable b:Fbound.
2040: \end{verbatim}}
2041: \noindent
2042: With this arbitrary bound, the notion of being bounded is defined as:
2043: {\small \begin{verbatim}
2044: Definition Fbounded := [p:float]
2045:     ((Zle (Zopp (vNum b)) (Fnum p))  /\ (Zle (Fnum p) (vNum b))) /\
2046:     (Zle (Zopp (dExp b)) (Fexp p)).
2047: \end{verbatim}}
2048: \noindent
2049: A pretty-printed version of this definition could be:\\
2050: \vbox{
2051: \vskip5pt
2052: \hbox{{\tt Definition Fbounded := $\lambda$p:float.\,}}
2053: \hbox{ {\tt -(vNum b) $\le$ (Fnum p) $\le$ (vNum b) $\land$ -(dExp b) $\le$ (Fexp p).}}
2054: \vskip5pt
2055: }\\
2056: Now that we have these definitions we can prove a very simple
2057: first theorem:
2058: {\small \begin{verbatim}
2059: Theorem BoundedZero: (p:float) (Fbounded p) ->(Fbounded (Fzero p)).
2060: \end{verbatim}}
2061: \noindent
2062: A pretty-printed version of this theorem could be:\\
2063: \vbox{
2064: \vskip5pt
2065: \hbox{{\tt Theorem BoundedZero: $\forall$p:float.\, (Fbounded p) $\Rightarrow$ (Fbounded (Fzero p)).}}
2066: \vskip5pt
2067: }\\
2068: When the previous command is received by the system, it enters 
2069: the proof mode. The statement of the theorem is put om the top
2070: of the goal stack. By applying a command called {\it tactic} we
2071: replace the goal at the top of the stack by a (possibly empty)
2072: list of subgoals. The proof is finished when the stack is empty.
2073: Our initial stack has only one goal. Each goal contains a list
2074: of hypothesis and a conclusion separated by a bar:
2075: {\small \begin{verbatim}
2076: 1 subgoal
2077:   
2078:   ============================
2079:    (p:float)(Fbounded p)->(Fbounded (Fzero p))
2080: \end{verbatim}}
2081: \noindent
2082: The first step is to introduce the hypothesis. For this,
2083: we apply the tactic {\tt Intros}.
2084: {\small \begin{verbatim}
2085: Intros p H.
2086: \end{verbatim}}
2087: \noindent
2088: We are now reduce to prove {\tt (Fbounded (Fzero p))} in
2089: the contect where {\tt p} is a {\tt float} and {\tt H} is a 
2090: proof that p is bounded:
2091: {\small \begin{verbatim}
2092: 1 subgoal
2093: 
2094:   p : float
2095:   H : (Fbounded p)
2096:   ============================
2097:    (Fbounded (Fzero p))
2098: \end{verbatim}}
2099: \noindent
2100: Next we expand the definition of {\tt Fbounded} in the goal. For this,
2101: we use the tactic {\tt Red} and get the new goal:
2102: {\small \begin{verbatim}
2103: 1 subgoal
2104:   
2105:   p : float
2106:   H : (Fbounded p)
2107:   ============================
2108:    ((Zle (Zopp (vNum b)) (Fnum (Fzero p)) /\ 
2109:     (Zle (Fnum (Fzero p)) (vNum b))) /\
2110:     (Zle (Zopp (dExp b)) (Fexp (Fzero p)))
2111: \end{verbatim}}
2112: \noindent
2113: This goal can be simplified using the definition of
2114: {\tt Fzero} by the tactic {\tt Simpl}:
2115: {\small \begin{verbatim}
2116: 1 subgoal
2117:   
2118:   p : float
2119:   H : (Fbounded p)
2120:   ============================
2121:    ((Zle (Zopp (vNum b)) 0) /\ (Zle 0 (vNum b))) /\
2122:     (Zle (Zopp (dExp b)) (Fexp p))
2123: \end{verbatim}}
2124: \noindent
2125: We now need to break the conjunctions into separate
2126: subgoals. This is done by the tactic {\tt Split}. As
2127: the conjunction is nested, this tactic needs to be
2128: applied repeatedly: 
2129: {\small \begin{verbatim}
2130: Repeat Split.
2131: \end{verbatim}}
2132: \noindent
2133: We get the expected three subgoals.
2134: {\small \begin{verbatim}
2135: 3 subgoals
2136:   
2137:   p : float
2138:   H : (Fbounded p)
2139:   ============================
2140:    (Zle (Zopp (vNum b)) 0) 
2141: 
2142: subgoal 2 is:
2143:    (Zle 0 (vNum b))
2144: 
2145: subgoal 3 is:
2146:    (Zle (Zopp (dExp b)) (Fexp p))
2147: \end{verbatim}}
2148: \noindent
2149: The first goal is simple enough to be proved automatically
2150: by the following tactic {\tt Intuition}. We are left with two subgoals.
2151: {\small \begin{verbatim}
2152: 2 subgoals
2153:   
2154:   p : float
2155:   H : (Fbounded p)
2156:   ============================
2157:    (Zle 0 (vNum b))
2158: 
2159: subgoal 2 is:
2160:    (Zle (Zopp (dExp b)) (Fexp p))
2161: \end{verbatim}}
2162: \noindent
2163: Applying the same tactic to these two subgoals ends the proof.
2164: The final proof script looks like:
2165: {\small \begin{verbatim}
2166: Theorem BoundedZero: (p:float) (Fbounded p) ->(Fbounded (Fzero p)).
2167: Intros p H.
2168: Red.
2169: Simpl.
2170: Repeat Split.
2171: Intuition.
2172: Intuition.
2173: Intuition.
2174: Qed.
2175: \end{verbatim}}
2176: \noindent
2177: Using the composition of tactics ";", it can be shorten to:
2178: {\small \begin{verbatim}
2179: Theorem BoundedZero: (p:float) (Fbounded p) ->(Fbounded (Fzero p)).
2180: Intros p H; Red; Simpl; Repeat Split; Intuition.
2181: Qed.
2182: \end{verbatim}}
2183: 
2184: \end{document}
2185: 
2186: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2187: