cs0409038/corr.tex
1: \documentclass{tlp}
2: \usepackage{epic}
3: \usepackage{eepic}
4: \usepackage{latexsym}
5: \usepackage{afterpage}
6: % Some magic to get the tables and figures to stay put (more or less)
7: \renewcommand{\topfraction}{1}
8: \renewcommand{\textfraction}{0.0}
9: \renewcommand{\floatpagefraction}{1}
10: \setcounter{topnumber}{4}
11: 
12: 
13: 
14: \newcommand{\mod}[0]{\mbox{~mod~}}
15: 
16: \newcommand{\nt}[1]{\textsf{#1}}
17: \newcommand{\func}[1]{\textsc{#1}}
18: 
19: \newcommand{\ourtop}{\top}%%{\top^*}
20: \newcommand{\ipred}{\$\texttt{ipred}\$}
21: \newcommand{\gpred}{\$\texttt{gpred}\$}
22: \newcommand{\svars}[1]{\$\texttt{old}(\mbox{$#1$})\$}
23: \newcommand{\sgrounds}[1]{\$\texttt{ground}(\mbox{$#1$})\$}
24: 
25: \newcommand{\algsin}[1]{#1}
26: \newcommand{\short}[1]{}
27: \newcommand{\llong}[1]{#1}
28: 
29: \newcommand{\oo}{\mbox{$[\![$}}
30: \newcommand{\cc}{\mbox{$]\!]$}}
31: 
32: \newcommand{\ttnew}{\texttt{new}}
33: \newcommand{\ttold}{\texttt{old}}
34: \newcommand{\ttground}{\texttt{ground}}
35: \newcommand{\ntnew}{\nt{new}}
36: \newcommand{\ntold}{\nt{old}}
37: \newcommand{\ntground}{\nt{ground}}
38: 
39: \def\tuple#1{\langle #1 \rangle}
40: \setlength{\parskip}{0.0in}
41: 
42: \newenvironment{grammar}{%
43:          \begin{center} \small%
44:          $\begin{array}{rcll}
45:          }{%
46:          \end{array}$\end{center}\ignorespaces%
47:          }
48: 
49: \newenvironment{definition}{%
50:          \begin{defn} \rm%
51:          }{%
52:          $\Box$ \end{defn}\ignorespaces%
53:          }
54: \newenvironment{example}{%
55:          %\vspace{-1mm}
56:          \begin{ex} \rm%
57:          }{%
58:          $\Box$\end{ex}\ignorespaces%
59:          %\vspace{-1mm}
60:          }
61: \newtheorem{theorem}{Theorem}
62: \newtheorem{lemma}{Lemma}
63: \newtheorem{ex}[theorem]{Example}
64: \newtheorem{defn}[theorem]{Definition}
65: 
66: \newenvironment{ttline}{\begin{trivlist}\small
67:                          \tt\item}{\end{trivlist}}
68: \newenvironment{ttprog}{\begin{trivlist}\small
69:                          \tt\item\begin{tabbing}}%
70:                        {\end{tabbing}\end{trivlist}}
71: 
72: 
73: \newcommand{\Inst}{\emph{Inst}}
74: \newcommand{\fresh}{\emph{fresh}}
75: \submitted{6 December 2002}
76: \revised{16 March 2004}
77: \accepted{15 September 2004}
78: 
79: \begin{document}
80: 
81: \title[Checking Modes of HAL Programs]{Checking Modes of HAL Programs\footnote{A preliminary version of this
82: paper appeared under the title ``Mode Checking in HAL,''
83:         in the {\em Conference on Computational Logic (CL'2000)},
84:         London, June 2000.
85: }}
86: 
87: \author[M.\ Garc\'{\i}a de la Banda et al.]{
88: MARIA GARCIA DE LA BANDA, 
89: WARWICK HARVEY, 
90: KIM MARRIOTT \\
91:   School of Computer Science \& Software Engineering,
92:   Monash University, Australia \\
93: \email{\{mbanda,marriott\}@csse.monash.edu.au}
94: \email{wh@icparc.ic.ac.uk}
95: \and PETER J. STUCKEY \\
96:  Department of Computer Science \& Software Engineering,
97:   University of Melbourne, Australia\\
98: \email{pjs@cs.mu.oz.au}
99: \and
100: BART DEMOEN \\
101: Department of Computer Science, 
102: Catholic University Leuven, Belgium\\
103: \email{bmd@cs.kuleuven.ac.be}
104: }
105: 
106: \maketitle
107: 
108: \textbf{Note:} This article is to published in
109: \emph{Theory and Practice of Logic Programming}.
110: \copyright Cambridge University Press.
111: 
112: \begin{abstract}
113: Recent constraint logic programming (CLP) languages, such as HAL and Mercury,
114: require type, mode and determinism
115: declarations for predicates. This information allows the generation of
116: efficient target code and the detection of many errors at compile-time.
117: Unfortunately, mode checking in such languages is difficult. One of the main 
118: reasons is that, for each predicate mode declaration,
119: the compiler is required to appropriately re-order literals in the
120: predicate's definition.
121: The task is further complicated by the need to 
122: handle complex instantiations
123: (which interact with type declarations and 
124: higher-order predicates) and automatic initialization of solver variables.
125: Here we define mode checking
126: for strongly typed CLP languages
127:  which require reordering of clause body literals. 
128: In addition, we show how to handle a simple case of 
129: polymorphic modes by using the
130: corresponding polymorphic types. 
131: \end{abstract}
132: \begin{keywords}
133: Strong modes, mode checking, regular grammars
134: \end{keywords}
135: 
136: \section{Introduction}
137: 
138: While traditional logic and constraint logic programming (CLP) languages are
139: untyped and unmoded, recent languages such as Mercury~\cite{mercury} and
140: HAL~\cite{cp99,flops2002} require type, mode and determinism declarations for
141: (exported) predicates. This information allows the generation
142: of efficient target code (e.g.\ mode information can provide 
143: an order of magnitude
144: speed improvement~\cite{iclp99}), improves robustness and facilitates
145: efficient integration with foreign language procedures.  
146: %Type information also allows predicate and function overloading to 
147: %be resolved at compile-time, thus resulting in natural syntax for constraints.
148: Here we describe our experience with mode checking in the
149: HAL compiler. 
150: 
151: HAL is a CLP language designed
152: to facilitate ``plug-and-play'' experimentation with different solvers.
153: %compiles to Mercury.It extends Mercury by 
154: To achieve this it provides support for 
155: user-defined constraint solvers, global variables
156: and dynamic scheduling. 
157: Mode checking in HAL
158: is one of the most complex stages in the compilation.
159: Since predicates can be given multiple mode declarations,
160: mode checking is performed for each of these modes and 
161: the compiler creates a 
162: specialized \emph{procedure} for each mode (i.e.\
163: it performs multi-variant specialization).
164: Mode checking involves traversing each predicate mode declaration
165: to check that if the predicate is called
166: with the input instantiation specified by the mode declaration then the
167: following two properties are satisfied. First, the predicate mode
168: declaration is \emph{input-output correct}, that is, 
169: it is guaranteed that if the input instantiation satisfies this
170: declaration then the result is an output instantiation that
171: satisfies the declaration.
172: And second, the predicate is
173: \emph{call correct}, 
174: that is, if the input instantiation satisfies this
175: declaration then each literal occurring in the definition of the
176: predicate is called with an input instantiation satisfying
177: one of its declared modes.
178: 
179: Call correctness may require the compiler to 
180: re-order literals in the body of each rule, 
181: so that literals are indeed called with an appropriate input instantiation. 
182: Such reordering is essential in logic programming
183: languages which wish to support multi-moded predicates while,
184: at the same time, retaining a Prolog programming style in which 
185: a single predicate definition is provided for all modes of usage. 
186: And an important function of this reordering is to  
187: appropriately order the equalities inserted by the compiler during program 
188: normalisation for matching/constructing 
189: non-variable predicate arguments.
190: The need to reorder rule bodies is one reason why
191: mode checking is a rather complex task.
192: However, it is not the only reason. Three other issues 
193: exacerbate the difficulty of mode checking.
194: First, 
195: instantiations (which describe the possible states of program variables) 
196: may be very complex and
197: interact with the type declarations. 
198: Second, accurate mode checking of higher-order predicates
199:  is difficult.
200: Third, the compiler needs to handle automatic initialization 
201: of solver variables.
202: 
203: Although
204: mode inference and checking of logic programs has been a fertile research
205: field for many years, 
206: almost all research has focused on
207: mode checking/inference in traditional (and thus untyped) 
208: logic programming languages where
209: the analysis assumes the given literal ordering is fixed
210: and cannot assume that a program is type correct.
211: Thus, a main contribution of this paper is
212: a complete definition of mode checking
213: in the context of CLP languages which are strongly typed
214: and which may require reordering of rule body literals during mode
215: checking. 
216: 
217: 
218: A second contribution of the paper is
219: to describe the algorithms for mode checking currently employed in the
220: HAL compiler. Since HAL and the logic programming language Mercury
221: share similar type and mode systems,\footnote{In part, because HAL is compiled 
222: to Mercury.} much of our description and formalization also applies to 
223: mode checking in Mercury (which has not been previously described\footnote{
224: Recently a thesis has been completed on Mercury mode checking~\cite{dmo}}). 
225: However, there
226: are significant differences between mode checking in the two languages.
227: In HAL  there is the need to
228:  handle automatic initialization of solver variables,
229: and, in general, complex modes (other than \texttt{in} and
230: \texttt{out}) are used more frequently since constraint solver variables are 
231: usually not ground.
232: Furthermore, determining the best reordering in HAL is more complex 
233: than in Mercury because the order in which constraints are 
234: solved can have a more
235: significant impact on efficiency \cite{3r}.
236: Also,  HAL handles a limited form of polymorphic mode checking.
237: On the other hand, Mercury's mode system allows the specification of
238: additional information about data structure liveness and usage.
239: 
240: The rest of the paper is organized as follows. In the following
241: section we review related work. Section \ref{sec:lang} provides an
242: informal view of the role of types, modes, and instantiations in the
243: HAL language. Its aim is to give insight into the more rigorous
244: formalization provided in section \ref{sec:trees} which introduces
245: type-instantiation grammars for combining type and instantiation
246: information as the basis for mode checking in HAL. Section
247: \ref{sec:basic} describes the basic steps performed for mode checking
248: HAL programs. Section \ref{sec:init} focuses on the automatic
249: initialization needed by the modes of usage of some predicates.
250: Section \ref{sec:higher} discusses mode checking of higher order
251: predicates and objects, while Section \ref{sec:poly} shows how to
252: handle simple polymorphic modes. Finally, Section \ref{sec:concl}
253: provides our conclusions and discusses some future work.
254: 
255: 
256: \section{Related Work}
257: 
258: Starting with~\cite{Mellish87,deb-acm89} there has been considerable
259: research into mode checking and inference in traditional logic
260: programming languages.  However, as indicated above, there are two fundamental
261: differences between that work and ours.
262: 
263: First, almost all research
264: assumes that mode analysis is not required to reorder clause bodies.
265: Second, while almost all
266: research has focused on untyped logic programming languages, mode
267: checking of HAL relies on predicates and program variables
268: having a single (parametric polymorphic) 
269: Hindley-Milner type and the type correctness of the program
270: with respect to this type.
271: Access to  type information allows us to handle
272: more complex instantiations than are usually considered in mode analysis and
273: also to handle  mode checking of higher-order
274: predicates in a more rigorous fashion: in most previous work 
275: higher-order predicates are largely ignored.
276: 
277: Another important difference is that we are dealing with constraint
278: logic programming languages in which program variables
279: need to be appropriately initialized before being sent to
280: some constraint solver as part of a constraint. 
281: Requiring explicit initialization of solver variables puts additional
282: burden on the programmer and makes it impossible to write
283: multi-moded predicate definitions for which different modes require 
284: different variable initialisations. We have consequently chosen for 
285: the HAL compiler to
286: {\em automatically} initialize solver variables, i.e.\ the compiler
287: generates initialization code whenever necessary.
288: %
289: In order to perform such automatic initialization
290: mode checking in HAL must track
291: which program variables are currently uninitialized (in our terminology are
292: \ntnew{}). 
293: Tracking of uninitialized variables 
294: also supports powerful optimizations which can greatly improve performance.
295: For this reason the Mercury mode checker also tracks uninitialized variables.
296: 
297: This need to track uninitialized program variables 
298: is a significant difference between mode checking
299: in the Mercury and HAL languages, and most logic
300: programming work on modes.  
301: It is not the same as tracking so-called ``free'' variables in traditional
302: logic programming: first free variables may be aliased to other variables,
303: something that is not possible with uninitialized variables, second,
304: uninitialized variables have to be tracked exactly:
305: %
306: %% the compiler must not
307: %% fail to initialize a variable, or initialize it more than once.
308: %
309: the compiler must not fail to initialize a variable, neither should it
310: initialize a variable more than once.
311: %
312: We will now review  selected related work in detail.
313: 
314: The original work on mode checking in strongly typed logic languages
315: with reorderable clause bodies is that of~\cite{Zoltan87}, which gives
316: an informal presentation of a mode system based on types. This is
317: perhaps the closest work in spirit since it was the basis of mode
318: checking in Mercury. However, its mode system is much simpler than
319: ours and it does not consider higher-order predicates or the problems of
320: automatic initialization.  The remaining work does not consider compile-time
321: reordering.
322: 
323: Perhaps the most closely related work in traditional logic programming
324: language analysis is the early work of~\cite{gerda} which uses regular
325: trees to define types and instantiations, and uses 
326: these trees to perform mode inference. Main differences 
327: are that~\cite{gerda} does not consider
328: reordering or tracking uninitialized variables. Other 
329: more technical differences are that,
330: although we use deterministic tree grammars to formalize types, our
331: type analysis~\cite{typec} is based on a Hindley-Milner approach. A
332: key difference with this and other work such as that of~\cite{mal}
333: is that we describe instantiations for polymorphic types, including
334: higher-order objects.  Also, in~\cite{gerda}
335: depth restrictions are imposed to make the generated regular trees
336: finite. This is not needed in our approach. Finally, they use definite
337: and possible sharing analysis to improve instantiation information.
338: This is not done yet in HAL for complexity reasons (sharing analysis
339: is quite expensive and thus a danger for practical compilation), however
340: a simple sharing and aliasing analysis should indeed prove to be
341: useful.
342: 
343: After the early work of~\cite{gerda}, there has been a significant
344: amount of research aimed at improving the precision of the analysis by
345: providing additional information about the structure of the terms.
346: Initially, this was achieved by performing some simple pattern
347: analysis and then providing this information to other analyses (see
348: for example, \cite{LeCharlier94:toplas,mulkers-iclp95}). Later, with
349: the gradual success of typed languages, pattern information was
350: substituted by type information with which more accurate results could
351: be obtained, i.e., type information was annotated with different kinds
352: of information some of which were mode information (see, for example,
353: \cite{rid-boiz99,smaus}). But most of this work was designed to either
354: provide a general framework for combining type information with other
355: kinds of information, or to infer some particular kind of information
356: (such as mode information) from a program without reordering the
357: literals in the body of predicates. Furthermore, they were not
358: interested in tracking uninitialized variables  nor keeping
359: enough instantiation information 
360: (i.e.\ which particular tree constructors can occur)
361: for optimizations such as switch
362: detection~\cite{mercury-switch}. Again, further differences arise
363: since we consider higher-order mode inference and polymorphic modes.
364: 
365: Recent work on \emph{directional types}~(see e.g.~\cite{mal}) 
366: is much more analogous to HAL mode checking.  
367: There, they are interested in determining mode-correctness of a program
368: given (user supplied) mode descriptions (called directional types).
369: Apart from previously mentioned differences, the framework
370: of~\cite{mal} uses directional types that are much simpler than
371: the instantiations that we deal with here.
372: %%PJS  .. \pjs{something}                 
373: Interestingly, 
374: the work of~\cite{mal} uses directional type correctness to show that
375: a run-time reordering of a well-typed program will not deadlock,
376: somewhat analogous to our compile-time reordering.
377: 
378: Type dependency analysis~\cite{mike} is also related to mode checking.
379: Their analysis determines 
380: type dependencies from which we can read all the 
381: correct modes or directional types of a program.  
382: The framework is however restricted to use types (and modes) defined by 
383: unary function symbols and an ACI operator.
384: 
385: Other related work has been on mode checking for concurrent logic
386: programming languages \cite{Codognet90}: 
387: There the emphasis has been on detecting
388: communication patterns and possible deadlocks.
389: 
390: The only other logic programming system we are aware of
391: which does significant mode checking is Ciao~\cite{CIAO}.
392: The Ciao logic programming system~\cite{CIAO} does mode
393: checking using its general assertion checking framework
394: CiaoPP based on abstract interpretation~\cite{SAS-CIAO}. 
395: Modes are considered as simply one form of assertion,
396: and indeed the notion of what is a mode is
397: completely redefinable.
398: The default modes are 
399: analyzed by the CiaoPP preprocessor using a combination of
400: regular type inference and groundness, freeness
401: and sharing analyses.  Ciao modes are more akin to directional types,
402: than the strong modes of HAL and Mercury, and the compiler will check
403: them if possible, and optionally add run-time tests for modes that
404: could not be checked at compile time.  
405: As with other earlier
406: work the fundamental differences with the HAL mode system are
407: in treatment of uninitialized variables, reordering, higher-order and
408: polymorphic modes.
409: 
410: \section{HAL by example}\label{sec:lang}
411: 
412: This section provides an informal view of the role of types, modes, and
413: instantiations in the HAL language. The aim is to provide insight into
414: the more rigorous formalization that will be provided in the following
415: sections. We do this by explaining the %main elements of the
416: example HAL program shown in Figure~\ref{fig:stack}, which implements a
417: polymorphic stack using lists. Note that HAL follows the basic CLP syntax,
418: with variables, rules and predicates defined as usual (see, for example,
419: \cite{CLPbook} for an introduction to CLP).
420: 
421: \begin{figure}
422: 
423: \begin{center}
424: \begin{ttprog}
425: :- typedef list(T) -> ([] ; [T|list(T)]). \\
426: \\
427: :- instdef elist -> []. \\
428: :- instdef list(I) -> ([]; [I|list(I)]). \\
429: :- instdef nelist(I) -> [I|list(I)]. \\
430: \\
431: :- modedef out(I) -> (new -> I). \\
432: :- modedef in(I) -> (I -> I). \\
433: \\
434: :- pred push(list(T),T,list(T)). \\
435: :- mode push(in,in,out(nelist(ground))) is det. \\
436: push(S0,E,S1) :- S1 = [E|S0]. \\
437: \\
438: :- pred pop(list(T),T,list(T)). \\
439: :- mode pop(in,out,out) is semidet. \\
440: :- mode pop(in(nelist(ground)),out,out) is det. \\
441: pop(S0,E,S1) :- S0 = [E|S1]. \\
442: \\
443: :- pred empty(list(T)). \\
444: :- mode empty(in) is semidet. \\
445: :- mode empty(out(elist)) is det. \\
446: empty(S) :- S = [].  \\
447: %% \\
448: %% :- pred stackadd(list(T), list(T), list(T)). \\
449: %% :- mode stackadd(in, in, out) is det. \\
450: %% :- mode stackadd(out, out, in) is nondet. \\
451: %% stackadd(X,Y,Z) :- X = [], Y = Z. \\
452: %% stackadd(X,Y,Z) :- X = [A|X1], Z = [A|Z1], stackadd(X1,Y,Z1).
453: \end{ttprog}
454: \end{center}
455: \caption{Example HAL program implementing a polymorphic stack.\label{fig:stack}}
456: 
457: \end{figure}
458: 
459: \subsection{Types}
460: 
461: %
462: %% Informally, a ground type describes the complete set of fixed 
463: %% values a program variable can take and
464: %% is, therefore, invariant over the life of the variable.  
465: %
466: Informally, a ground type describes a set of ground
467: terms and is used as a reasonable approximation of the
468: ground values a particular program variable can take. It is therefore
469: an invariant over the life time of the variable.
470: %
471: Types in HAL are prescriptive rather than descriptive, they restrict
472: the possible values of a variable.
473: Unlike much of the work performed on types for logic programming languages, 
474: our types only include the
475: ground (also called fixed) values that a variable can take.  
476: Later we will describe how instantiations are used to express when a 
477: variable takes a value which is not completely fixed.
478: 
479: Types are specified using type definition statements. 
480: For instance, in the example shown in
481: Figure~\ref{fig:stack}, the line
482: \begin{ttline}
483: :- typedef list(T) -> ([] ; [T|list(T)]).
484: \end{ttline}
485: defines the polymorphic type constructor \texttt{list/1} where \texttt{list(T)} is the
486: type of lists with elements of parametric type\footnote{In order to clearly distinguish
487:  between program variables and any other kinds of variables (type variables, instantiation variables, etc) we will refer to all other kinds of variables as parameters (i.e.,
488: type parameters, instantiation parameters, etc).} \texttt{T}. These lists are
489: made up using the \texttt{[]/0} and \texttt{./2} (represented by
490: \texttt{[$\cdot$|$\cdot$]}) tree constructors. 
491: 
492: HAL includes the usual set of built-in basic types: \texttt{float} 
493: (floating point numbers), \texttt{int} (integers), \texttt{char}
494: (characters) and \texttt{string} (strings). Like most typed languages, HAL
495: provides the means to define type equivalences. For example, the statement
496: \begin{ttline}
497: :- typedef vector = list(int).
498: \end{ttline}
499: defines the type \texttt{vector} to be a list of integers. Equivalence
500: types are simply macros for type expressions, and the compiler replaces
501: equivalence types by their definition (circular type equivalences are not
502: allowed). From now on we assume that equivalence types have been eliminated
503: from the type expressions we consider. This can be achieved
504: straightforwardly by applying substitution.
505: 
506: Finally, HAL allows a type to be declared 
507: as \emph{hidden} so that its definition
508: is not visible outside the module in which it is defined.  We note that the
509: treatment of hidden types is almost identical 
510: to that of type parameters and so omit
511: them for simplicity.
512: 
513: It is important to note that a program variable's type is used by 
514: a compiler to determine the
515: \emph{representation format} for that variable, i.e., the particular way
516: in which program variables are stored during execution. As a result,
517: %
518: %% program variables may have different types even though the set of values
519: %% they can take is in some sense identical.
520: %
521: two program variables may have different types
522: even though the representation of their values can be identical.
523: %
524: For example, in a language
525: providing both the ASCII character set and an extended
526: international character set, variables
527: representing each kind of character would need to have different types
528: since their internal representation is different. 
529: 
530: \subsection{Solvers}
531: 
532: In HAL a constraint solver is defined using a new type.
533: Assume for example, that a programmer wishes 
534: to implement a constraint solver over floating point numbers. 
535: From the point of view of the user, the variables will take
536: floating point values and 
537: thus one might expect them to have the built-in type
538: \texttt{float}.  
539: But their internal representation cannot be a float
540: as they need to keep track of internal information
541: for the solver.
542: As a result, the type of the variables cannot
543: be the built-in type \texttt{float} but must be some other type defined by the
544: solver, and whose implementation is hidden from the outside world.
545: This is were we use abstract types, to hide this view from the
546: outside world. 
547: 
548: \begin{example}\label{ex:cfloat}
549: For example a floating point solver type \texttt{cfloat}
550: might be defined as
551: \begin{ttline}
552: :- typedef cfloat -> var(int) ; val(float). 
553: \end{ttline}
554: where the integer in the \texttt{var} tree constructor 
555: refers to a column number in a (global) simplex tableaux,
556: and the \texttt{val} constructor is used to represent simple
557: fixed value floating point numbers.
558: \end{example}
559: 
560: %Note that then each solver must define one single type. 
561: Types defined by solvers 
562: are called \emph{solver types} and variables with a solver type are called
563: \emph{solver variables}.
564: Solvers must also provide an initialization procedure
565: (\texttt{init/1}) and at least the equality (\texttt{=/2}) constraint for
566: the type, although many other constraints will be usually
567: provided. Note that solver variables must be initialized before they can be
568: involved in any constraint. This is required so that the solver can
569: keep track of its variables and initialize the appropriate internal data-structures
570: for them.
571: 
572: The case of Herbrand solver types (i.e., types for which there is a
573: full unification solver) is somewhat special. Any user-defined type
574: can be declared to be a Herbrand solver type by annotating its type
575: definition with the words ``deriving solver''. For
576: example:
577: \begin{ttline}
578: :- typedef hlist(T) -> ([] ; [T|hlist(T)]) deriving solver.
579: \end{ttline}
580: defines the \texttt{hlist} Herbrand type. The compiler will then
581: automatically create an initialization predicate for the type (which
582: is actually identical for all Herbrand types) and an equality
583: predicate for the type which handles not only simple construction,
584: deconstruction and assignment of non-variable terms (which is the only
585: equality support provided for non-solver types), 
586: but full unification. As a
587: result, while variables with non-solver type \texttt{list} are always
588: required to be bound at run-time to a list of fixed length (so that the
589: limited support provided by construction, deconstruction and
590: assignment is enough),\footnote{Note that the elements inside the list need
591:   not be ground!} 
592: variables with type \texttt{hlist} may be bound to open
593: ended lists, where the tail of the list is an unbound (list)
594: variable.
595: 
596: 
597: \subsection{Instantiations}
598: 
599: Instantiations define the set of values, 
600: within a type, that a program variable may have at a
601: particular program point in the execution, as well
602: as the possibility that the variable (as yet) takes no value.
603: Instantiation information is vital to the compiler to determine
604: whether equations on terms are being used to
605: construct terms, deconstruct terms or check the equality of
606: two terms. Furthermore, instantiation information is needed to infer 
607: the determinism of predicates (i.e., how many answers a predicate has) 
608: and to perform many other low-level optimizations.
609: 
610: Although instantiations may seem very similar to types, 
611: they should not be confused: a type
612: is invariant over the life of the variable, while instantiations 
613: change.  Additionally, instantiations reflect the possibility
614: of a variable having no value yet, 
615: or being ``constrained'' to some unknown set of values.
616: 
617: HAL provides three \emph{base} instantiations for
618: a variable: {\tt ground}, {\tt old} and {\tt new}. A variable is
619: \texttt{ground} if it is known to have a unique value; the compiler might
620: not know exactly which value within the type (it might depend on the
621: particular execution), but it knows it is fixed (for a solver variable this
622: happens whenever the variable cannot be constrained further). 
623: 
624: A variable is \texttt{new} if it has not been initialized and it has
625: never appeared in a constraint (thus the name \texttt{new}). Thus, it
626: is known to take no value yet.  
627: As we have indicated, the instantiation \texttt{new} leads
628: to a crucial difference between mode checking in Mercury and HAL, 
629: and that investigated in most other research into mode checking
630: of logic programs.  Mercury and HAL demand that at each point
631: in execution the compiler knows whether a variable has a value or not.
632: This allows many compiler optimizations, and is a key to the
633: difference in execution speed of Mercury and HAL to most other logic
634: programming systems. The requirement to always have accurate
635: instantiation information about which variables are \texttt{new}
636: drives many of the decisions made in the mode checking system.
637: In particular, it means that a \texttt{new} variable is not allowed
638: to appear inside a data structure, and can only be given a value by
639: assignment or, if it is a solver variable, after initialization.
640: %%PJS\kim{added solver variable case} 
641: 
642: Finally, the instantiation \texttt{old} is used to describe a solver
643: variable that has been initialized but for which nothing is known
644: about its possible values.  Note that the variable might be
645: unconstrained, it might be ground, or anything in between (e.g.~be
646: greater than 5); the compiler simply does not know.  In the case in
647: which \texttt{old} is associated with a non-solver variable, it is
648: deemed to be equivalent to \texttt{ground}.  Note that in Mercury,
649: where there are no solver types, each variable always has an
650: instantiation which is either \texttt{new} or (a subset of)
651: \texttt{ground}.
652: 
653: %%PJS\kim{somewhat repetitive now}
654: It is important to note that \texttt{new} is not analogous to free in
655: the usual logic programming sense. A free variable in the HAL context
656: is an \texttt{old} variable (thus, it has been initialized by the
657: appropriate solver) which has never been bound to a non-variable term.
658: Thus, free variables might have been aliased, while \texttt{new}
659: variables cannot.
660: %
661: %% As a result, there cannot be two occurrences of a
662: %% \texttt{new} variable. Indeed, new variables need not be stored at run-time; 
663: %% only the value they will be assigned needs to be
664: %% stored.
665: %
666: This is exploited by the compiler by not giving a run-time
667: representation to new variables. As a consequence, a \texttt{new}
668: variable cannot occur syntactically more than once.
669: %
670: %% In the case of a solver variable
671: %% it also means that the variable has not been initialized yet (this is vital
672: %% to distinguish between initialized and non-initialized variables). 
673: 
674: For data structures such as trees or lists of solver variables, more
675: complex instantiation states may be used. These instantiations are
676: specified using instantiation definition statements 
677: which look very much like type definitions, the only
678: difference being that the arguments themselves are instantiations rather
679: than types.
680: For instance, in
681: the example shown in Figure~\ref{fig:stack}, the lines
682: \begin{ttline}
683: :- instdef elist -> []. \\
684: :- instdef list(I) -> ([] ; [I|list(I)]). \\
685: :- instdef nelist(I) -> [I|list(I)]. 
686: \end{ttline}
687: define the instantiation constructors \texttt{elist/0}, \texttt{list/1}
688: and \texttt{nelist/1}, which in the example are associated with variables of
689: type \texttt{list/1}. In that context, the instantiation \texttt{elist} describes
690: empty lists. The polymorphic instantiation \texttt{list(I)} describes lists
691: with elements of parametric instantiation \texttt{I} (note the deliberate reuse of the
692: type name). Finally, the instantiation \texttt{nelist(I)} describes
693: non-empty lists with elements of parametric instantiation \texttt{I}.
694: 
695: 
696: When associated with a variable, 
697: an instantiation requires the variable to be
698: bound to one of the outer-most 
699: functors in the right-hand-side of its
700: definition, and the arguments of the functor 
701: to satisfy the instantiation of the
702: corresponding arguments in the instantiation definition. 
703: In the case of \texttt{elist}, it would
704: mean the variable is \texttt{ground}. In the remaining two cases, it would
705: depend on the parametric instantiation \texttt{I}, but at the very least the variable
706: %% would be known to be a list of finite length.
707: would be known to be a nil-terminated list, i.e.\ its length is fixed.
708: 
709: Note that the separation of instantiation information from type information
710: means we can associate the same instantiation for different types.
711: For example, a program variable with solver type \texttt{hlist(int)} and instantiation 
712: \texttt{list(ground)} indicates that the program variable 
713: has a fixed length list as its value.
714: A program variable with non-solver type \texttt{list(int)} and instantiation 
715: \texttt{list(ground)} indicates the same, but since the type is not a solver type, 
716: this would always be the case.
717: %%PJS
718: The separation of instantiation information from type information
719: also makes the handling of 
720: polymorphic application much more  straightforward,
721: since we will simply associate a different type with the same instantiation.
722: 
723: 
724: As mentioned before, the instantiation \texttt{new} is not allowed to appear as an
725: argument of any other instantiation. As a result, a variable can only be inserted
726: in a data structure if it is either \texttt{ground} or initialized (and
727: thus must be \texttt{old}). The main reason for this is the requirement
728: for accurate mode information about \texttt{new} variables. It quickly
729: becomes very difficult to \emph{always} have correct instantiation
730: information about which variables (and parts of data structures) are
731: \texttt{new}. 
732: While sharing and aliasing analyses might allow us
733: to keep track which variables are \texttt{new} in more situations,
734: inevitably they lead to situations where we cannot determine whether
735: the value of a variable is \texttt{new} or not, which is not acceptable to
736: the compiler.  We do however plan to use sharing and aliasing analysis
737: to keep track of initialized (\texttt{old}) variables that 
738: have yet to be constrained (analogous to free variables in Prolog).
739: 
740: 
741: \subsection{Modes}
742: 
743: A mode is of the form $\Inst_1 \rightarrow \Inst_2$ where $\Inst_1$ describes
744: the \emph{call} (or input) instantiation and $\Inst_2$ describes the
745: \emph{success} (or output) instantiation. 
746: The {\em base} modes are mappings from one base instantiation to another:
747: we use two letter codes (\texttt{oo}, \texttt{no}, \texttt{og},
748: \texttt{gg}, \texttt{ng}) 
749: based on the first letter of the instantiation, e.g.~\texttt{ng} 
750: is \texttt{new}$\rightarrow$\texttt{ground}.
751: The usual modes \texttt{in}  and
752: \texttt{out} are also provided (as renamings of \texttt{gg} and \texttt{ng}, respectively).
753: 
754: Modes are specified using mode definition statements. For instance, in the
755: example shown in Figure~\ref{fig:stack}, the lines
756: \begin{ttline}
757: :- modedef out(I) -> (new -> I). \\
758: :- modedef in(I) -> (I -> I).
759: \end{ttline}
760: are mode definitions, defining macros for modes. 
761: The \texttt{out(I)}
762: mode requires a new object on call and returns an object with instantiation
763: \texttt{I}. The \texttt{in(I)} mode requires instantiation \texttt{I} on
764: call and has the same instantiation on success.
765: 
766: 
767: HAL allows the programmer to define mode equivalences and instantiation
768: equivalences. As for type equivalences, from now on we assume that these
769: equivalences have been eliminated from the program. 
770: For example
771: \begin{ttline}
772: :- modedef in = in(ground). \\
773: :- modedef out = out(ground).
774: \end{ttline}
775: define \texttt{in} as equivalent to \texttt{ground -> ground},
776: and \texttt{out} as \texttt{new -> ground}.
777: 
778: \subsection{Equality}
779: 
780: The equality constraint is a special predicate in HAL.  
781: Equality will be normalized in HAL programs to take one of two
782: forms $x_1 = x_2$, and $x = f(x_1, \ldots, x_n)$ where
783: $x, x_1, \ldots, x_n$ are variables. 
784: Each form of equality supports a number of modes.
785: 
786: The equality $x_1 = x_2$ can be used in two modes.
787: In the first mode, \textbf{copy} (\texttt{:=}), either $x_1$
788: or $x_2$ must be \textsf{new} and the other variable must not be
789: \textsf{new}.  Assuming $x_1$ is new the value of $x_2$ is copied
790: into $x_1$. In the second mode \textbf{unify} (\texttt{==}) 
791: both $x_1$ and $x_2$ must not be new. This requires a full unification.
792: 
793: The equality $x = f(x_1, \ldots, x_n)$ can also be used in
794: two modes.  In the first mode, \textbf{construct} (\texttt{:=}),
795: $x$ must be \textsf{new} and each of $x_1, \ldots, x_n$ not \textsf{new}.
796: A new $f$ structure is built on the heap, and the values
797: of $x_1, \ldots, x_n$ are copied into this structure.
798: In the second mode, \textbf{deconstruct} (\texttt{=:}),
799: each of $x_1, \ldots, x_n$ must be \textsf{new} and $x$ must not be
800: \textsf{new}. If $x$ is of the form $f(a_1, \ldots, a_n)$ then
801: the value of $a_i$ is copied into $x_i$,
802: otherwise the deconstruct fails.\footnote{This is a
803: simplistic high level view, 
804: actually the system uses PARMA bindings and things are more complicated.
805: See~\cite{iclp99} for details.}
806: As we shall see later, mode checking shall extend the use
807: of these modes for other implicit modes. 
808: 
809: 
810: \subsection{Predicate Declarations}
811: 
812: HAL allows the programmer to declare the type and modes
813: of usage of predicates. In our example of Figure~\ref{fig:stack}, the lines
814: \begin{ttline}
815: :- pred pop(list(T),T,list(T)). \\
816: :- mode pop(in,out,out) is semidet. \\
817: :- mode pop(in(nelist(ground)),out,out) is det. 
818: \end{ttline}
819: give such declarations for predicate \texttt{pop/3}.  
820: The first line is a
821: polymorphic type declaration (with parametric type \texttt{T}). It specifies the
822: types of each of the three arguments of \texttt{pop/3}.  The second and
823: third lines are mode declarations specifying the two different modes in
824: which the predicate can be executed. For example, in the 
825: first mode the first argument
826: is \texttt{ground} on call and success, while the second and third
827: arguments are \texttt{new} on call and \texttt{ground} on success.
828: 
829: Each mode declaration for a predicate defines a \emph{procedure},
830: a different way of executing the predicate. The role of mode checking
831: is not just to show these modes are correct, but also to reorder
832: conjunctions occurring in the predicate definition in order to
833: create these procedures.
834: 
835: 
836: 
837: The second and third lines also contain a determinism declaration.
838: These describe how many answers a predicate may have for a particular mode of usage:
839: \texttt{nondet} means any number of solutions; \texttt{multi} at least one
840: solution; \texttt{semidet} at most one solution; \texttt{det} exactly one
841: solution; \texttt{failure} no solutions; and \texttt{erroneous} a run-time
842: error.
843:  Thus, in the second line, since \texttt{pop/3} for this
844: mode of usage is guaranteed to have at most one solution  but might fail 
845: (when the first argument is an empty list), the determinism
846: is \texttt{semidet}.
847: For the second mode, the first argument is not only known to be ground but
848: also to be a non-empty list. As a result, the predicate can be ensured to
849: have exactly one solution and so its determinism is \texttt{det}.
850: Notice how by providing more complex instantiations we can improve the 
851: determinism information of the predicate.  They also lead to
852: more efficient code, since unnecessary checks (e.g.~that the
853: first argument of \texttt{pop/3} is bound to \texttt{./2}) 
854: are eliminated.
855: 
856: %% One of the reasons to allow the compiler to reorder code is to support
857: %% multi-moded predicates. For example the \texttt{stackadd/3} predicate
858: %% (which is just \texttt{append/3}) cannot be moded without reordering.
859: %% The two different modes lead to different orders of literals
860: %% (see Example~\ref{ex:stackadd}).
861: 
862: Currently, HAL requires predicate mode declarations for each predicate and
863: checks they are correct. Predicate type declarations, on the other hand,
864: can be omitted and, if so, will be inferred by the compiler~\cite{typec}.
865: 
866: 
867: \section{Type, Instantiation and Type-Instantiation Grammars}\label{sec:trees}
868: 
869: In this section we formalize
870: type and instantiation definitions in terms of (extended) regular
871: tree grammars. 
872: Then we introduce type-instantiation (ti-) grammars
873: which combine type and instantiation information
874: and are the basis for mode checking in HAL.
875: Throughout the section we will use \texttt{teletype}
876: font when referring to (fixed) type and
877: instantiation expressions, and 
878: \textsf{sans serif} font when referring
879: to non-terminals of tree grammars.
880: 
881: \subsection{HAL Programs}
882: 
883: We begin by defining basic terminology and HAL programs.
884: 
885: A \emph{signature} 
886: $\Sigma$ is a set of pairs $f/n$ where $f$ is a \emph{function symbol} and $n
887: \geq 0$ is the integer \emph{arity} of $f$.  
888: A function symbol with 0 arity is called a \emph{constant}.
889: Given a signature $\Sigma$ the set of
890: all \emph{trees} (the Herbrand Universe), 
891: denoted $\tau(\Sigma)$, is defined as the least set satisfying:
892: $$
893: \tau(\Sigma) = \bigcup_{f/n \in \Sigma} \{ f(t_1, \ldots, t_n) ~|~ 
894:                 \{t_1, \ldots, t_n\} \subseteq \tau(\Sigma) \}.
895: $$
896: We assume (for simplicity) that $\Sigma$ contains at least one constant
897: symbol (i.e.\ a symbol with arity 0).
898: 
899: Let $V$ be a set of symbols called \emph{variables}. 
900: The set of all \emph{terms} over $\Sigma$ and $V$, denoted
901: $\tau(\Sigma, V)$, is similarly defined as the least set satisfying:
902: $$
903: \tau(\Sigma, V) = V \cup \bigcup_{f/n \in \Sigma} \{ f(t_1, \ldots, t_n) ~|~ 
904:                 \{t_1, \ldots, t_n\} \subseteq \tau(\Sigma, V) \}
905: $$
906: 
907: A \emph{substitution} over signature $\Sigma$ and
908: variable set $V$ is a mapping from variables to terms in $\tau(\Sigma,V)$,
909: written $\{x_1/t_1, \ldots, x_n/t_n\}$.
910: We extend substitutions to map terms in the usual way.
911: A \emph{unifier} for two terms $t$ and $t'$ is a substitution 
912: $\theta$ such that $\theta(t)$ and $\theta(t')$ are syntactically identical.
913: A \emph{most general unifier} of two terms $t$ and $t'$, denoted $mgu(t,t')$
914: is a unifier $\theta$ such for every other unifier $\theta'$ of $t$ and $t'$
915: there exists a substitution $\theta''$ such that $\theta'$ is the composition
916: of $\theta$ with $\theta''$. 
917: %%PJS\kim{reworded to make this the standard definition}
918: %syntactically
919: %identical to the result of performing the composition $\theta'' \cdot
920: %\theta$.  
921: Note that the only substitutions we shall deal with are
922: over type and instantiation parameters.
923: 
924: As we will be dealing with programs, types and instantiations there
925: will be a number of signatures of interest.
926: Let $V_{prog}$ be the set of program variable symbols,
927: and $\Sigma_{tree}$ be the tree constructors appearing in the program,
928: and $\Sigma_{pred}$ be the predicate symbols appearing in the program.
929: Let $V_{type}$ and $\Sigma_{type}$ be the
930: type variables and type constructors, and similarly let
931: $V_{inst}$ and $\Sigma_{inst}$ be the instantiation variables
932: and instantiation constructors. Note that these alphabets may overlap.
933: 
934: An \emph{atom} is of the form $p(s_1, \ldots, s_n)$ 
935: where $\{s_1, \ldots, s_n\} \subseteq \tau(\Sigma_{tree}, V_{prog})$
936: and $p/n \in \Sigma_{pred}$.
937: A \emph{literal} is either an atom, 
938: a variable-variable equation $x_1 = x_2$ where
939: $\{x_1, x_2\} \subseteq V_{prog}$, or a variable-functor equation 
940: $x = f(x_1, \ldots, x_n)$ where $f/n \in \Sigma_{tree}$ and
941: $x, x_1, \ldots, x_n$ are distinct elements of $V_{prog}$.
942: A \emph{goal} $G$ is a literal, a conjunction of goals
943: $G_1, \ldots, G_n$, a disjunction of goals $G_1 ; \cdots ; G_n$
944: or an if-then-else 
945: $G_i \texttt{~->~} G_t ; G_e$ (where $G_i, G_e, G_t$ are goals). 
946: A \emph{predicate definition} is of the form
947: $A \texttt{~:-~} G$ where $A$ is an atom and $G$ is a goal.
948: 
949: 
950: Note that we are assuming the programs have been normalized,
951: so that each literal has distinct
952: variables as arguments, each equality is either of the form $x_1 = x_2$ 
953: or $x = f(x_1,\ldots,x_n)$, where $x,x_1,\ldots,x_n$ are distinct variables,
954: and multiple bodies for a single predicate have been replaced by 
955: one disjunctive body. 
956: 
957: A \emph{predicate type declaration} is of the form
958: \begin{center}\tt
959: :- pred $p(t_1, \ldots, t_n)$
960: \end{center}
961: where
962: $\{t_1, \ldots, t_n\} \subseteq \tau(\Sigma_{type},V_{type})$ are
963: type expressions.
964: A \emph{predicate mode declaration} is of the form
965: \begin{center}\tt
966: :- mode $p(c_1 \rightarrow s_1, \ldots, c_n \rightarrow s_n)$ 
967: \end{center}
968: where 
969: $\{c_1, \ldots, c_n, s_1, \ldots, s_n\} 
970: \subseteq \tau(\Sigma_{inst})$ are ground
971: instantiation expressions.
972: A \emph{complete predicate definition} for  predicate  symbol $p/n \in \Sigma_{pred}$
973: consists of a
974: predicate definition, a predicate type declaration, and a non-empty set of
975: predicate mode declarations for  $p/n$.
976: A \emph{program} is a collection of complete predicate definitions
977: for distinct predicate symbols.
978: 
979: \subsection{Tree Grammars}
980: 
981: Tree grammars 
982: are a well understood formalism (see, for example, \cite{gs,tata97})
983: for defining regular tree languages.
984: We first review the standard definitions for tree grammars
985: since we shall have to
986: extend these
987: in order to handle the complexities of mode checking.
988:  
989: A \emph{tree grammar} 
990: $r$ over signature $\Sigma$ and
991: \emph{non-terminal set} $NT$ 
992: is a finite set of \emph{production rules} of 
993: the form $x \rightarrow t$ where
994: $x \in NT$ and $t$ is of the form $f(x_1, \ldots, x_n)$
995: where $f/n \in \Sigma$ and $\{x_1, \ldots, x_n\} \subseteq NT$.
996: For each $x \in NT$ and $f/n \in \Sigma$ we require
997: that there is at most one rule of the form
998: $x \rightarrow f(x_1, \ldots, x_n)$; hence the grammars
999: are \emph{deterministic}.
1000: 
1001: %% We have chosen
1002: %% to  restrict ourselves to  deterministic tree grammars
1003: %% because they are expressive enough for our purposes
1004: %% since we use Hindley-Milner types and because they give rise to
1005: %% simpler, more efficient algorithms---an important consideration for
1006: %% a compiler designed for large real-world programs.
1007: We have chosen to restrict ourselves to deterministic tree grammars:
1008: these grammars are expressive enough for Hindley-Milner types and they
1009: give rise to simpler, more efficient algorithms---an important
1010: consideration for a compiler designed for large real-world programs.
1011: %
1012: 
1013: We assume that from a grammar $r$ 
1014: we can determine its \emph{root non-terminal},
1015: denoted $root(r)$.  In reality this is an additional piece of
1016: information attached to each grammar.
1017: We shall write grammars so that the root non-terminal
1018: appears on the left hand side of
1019: the first production rule in $r$.
1020: 
1021: It will often be useful to extract a sub-grammar $r'$ from a 
1022: grammar $r$ defining some non-terminal $x$ appearing in $r$.  
1023: If $x$ is a non-terminal occurring in grammar
1024: $r$, then $subg(x,r)$ is the set of rules in $r$ for $x$ and
1025: all other non-terminals reachable from $x$. Or more precisely, 
1026: $subg(x,r)$ is the smallest 
1027: set of rules satisfying
1028: \begin{eqnarray*}
1029: subg(x,r) & \supseteq & \{ x \rightarrow t \in r \} \\
1030: subg(x,r) & \supseteq & \{ x' \rightarrow t \in r ~|~ x' \in NT, 
1031:                       \exists x'' \rightarrow g(x''_1, \ldots, x', \ldots,
1032:                    x''_m) \in subg(x,r)\}
1033: \end{eqnarray*}
1034: The root of the grammar $subg(x,r)$ is $x$, i.e.\ $root(subg(x,r)) = x$.
1035: 
1036: \begin{example}\label{ex:gs}
1037: Consider the signature 
1038: $\{[]/0, \textrm{`}.\textrm{'}/2, a/0, b/0, c/0, d/0\}$ and the
1039: non-terminal set \{\nt{abc}, $\nt{list(abc)}$, $\nt{bcd}$, 
1040: $\nt{evenlist(bcd)}$, $\nt{oddlist(bcd)}$\}, 
1041: then two example regular tree
1042: grammars over this signature and non-terminal set are $r_1$:
1043: \begin{grammar}
1044: \nt{list(abc)} & \rightarrow & [] \\
1045: \nt{list(abc)} & \rightarrow & [\nt{abc} | \nt{list(abc)}] \\
1046: \nt{abc} & \rightarrow & a \\
1047: \nt{abc} & \rightarrow & b \\
1048: \nt{abc} & \rightarrow & c \\
1049: \end{grammar}
1050: and $r_2$:
1051: \begin{grammar}
1052: \nt{evenlist(bcd)} & \rightarrow & [] \\
1053: \nt{evenlist(bcd)} & \rightarrow & [ \nt{bcd} | \nt{oddlist(bcd)} ] \\
1054: \nt{oddlist(bcd)} & \rightarrow & [ \nt{bcd} | \nt{evenlist(bcd)} ] \\
1055: \nt{bcd} & \rightarrow & b \\
1056: \nt{bcd} & \rightarrow & c \\
1057: \nt{bcd} & \rightarrow & d \\
1058: \end{grammar}
1059: The root non-terminal of $r_1$ is \nt{list(abc)}, while
1060: the root non-terminal of $r_2$ is \nt{evenlist(bcd)}.
1061: The grammar $subg(\nt{abc}, r_1)$ consists of  the last three rules of $r_1$
1062: while the grammar $subg(\nt{oddlist(bcd)}, r_2)$ includes all of the
1063: rules of $r_2$ but we would write the third rule in the first position,
1064: to indicate the root non-terminal was \nt{oddlist(bcd)}.
1065: \end{example}
1066: 
1067: A production of form  $x \rightarrow s$ in some
1068: grammar $r$ can be used to rewrite a term $t \in \tau(\Sigma,NT)$
1069: containing an occurrence of $x$  to the term $t' \in \tau(\Sigma,NT)$ 
1070: where $t'$ is obtained by
1071: replacing the occurrence of $x$ in $t$ by $s$. This is called
1072: a  \emph{derivation step} and is denoted by
1073: $t \Rightarrow t'$. 
1074: We let $\Rightarrow^*$ be the transitive,
1075: reflexive closure of $\Rightarrow$.
1076: The \emph{language generated} by $r$, denoted by $\oo{} r \cc{}$,
1077: is the set 
1078: $$\{ t \in \tau(\Sigma) \mid  root(r) \Rightarrow^* t \}$$
1079: 
1080: 
1081: 
1082: \begin{example}
1083: For example, consider the grammars of Example~\ref{ex:gs}.
1084: The set $\oo r_1 \cc$ is all lists of $a$'s, $b$'s and $c$'s, while
1085: $\oo r_2 \cc$ is all even length lists of 
1086: $b$'s, $c$'s and $d$'s.
1087: \end{example}
1088: 
1089: For brevity we shall often write tree grammars in a more compressed form.
1090: We use
1091: $$
1092: x \rightarrow t_1 ; t_2 ; \cdots ; t_n
1093: $$
1094: as shorthand for the set of production rules:
1095: $x \rightarrow t_1$, $x \rightarrow t_2$, \ldots, $x \rightarrow t_n$.
1096: 
1097: The $\oo{} \cdot \cc{}$ function 
1098: induces a pre-order on 
1099: tree grammars: $r_1 \preceq r_2$ 
1100: iff $\oo{} r_1 \cc{} \subseteq \oo{} r_2 \cc{}$. 
1101: If
1102: we regard grammars with the same language as equivalent,  
1103: $\preceq$ gives rise
1104: to a natural partial order over these equivalence classes
1105: of tree grammars. In fact they form a lattice.
1106: However, we shall largely ignore 
1107: these equivalence classes since all of our operations work on concrete
1108: grammars. 
1109: 
1110: We shall also make use of two special grammars. The first is the
1111: \emph{least tree grammar}, which we denote by $\bot$.
1112: We define that $\oo \bot \cc = \emptyset$, and so, as its name suggests
1113: we have that $\bot \preceq r$ for all grammars $r$.
1114: During mode checking the $\bot$ grammar indicates where 
1115: execution is known to fail. 
1116: The second special grammar is the \emph{error grammar},
1117: denoted by $\ourtop$. It is used to indicate that a mode error has occurred
1118: and we define that $r \preceq \ourtop$ for all tree grammars $r$.
1119:  
1120: We use $\sqcap$ to 
1121: denote the meet (i.e.\ greatest  lower bound) operator 
1122: on grammars, 
1123: and $\sqcup$ to denote the join (i.e.\ least upper bound)
1124: operator. %\kim{Should we make this precise?}
1125: We assume that the non-terminals appearing in the two grammars
1126: to be operated on are renamed apart. 
1127: 
1128: We have that  $\oo{} r_1 \sqcap
1129: r_2 \cc{} = \oo{} r_1 \cc{} \cap \oo{} r_2 \cc{}$.  Because we restrict
1130: ourselves to deterministic tree grammars 
1131: the join is inexact: That is to say, 
1132: $\oo{} r_1 \sqcup r_2 \cc{} \supseteq \oo{} r_1 \cc{} \cup
1133: \oo{} r_2 \cc{}$, and for some $r_1$ and $r_2$,
1134: $\oo{} r_1 \sqcup r_2 \cc{} \ne \oo{} r_1 \cc{} \cup
1135: \oo{} r_2 \cc{}$.
1136: Of course, since it is the join, it is as precise as possible:
1137: for any grammar $r$ such that 
1138: $\oo{} r \cc{} \supseteq \oo{} r_1 \cc{} \cup\oo{} r_2 \cc{}$, we have that 
1139: $\oo{} r \cc{} \supseteq \oo{} r_1 \sqcup r_2 \cc{}$.
1140: 
1141: Algorithms for determining if $r_1 \preceq
1142: r_2$, and constructing $r_1 \sqcap r_2$ and $r_1 \sqcup r_2$
1143: are straightforward and omitted.\footnote{The final operations of interest are
1144: given in the appendix.}
1145: 
1146: \begin{example}
1147: Consider the grammars  $r_1$ and $r_2$ of Example~\ref{ex:gs}.
1148: Their meet $r_1 \sqcap r_2$ is:
1149: \begin{grammar}
1150: \nt{meet(list(abc),evenlist(bcd))} & \rightarrow & []  ~;~ [ \nt{meet(abc,bcd)} ~|~ \nt{meet(list(abc),oddlist(bcd))}] \\
1151: \nt{meet(abc,bcd)} & \rightarrow & b   ~;~ c \\
1152: \nt{meet(list(abc),oddlist(bcd))} & \rightarrow & [ \nt{meet(abc,bcd)} ~|~ \nt{meet(list(abc),evenlist(bcd))} ] \\
1153: \end{grammar}
1154: while their join $r_1 \sqcup r_2$ is:
1155: \begin{grammar}
1156: \nt{join(list(abc),evenlist(bcd))} & \rightarrow & []  ~;~ [ \nt{join(abc,bcd)} ~|~ \nt{join(list(abc),oddlist(bcd))}] \\
1157: \nt{join(abc,bcd)} & \rightarrow & a ~;~ b  ~;~ c ~;~ d \\
1158: \nt{join(list(abc),oddlist(bcd))} & \rightarrow & []  ~;~ [ \nt{join(abc,bcd)} ~|~ \nt{join(list(abc),evenlist(bcd))} ] \\
1159: \end{grammar}
1160: Note that the language generated by the grammar $r_1 \sqcup r_2$ 
1161: could be represented with fewer rules. In the compiler there is no 
1162: effort to build minimal representations of grammars since 
1163: non-minimal grammars do not seem to occur that often in practice.
1164: \end{example}
1165: 
1166: 
1167: 
1168: 
1169: \subsection{Types} 
1170: 
1171: Types in HAL are polymorphic Hindley-Milner types.
1172: \emph{Type expressions} (or \emph{types}) are 
1173: terms in the language $\tau(\Sigma_{type}, V_{type})$
1174: where $\Sigma_{type}$ are \emph{type constructors} and
1175: variables $V_{type}$ are \emph{type parameters}. 
1176: Each type constructor $f/n \in \Sigma_{type}$ must have
1177: a definition.
1178: 
1179: \begin{definition}
1180: A \emph{type definition} for $f/n \in \Sigma_{type}$ 
1181: is of the form
1182: 
1183: \begin{center}
1184: {\tt :- typedef~} $f(v_1, \ldots, v_n)$ {\tt ~->~} $(f_1(t_1^1,
1185:   \ldots, t^1_{m_1}) ; \cdots ; f_k(t^k_1, \ldots, t^k_{m_k}))$. 
1186: \end{center}
1187: 
1188: \noindent
1189: where $v_1, \ldots, v_n$ 
1190:         are distinct type parameters, 
1191: $\{f_1/m_1, \ldots, f_k/m_k\}   \subseteq \Sigma_{tree}$
1192:   are distinct tree constructor/arity pairs, 
1193:   and $t_1^1, \ldots, t^k_{m_k}$ are type
1194:   expressions involving at most parameters $v_1, \ldots, v_n$.
1195: The type definition for $f/n$ 
1196: may optionally have \texttt{deriving solver}
1197: appended. If so then types of the form $f(t_1, \ldots, t_n)$
1198: are \emph{solver types}, otherwise they are \emph{non-solver types}. 
1199: \end{definition}
1200: 
1201: Clearly, the type definition for $f$ can be viewed 
1202: as simply a set of production rules over signature $\Sigma_{tree}$ 
1203: and non-terminal set
1204: $\tau(\Sigma_{type},V_{type})$.
1205: 
1206: We can associate with each (non-parameter) 
1207: type expression the production 
1208: rules that define the topmost symbol of the type.
1209: Let $t$ be a type expression of the form $f(t_1, \ldots, t_n)$
1210: and let $f/n$ have type definition
1211: 
1212: \begin{center}
1213: {\tt :- typedef~} $f(v_1, \ldots, v_n)$ {\tt ~->~} $(f_1(t_1^1,
1214:   \ldots, t^1_{m_1}) ; \cdots ; f_k(t^k_1, \ldots, t^k_{m_k}))$. 
1215: \end{center}
1216: 
1217: \noindent
1218: We define $rules(t)$ to be the production rules:
1219: $$\theta(f(v_1, \ldots, v_n)) \rightarrow (f_1(\theta(t_1^1),
1220:   \ldots, \theta(t^1_{m_1})) ; \cdots ; f_k(\theta(t^k_1), \ldots,
1221: \theta(t^k_{m_k})))$$
1222: where $\theta = \{v_1/t_1, \ldots, v_n/t_n\}$.
1223: If $t \in V_{type}$ we define $rules(t)$ to be the empty set.
1224: 
1225: We can extend this notation to associate a tree grammar with
1226: a type expression. 
1227: Let $grammar(t)$ be the least set of production rules such that:
1228: \begin{eqnarray*}
1229: grammar(t) & \supseteq & rules(t) \\
1230: grammar(t) & \supseteq & \bigcup \{ rules(t') ~|~ \exists x' \rightarrow
1231: g(t'_1, \ldots, t', \ldots, t'_m) \in grammar(t)) \}
1232: \end{eqnarray*}
1233: We assume that $root(grammar(t)) = t$.
1234: Note at this point we make no distinction between solver types
1235: and non-solver types; this will only occur once we consider
1236: instantiations.
1237: 
1238: 
1239: In order to avoid type expressions that
1240: depend on an infinite number of types 
1241: we restrict the type definitions to 
1242: be \emph{regular}~\cite{mycroft}.
1243: %
1244: A type $t$ is \emph{regular} if $grammar(t)$ is finite.\footnote{Note that 
1245: non-regular types are rarely used (although see~\cite{okasaki}). 
1246: The compiler could be extended
1247: to support mode checking 
1248: for non-regular types as long as we keep the restriction to
1249: regular instantiations.}
1250: 
1251: Consider for example the non-regular type definition:
1252: \begin{ttline}
1253: :- typedef erk(T) -> node(erk(list(T)), T).
1254: \end{ttline}
1255: The meaning of the 
1256: type \texttt{erk(int)} depends on the meaning of the
1257: type \texttt{erk(list(int))}, which depends on the meaning of the
1258: type \texttt{erk(list(list(int)))}, etc. 
1259: By restricting to regular types we are guaranteed that each type
1260: expression only involves a finite number of types.
1261: 
1262: A \emph{ground type expression} $t$ 
1263: is an element of $\tau(\Sigma_{type})$.
1264: The grammar corresponding to ground type expression $t$ 
1265: defines the meaning of the type expression as
1266: a set of trees ($\oo grammar(t) \cc$). 
1267: Note that during run-time every variable (for each invocation of a
1268: predicate) has a unique ground 
1269: type in $\tau(\Sigma_{type})$.
1270: 
1271: \begin{example}
1272: Given the type definitions:
1273: \begin{ttprog}
1274: :- typedef abc -> a ; b ; c. \\
1275: :- typedef list(T) -> [] ; [T | list(T)].
1276: \end{ttprog}
1277: then the grammar $r_1$ shown in Example~\ref{ex:gs} is 
1278: $grammar(\texttt{list(abc)})$. 
1279: The set $\oo r_1 \cc$ is the set 
1280: of lists of \texttt{a}'s, \texttt{b}'s and \texttt{c}'s.
1281: The grammar $grammar(\texttt{list(T)})$
1282: is
1283: \begin{grammar}
1284: \nt{list($T$)} & \rightarrow & [] ~;~ [\nt{$T$}|\nt{list($T$)}] 
1285: \end{grammar}
1286: The set $\oo grammar(\texttt{list(T)}) \cc = \{ [] \}$.
1287: \end{example}
1288: 
1289: Note that the grammars corresponding to non-ground type expressions are
1290: not very interesting, as illustrated in the above example.
1291: We can think of a non-ground type expression as a mapping from
1292: grounding substitutions to (ground) types whose meaning is then given
1293: by their corresponding grammar.
1294: 
1295: 
1296: The built-in types \texttt{float}, \texttt{int}, \texttt{char} and
1297: \texttt{string} are conceptually expressible as (possibly infinite) 
1298: tree grammars. 
1299: For example, \texttt{int} can be thought of as
1300: having the (infinite) definition:
1301: \begin{ttline}
1302: :- typedef int -> 0 ; 1 ; -1 ; 2 ; -2 ; 3 ; ...
1303: \end{ttline}
1304: Though the infinite number of children will render some of the algorithms
1305: on tree grammars ineffective this is easily avoided in the compiler 
1306: by treating the type expressions specially (we omit details
1307: in our algorithms since it is straightforward). 
1308: 
1309: Note that in HAL, type inference and checking is performed using a
1310: constraint-based Hindley-Milner 
1311: approach on the type expressions~\cite{typec}.  
1312: In this paper we assume that type analysis
1313: has been performed previously, and there are no type errors.
1314: For the purposes of mode
1315: checking the type correctness of a program has four main consequences.
1316: First, each program variable is known to have a unique polymorphic
1317: type. Second, all values taken 
1318: by a variable during the execution are known to be
1319: members of this type.  Third, calls to a polymorphic 
1320: predicate are guaranteed to have an equal or more specific type than that
1321: of the predicate. Fourth, all type parameters appearing in the type
1322: of a variable in the body of a predicate are known to also appear in the type
1323: of some variable in the head of the predicate. Together, these
1324: guarantee that whenever we compare grammars during mode checking, they
1325: correspond to exactly the same type.\footnote{Even mode checking
1326: a call to a polymorphic 
1327: predicate will use the calling type, which may be more specific than the
1328: predicate's declared type.}  
1329: This is used to substantially
1330: simplify the algorithms for mode checking (see, for example, the re-definition of 
1331: function $\preceq$ in Section~\ref{sec:type-inst}, and the assumption on the 
1332: existence of type environment 
1333: $\theta$ at the beginning of Section~\ref{sec:sched-ho}).
1334: 
1335: 
1336: \subsection{Values}
1337: 
1338: Types only express sets of fixed values (subsets of $\tau(\Sigma_{tree})$).
1339: However, during execution variables do not always have a fixed value
1340: and it is the role of mode checking to track these changes in variable
1341: instantiation.
1342: Thus, in order to perform mode checking we need to introduce special 
1343: constants, \#\texttt{fresh}\# and \#\texttt{var}\#, to represent the
1344: two kinds of non-fixed values that a program variable can have during
1345: execution.
1346: 
1347: The \#\texttt{fresh}\# constant is used to represent that a 
1348: program variable takes no value (i.e.,~it has not been initialized),
1349: and corresponds to the \ttnew{} instantiation.  
1350: Note that in HAL there is no run-time representation for \#\texttt{fresh}\# 
1351: variables. As a result, the compiler needs to know at all times 
1352: whether a variable is \ttnew{} or not. Thus, 
1353: any tree language including
1354: \#\texttt{fresh}\# and some other term is not a valid
1355: description of the values of a program variable.  
1356: 
1357: The \#\texttt{var}\# constant is used to represent a 
1358: program variable (or part of a value) that has been initialized but
1359: not further constrained. It corresponds to a ``free'' variable 
1360: in the usual logic
1361: programming sense. 
1362:  The \#\texttt{var}\# constructor will arise in 
1363: descriptions of \ttold{} instantiations, where we can define values which
1364: are not fixed.  Of course it will only make sense for variables of 
1365: solver types to take on this value. 
1366: 
1367: The values that a variable can take are thus represented by trees in
1368: $\tau(\Sigma_{tree} \cup \{\#\texttt{var}\#\}) \cup \{ \#\texttt{fresh}\# \}$.
1369: 
1370: \subsection{Instantiations}
1371: 
1372: A type expression by itself represents a set of
1373: fixed values. An instantiation by itself has little meaning, it is just a term
1374: in the language of expressions. Its meaning is only defined when it
1375: is considered in the context of a type expression.  For instance, the 
1376: meaning of \ttground{} depends upon the type of the variable it is 
1377: referring to.
1378: 
1379: In the following section we define a function $\func{rt}(t, i)$ 
1380: which takes a type expression $t$ and an instantiation $i$ and 
1381: returns a tree-grammar defining the set of (possibly non-fixed) 
1382: values that a program variable with the given type and instantiation can
1383: take. 
1384: In this section we define 
1385: the function $\func{base}(t, i)$ which is the function
1386: $\func{rt}(t,i)$ for the particular case in which $i$ is a base instantiation.
1387: In order to avoid name clashes, the
1388: function creates a unique non-terminal grammar symbol
1389: $ti(t,base)$ for the type $t$ and base instantiation $base$
1390: with which it is called and returns this together with the
1391: grammar for $t$ and $base$.
1392: The symbol $ti(t,i)$ represents the root of the tree-grammar
1393: which defines the possible values of a variable of type $t$ and
1394: instantiation $i$.
1395: 
1396: When a program variable is \ttnew{} it can only have one possible
1397: value, \#\texttt{fresh}\#. Hence the grammar returned by
1398: $\func{base}(t, \ttnew)$ for any type $t$ is simply
1399: \begin{grammar}
1400: ti(t,\ntnew) & \rightarrow & \#\texttt{fresh}\#
1401: \end{grammar}
1402: In a slight abuse of notation we will use \ntnew{} to refer to this grammar.
1403: 
1404: 
1405: When a program variable is \ttground{} it can take any fixed value.
1406: If the type $t$ of the variable is ground, then $\func{base}(t,\ttground{})$ 
1407: is identical to the grammar defining its type ($grammar(t)$). 
1408: Type parameters complicate this somewhat. 
1409: Since we are going to reason about
1410: the values of variables with non-ground types we need a way of representing
1411: the possible ground values of a type parameter. We introduce new 
1412: constants of the form $\$\ttground(v)\$$ 
1413: where $v \in V_{type}$ 
1414: to represent these languages. 
1415: So for $t \in V_{type}$ the grammar $\func{base}(t,\ttground)$ is defined as 
1416: \begin{grammar} 
1417: ti(t,\ttground) & \rightarrow & \$\ttground(t)\$
1418: \end{grammar}
1419: For arbitrary types $t$, $\func{base}(t,\ttground)$ is defined as
1420: the union of the rules
1421: \begin{grammar}
1422: ti(t',\ttground)  & \rightarrow & f(ti(t_1,\ttground), \ldots,
1423:  ti(t_n,\ttground))
1424: \end{grammar}
1425: for each $t' \rightarrow f(t_1, \ldots, t_n)$ occurring in $grammar(t)$,
1426: with
1427: \begin{grammar}
1428: ti(t',\ttground) & \rightarrow & \$\ttground(t')\$ 
1429: \end{grammar}
1430: for each $t' \in V_{type}$ occurring in $grammar(t)$.
1431: 
1432: Conceptually, the new constant $\$\ttground(v)\$$
1433: is a place holder for the grammar 
1434: $\func{base}(t',\ttground)$ obtained if 
1435: $v$ were replaced by the ground type $t'$.
1436: 
1437: When a program variable is \ttold{} it can take any initialized value.
1438: This will have a different effect on the
1439: parts of the type which are solver types themselves
1440: and on those which are not.
1441: Non-solver types do not allow the 
1442: possibility of taking an initialized but unbound value 
1443: (represented by the value \#\texttt{var}\#). 
1444: Thus, for solver types $t$ we shall add a production rule 
1445: $t \rightarrow \#\texttt{var}\#$ to the usual rules defining
1446: the type, while non-solver types remain unchanged. 
1447: In order to handle type parameters we introduce another set of
1448: constants $\$\ttold(v)\$$ where $v \in V_{type}$. Each constant is simply a 
1449: place holder for $\func{base}(t',\ttold)$ obtained if $v$ were replaced by 
1450: the ground type $t'$. Thus, $\func{base}(t,\ttold)$ for $t \in V_{type}$ 
1451: is defined as 
1452: \begin{grammar} 
1453: ti(\nt{t},\ttold) & \rightarrow & \$\ttground(t)\$ ~;~ \$\ttold(t)\$
1454: \end{grammar}
1455: and 
1456: otherwise $\func{base}(t,\ttold)$ is defined as the rules
1457: \begin{grammar}
1458: ti(t',\ttold)  & \rightarrow & f(ti(t_1,\ttold), \ldots, ti(t_n,\ttold))
1459: \end{grammar}
1460: for each rule $t' \rightarrow f(t_1, \ldots, t_n)$ in $grammar(t)$,
1461: together with
1462: \begin{grammar}
1463: ti(t',\ttold) & \rightarrow &\#\texttt{var}\#
1464: \end{grammar}
1465: for each solver type $t'$ occurring in $grammar(t)$,
1466: and
1467: \begin{grammar}
1468: ti(t',\ttold) & \rightarrow & \$\ttground(t')\$ ~;~ \$\ttold(t')\$ 
1469: \end{grammar}
1470: for each type variable $t' \in V_{type}$ occurring in $grammar(t)$.
1471: 
1472: The reason we represent an \texttt{old} variable of type $t$ 
1473: using both the $\$\ttground(t')\$$ and $\$\ttold(t')\$$,
1474: is that then a \texttt{ground} variable of type $t$ defines
1475: a sublanguage. This will simplify many algorithms.
1476: 
1477: 
1478: \begin{example}\label{ex:labcs}
1479: Given the type definitions:
1480: \begin{ttprog}
1481: :- typedef abc -> a ; b ; c. \\
1482: :- typedef hlist(T) -> [] ; [T | hlist(T)] deriving solver.
1483: \end{ttprog}
1484: Then $\emph{olabc1} = \func{base}(\texttt{hlist(abc)},\texttt{old})$ is the 
1485: grammar:
1486: \begin{grammar}
1487: ti(\nt{hlist(abc)},\ttold) & \rightarrow & []  ~;~~ [ ti(\nt{abc},\ttold) ~|~ ti(\nt{hlist(abc)},\ttold)] ~;~ \#\texttt{var}\# \\
1488: ti(\nt{abc},\ttold) & \rightarrow & a ~;~ b   ~;~ c
1489: \end{grammar}
1490: 
1491: The set $\oo{} \emph{olabc1} \cc{}$ 
1492: includes the values $[], [a | \#\texttt{var}\# ],
1493: [b], [b,a,c,a | \#\texttt{var}\# ]$.
1494: The symbol \#\texttt{var}\# represents an uninstantiated 
1495: %possible 
1496: %positions of \texttt{hlist(abc)} 
1497: variable, and so the second and fourth values are open-ended lists.
1498: 
1499: As another example,
1500: imagine we swap which type is a solver type. That is,
1501: suppose we have definitions
1502: \begin{ttprog}
1503: :- typedef habc -> a ; b ; c deriving solver. \\
1504: :- typedef list(T) -> [] ; [T | list(T)].
1505: \end{ttprog}
1506: Then $\emph{olabc2} = \func{base}(\texttt{list(habc)},\texttt{old})$ is the grammar:
1507: \begin{grammar}
1508: ti(\nt{list(habc)},\ttold) & \rightarrow & []  ~;~~ [ ti(\nt{habc},\ttold)  ~|~ ti(\nt{list(habc)},\ttold)] \\
1509: ti(\nt{habc},\ttold) & \rightarrow & a ~;~ b   ~;~ c ~;~ \#\texttt{var}\# 
1510: \end{grammar}
1511: The set $\oo{} \emph{olabc2} \cc{}$ 
1512: includes the values $[], [a], [\#\texttt{var}\#, b, \#\texttt{var}\#]$ 
1513: which are all fixed-length lists whose elements may be variables. 
1514: Note that the two occurrences of the symbol \#\texttt{var}\#
1515: in the last tree do not necessarily represent the same solver variable.
1516: 
1517: Finally $\func{base}(\texttt{hlist(T)}, \texttt{old})$ is (using the first
1518: definition) 
1519: \begin{grammar}
1520: ti(\nt{hlist($T$)},\ttold) & \rightarrow & []  ~;~~ [ ti(T,\ttold) ~|~ ti(\nt{hlist($T$)},\ttold) ] ~;~ \#\texttt{var}\# \\
1521: ti(T,\ttold) & \rightarrow &  \$\texttt{ground}(T)\$ ~;~ \$\ttold(T)\$
1522: \end{grammar}
1523: \end{example}
1524: 
1525: 
1526: Let us now consider instantiations in general, rather than only base instantiations.
1527: \emph{Instantiation expressions} (or \emph{instantiations}) 
1528: are terms in the language $\tau(\Sigma_{inst}, V_{inst})$
1529: where $\Sigma_{inst}$ are \emph{instantiation constructors} and
1530: variables $V_{inst}$ are \emph{instantiation parameters}. 
1531: Each instantiation 
1532: constructor $g/n \in \Sigma_{inst}$ must have
1533: a definition.
1534: Often, we will overload functors as both type and 
1535: instantiation constructors
1536: (so $\Sigma_{type}$ and $\Sigma_{inst}$ are not disjoint).
1537: The base instantiations (\texttt{ground}, \texttt{old} and
1538: \texttt{new}) are simply special 0-ary elements of $\Sigma_{inst}$.
1539: 
1540: \begin{definition}
1541: An \emph{instantiation definition} for $g$ is of the form:
1542: 
1543: \begin{center}
1544: {\tt :- instdef~} $g(w_1, \ldots, w_n)$ {\tt ~->~}  $(g_1(i_1^1,
1545:   \ldots, i^1_{m_1}) ; \cdots ; g_k(i^k_1, \ldots, i^k_{m_k})).$
1546: \end{center}
1547: 
1548: \noindent
1549:   where $w_1, \ldots, w_n$ are distinct instantiation 
1550:   parameters, 
1551:   $\{g_1/m_1, \ldots, g_k/m_k\} \subseteq \Sigma_{tree}$
1552:   are distinct tree constructors, and $i_1^1, \ldots, i^k_{m_k}$ 
1553:   are instantiation expressions other than 
1554: \texttt{new}\footnote{As mentioned before, disallowing nesting
1555: of the  \texttt{new} instantiation simplifies mode analysis. It also ensures that all subparts of 
1556: a data structure have a proper representation at run-time.}
1557: involving at most the parameters $w_1, \ldots, w_n$.
1558: Just as for type definitions, we demand that 
1559: instantiation definitions are \emph{regular}.\footnote{It is hard to 
1560: see how to lift this restriction.}
1561: \end{definition}
1562: 
1563: We can associate a set of production rules $rules(i)$ with an
1564: instantiation expression $i$ just as we do for type expressions.
1565: For the base instantiations we define $rules(\ttnew) = 
1566: rules(\ttold) = rules(\ttground) = \emptyset$.
1567: 
1568: A \emph{ground instantiation} is an element of $\tau(\Sigma_{inst})$. 
1569: The existence of instantiation parameters during mode analysis
1570: would significantly complicate the task of the analyzer. 
1571: This is mainly 
1572: because functions to compare type-instantiations or to
1573: compute their join and meet would need to
1574: return a set of constraints involving instantiation parameters. 
1575: Furthermore, predicate mode declarations containing
1576: instantiation parameters might need to express some constraints involving
1577: those instantiations. 
1578: Therefore, for simplicity, HAL (like Mercury\footnote{Recently Mercury has
1579: added a (as yet unreleased) 
1580: feature allowing limited non-ground instantiations in
1581: predicate modes.})  
1582: requires instantiations
1583: appearing in a predicate mode declaration to be ground. As a result,
1584: mode checking only deals with ground instantiations and, from now on,
1585: we will assume all instantiations are ground.
1586: 
1587: The reason this problem does not arise with type parameters is
1588: that, as mentioned before, type correctness guarantees that 
1589: whenever we compare type-instantiations, the two types being compared are syntactically 
1590: identical. Thus, if two type parameters are being compared, they are guaranteed to
1591: be the same type parameter.
1592: 
1593: \subsection{Type-Instantiation Grammars}\label{sec:type-inst}
1594: 
1595: In this section we define the 
1596: function $\func{rt}(t,i)$ which takes a type expression $t$ and a
1597: ground instantiation expression $i$ and returns a 
1598: \emph{type-instantiation tree grammar} (or \emph{ti-grammar}).
1599: Mode checking will manipulate \emph{ti-grammar}s, built
1600: from the types and instantiations occurring in the program.
1601: 
1602: The function \func{rt} 
1603: defines the meaning of combining a type with an instantiation
1604: by extending \func{base} to non-base instantiations.
1605: A non-base instantiation combines with a type in a manner
1606: analogous to the $\sqcap$ operation over the rules defining
1607: each other.  
1608: Intuitively the function \func{rt} intersects the grammars
1609: of $t$ and $i$.  This is not really the case because of special treatment
1610: of type parameters and base instantiations.
1611: 
1612: 
1613: Figure~\ref{fig:rt} gives the algorithm for computing $\func{rt}(t,i)$. 
1614: The 
1615: function $\func{rt}(t,i,P)$
1616:  does all of the work.
1617: It creates a unique grammar symbol
1618: $ti(t,i)$ for the type $t$ and instantiation $i$
1619: with which it is called and returns this with the
1620: type instantiation grammar for $t$ and $i$.
1621: Its last argument $P$ is the
1622: set of grammar symbols constructed in the parent calls:
1623: this is used to check that the symbol $ti(t,i)$
1624: has not already been encountered and so avoids infinite recursion.
1625: The root of the grammar $r$ returned is the symbol $ti(t,i)$.
1626: 
1627: Note that it is a mode error to associate a non-base instantiation
1628: with a parameter type $t \in  V_{type}$, since we cannot know
1629: what function symbols make up the type $t$. 
1630: In this case the algorithm returns the special $\ourtop$ 
1631: grammar indicating a mode error.
1632: 
1633: \begin{figure}
1634: 
1635: \begin{small}
1636: \begin{tabbing}
1637: xx \= xx \= xx \= xx \= xx \= xx \= xx \= xx \= \kill
1638: \func{rt}($t$,$i$) \\
1639: \> $(r,\_)$ := \func{rt}($t$,$i$,$\emptyset$) \\
1640: \> \textbf{return} $r$ \\
1641: \\
1642: \func{rt}($t$,$i$,$P$) \\
1643: \> \textbf{if} ($ti(t,i) \in P$) 
1644:                 \textbf{return} $(\emptyset,ti(t,i)))$\\
1645: \> \textbf{if} ($i$ is a base instantiation) \textbf{return}
1646: (\func{base}($t$,$i$), $ti(t,i)$) \\
1647: \> \textbf{if} ($t \in V_{type}$) \textbf{return} $(\ourtop, \_)$ \\
1648: \> $r$ := $\emptyset$ \\
1649: \> \textbf{foreach} rule $x_i \rightarrow f(x_{i1}, \ldots, 
1650:                 x_{in})$ in $rules(i)$ \\
1651: \> \> \textbf{if} exists rule $x_t \rightarrow f(x_{t1}, \ldots, 
1652:                 x_{tn})$ in $rules(t)$ \\
1653: \> \> \> \textbf{for} $j$ = $1 .. n$ \\
1654: \> \> \> \> $(r_j, x_j)$ := \func{rt}($x_{tj}$,$x_{ij}$, 
1655:                   $P \cup \{ti(t,i)\}$) \\
1656: \> \> \> \> \textbf{if} ($r_j = \ourtop$) \textbf{return} $(\ourtop, \_)$ \\
1657: \> \> \> \textbf{endfor} \\
1658: \> \> \> $r$ := $\{ti(t,i) \rightarrow f(x_1, \ldots, x_n)\} \cup r
1659:  \cup r_1 \cup \cdots \cup r_n$ \\
1660: \> \> \textbf{endif} \\
1661: \> \textbf{endfor} \\
1662: \> \textbf{return} $(r, ti(t,i))$ 
1663: \end{tabbing}
1664: \end{small}
1665: \caption{Algorithm for computing the type instantiation 
1666: grammar $\func{rt}(t,i)$\label{fig:rt}}
1667: 
1668: \end{figure}
1669: 
1670: %%\afterpage{\clearpage}
1671: 
1672: \begin{example}\label{ex:rts}
1673: Consider the types \texttt{list/1} and \texttt{habc} of
1674: Example~\ref{ex:labcs} and instantiation \texttt{nelist/1} from
1675: the program in Figure~\ref{fig:stack}.
1676: Then ti-grammar \linebreak[2] 
1677: $\func{rt}(\texttt{list(habc)},\texttt{nelist(old)})$ 
1678: is 
1679: \begin{grammar}
1680: ti(\nt{list(habc)},\nt{nelist(old)}) & \rightarrow & [ ti(\nt{habc},\ntold) ~|~
1681: ti(\nt{list(habc)},\nt{list(old)}) ] \\
1682: ti(\nt{list(habc)},\nt{list(old)}) & \rightarrow & [] ~;~ [ ti(\nt{habc},\ntold) ~|~
1683: ti(\nt{list(habc)},\nt{list(old)}) ] \\
1684: ti(\nt{habc},\ntold) & \rightarrow & a ~;~ b ~;~ c ~;~ \#\texttt{var}\# 
1685: \end{grammar}
1686: while $\func{rt}(\texttt{list(T)},\texttt{nelist(ground)})$ is
1687: \begin{grammar}
1688: ti(\nt{list($T$)},\nt{nelist(ground)}) & \rightarrow & [ ti(T,\ntground) |
1689: ti(\nt{list($T$)},\nt{list(ground)})] \\
1690: ti(\nt{list($T$)},\nt{list(ground)}) & \rightarrow & [] ~;~ [ ti(T,\ntground) ~|~
1691: ti(\nt{list($T$)},\nt{list(ground)}) ] \\
1692: ti(T,\ntground) & \rightarrow & \$\ttground(T)\$
1693: \end{grammar}
1694: \end{example}
1695: 
1696: 
1697: A ti-grammar is thus a regular tree grammar defined over the signature 
1698: $$
1699: \Sigma_{tree} \cup \{ \$\ttold(v)\$, \$\ttground(v)\$ 
1700: ~|~ v \in V_{type}\} \cup \{ \#\texttt{var}\#, \#\texttt{fresh}\# \}
1701: $$
1702: and non-terminal set
1703: $$
1704: \tau(\Sigma_{type} \cup \Sigma_{inst} \cup \{ti/2\}, V_{type})  
1705: \cup \{ \nt{new} \}
1706: $$
1707: 
1708: Note that by construction the partial ordering and meet and join
1709: on tree grammars extend to ti-grammars including type parameters.
1710: As mentioned before, type correctness guarantees that 
1711: during mode checking we will only compare 
1712: ti-grammars for the same type parameter $v \in V_{type}$. 
1713: For this reason, we only need note
1714: that $\func{rt}(v,\ttground) \preceq \func{rt}(v,\ttold)$
1715: for a parameter $v \in V_{type}$, 
1716: which follows from the construction since
1717: $\oo \func{rt}(v,\ttground) \cc = \{ \$\texttt{ground}(v)\$ \}$
1718: and 
1719: $\oo \func{rt}(v,\ttold) \cc = \{ \$\texttt{ground}(v)\$, 
1720: \$\texttt{old}(v)\$ \}$
1721: and the meet and join operations follow in the natural way.
1722: 
1723: 
1724: The operations that we perform on ti-grammars during mode checking
1725: will be $\preceq$, abstract conjunction and abstract disjunction.
1726: Abstract conjunction differs slightly from $\sqcap$ since we will
1727: be changing variables with a \nt{new} ti-grammar to 
1728: ti-grammars for bound values (whenever the variable becomes instantiated).
1729: The abstract conjunction operation $\wedge$ is defined as:
1730: $$
1731: r_1 \wedge r_2 ~~ = ~~ \left\{ \begin{array}{l} 
1732:                                  r_1, \mbox{~where~} r_2 = \nt{new} \\
1733:                                  r_2, \mbox{~where~} r_1 = \nt{new} \\
1734:                                  r_1 \sqcap r_2, \mbox{~otherwise}
1735:                                  \end{array} \right.
1736: $$
1737: 
1738: Abstract disjunction is again slightly different
1739: from the $\sqcup$ operation. 
1740: Since the compiler needs to know whether the value of
1741: a variable 
1742: is \texttt{new} or not, we need to ensure the abstract
1743: disjunction operation does not create ti-grammars (other than $\ourtop$) 
1744: in which this information is lost, i.e., grammars that include \#\texttt{fresh}\# as well as other terms.
1745: The abstract disjunction operation $\vee$ is defined as:
1746: $$
1747: r_1 \vee r_2 ~~ = ~~ \left\{ \begin{array}{l} 
1748:                                  r_1 \sqcup r_2, \mbox{~where~} r_1 \neq
1749:                          \nt{new} \mbox{~and~}  r_2 \neq
1750:                          \nt{new} \\
1751:                                  \nt{new}, \mbox{~where~} r_1 = \nt{new} \mbox{~and~}   r_2 =
1752:                          \nt{new}\\
1753:                                  \ourtop, \mbox{~otherwise}
1754:                                  \end{array} \right.
1755: $$
1756: 
1757: Finally, we introduce the concept of
1758: a \emph{type-instantiation state} (or \emph{ti-state})
1759: $\{x_1 \mapsto r_1, \ldots, x_n \mapsto r_n\}$, which 
1760: maps program variables to ti-grammars. Ti-grammars 
1761: are used during mode checking
1762: to store the possible
1763: values of the program variables at each program point.
1764: We can extend operations on ti-grammars to ti-states
1765: over the same set of variables in the obvious pointwise manner.  
1766: Given
1767: ti-states $TI = \{x_1 \mapsto r_1, \ldots, x_n \mapsto
1768: r_n\}$ and $TI' = \{x_1 \mapsto r'_1, \ldots, x_n \mapsto r'_n\}$ then:
1769: \begin{itemize}
1770: \item 
1771: $TI
1772: \preceq TI'$ iff $r_l \preceq r'_l$ for all $1 \leq l \leq n$, 
1773: \item
1774: $TI \wedge TI' =
1775: \{ x_l \mapsto r_l \wedge r'_l ~|~ 1 \leq l \leq n \}$ and 
1776: \item
1777: $TI \vee TI' =
1778: \{ x_l \mapsto r_l \vee r'_l ~|~ 1 \leq l \leq n \}$.
1779: \end{itemize}
1780: 
1781: 
1782: \section{Basic Mode Checking} \label{sec:basic}
1783: 
1784: Mode checking is a complex process
1785: which aims to reorder body literals 
1786: to satisfy the mode constraints provided by each mode
1787: declaration. The aim of this is to be able to generate
1788: specialized code for each mode declaration.
1789: The code
1790: corresponding to each mode declaration 
1791: is referred to as a \emph{procedure}, and
1792: calls to the original predicate are replaced by calls to the appropriate
1793: procedure. 
1794: Recall that before mode checking is applied the 
1795: HAL compiler performs type checking (and inference) 
1796: so that each program variable has a type, and the
1797: program is guaranteed to be type correct.
1798: 
1799: \subsection{Well-moded programs}
1800: 
1801: We now define what it means for a HAL program to be well-moded.
1802: 
1803: The execution of a HAL program is performed on \emph{procedures}
1804: which are predicates re-ordered for a particular mode.  
1805: At run-time each type parameter has an associated 
1806: ground type. 
1807: For our purposes we assume a given \emph{type environment} $\theta$
1808: (a ground type substitution) 
1809: describes the run-time types associated with each type parameter.
1810: 
1811: A \emph{call} to a procedure $p/n$ in mode $p(c_1 \rightarrow s_1, \ldots, c_n \rightarrow s_n)$ is a type environment $\theta$ and a \emph{value}
1812: $d_i$ for each argument $1 \leq i \leq n$. It follows from type correctness 
1813: of the program that $d_i \in \oo \func{rt}(\theta(t_i),\ttold) \cc \cup 
1814: \{\#\texttt{fresh}\#\}$ for each argument $1 \leq i \leq n$.
1815: 
1816: A program is \emph{input-output mode-correct} if any call 
1817: to a predicate which is correct with respect to  the 
1818: input instantiation for some mode 
1819: declared for that predicate will only have answers that
1820: are correct with respect to the output instantiation of that mode.
1821: More formally, a program is 
1822: \emph{input-output mode-correct} 
1823: if for
1824: each 
1825: procedure $p/n$ with declared type $p(t_1, \ldots, t_n)$ in 
1826: mode $p(c_1 \rightarrow s_1, \ldots, c_n \rightarrow s_n)$,  and
1827: for any call of the form
1828: $p(d_1, \ldots, d_n)$ with type environment $\theta$ 
1829: such that $d_i \in \oo \func{rt}(\theta(t_i),c_i) \cc, 1 \leq i \leq n$,
1830: it is the case that the resulting values $d'_1, \ldots, d'_n$ on 
1831: success of the procedure are such that 
1832: $d'_i \in \oo \func{rt}(\theta(t_i),s_i) \cc, 1 \leq i \leq n$.
1833: In other words, the declared mode is satisfied by the code generated for the
1834: procedure.
1835: 
1836: \begin{example}
1837: For example the first
1838: mode for predicate \texttt{pop/3}, defined in Example~\ref{fig:stack},
1839: \begin{ttline}
1840: :- mode pop(in,out,out) is semidet.
1841: \end{ttline}
1842: will be shown to be input-output mode-correct by showing that 
1843: if the first argument to \texttt{pop/3} is ground at call time,
1844: and the last two arguments \texttt{new},
1845: then all three arguments will be ground on success of the
1846: predicate.
1847: \end{example}
1848: 
1849: A program is \emph{call mode-correct} if any call 
1850: to a predicate which is correct with respect to the 
1851: input instantiation for some mode 
1852: declared for that predicate will only lead to calls to literals
1853: within the definition of the predicate which are mode-correct.
1854: More formally, a program is \emph{call mode-correct} if for
1855: each procedure $p/n$ with declared type $p(t_1, \ldots, t_n)$ 
1856: in mode  $p(c_1 \rightarrow s_1, \ldots, c_n \rightarrow s_n)$,   and
1857: for any call of the form
1858: $p(d_1, \ldots, d_n)$ with type environment $\theta$ 
1859: such that $d_i \in \oo \func{rt}(\theta(t_i),c_i) \cc, 1 \leq i \leq n$,
1860: it is the case that each call to a procedure 
1861: $p'/n'$ with type (given by the occurrence in the definition of $p/n$) 
1862: $p'(t'_1, \ldots, t'_{n'})$  in mode 
1863: $p(c'_1 \rightarrow s'_1, \ldots, c'_{n'} \rightarrow s'_{n'})$ 
1864: of the form
1865: $p'(d'_1, \ldots, d'_n)$ is such that 
1866: $d'_i \in \oo \func{rt}(\theta(t'_i),c'_i) \cc, 1 \leq i \leq n'$,
1867: and any call to an equality of the form $x_1 = x_2$ is either a copy 
1868: or unify, and any call to an equality of the form
1869: $x = f(x_1, \ldots, x_n)$ is either a construct or deconstruct.
1870: In other words each mode-correct call leads to only mode-correct calls.
1871: %\pjs{Added new stuff here}
1872: 
1873: \begin{example}\label{ex:dup}
1874: Consider the following code which duplicates the top element of
1875: the stack:
1876: \begin{ttprog}
1877: :- pred dupl(list(T), list(T)). \% duplicate top of stack \\
1878: :- mode dupl(in(nelist(ground)), out(nelist(ground))) is det. \\
1879: dupl(S0, S) :- S0 = [], S = []. \\
1880: dupl(S0, S) :- push(S0, A, S), pop(S0, A, S1).
1881: \end{ttprog}
1882: Showing call mode-correctness for the procedure for 
1883: \texttt{dupl/2} involves showing that any correct call to
1884: \texttt{dupl/2} (that is with the first argument a non-empty ground list,
1885: and its second argument \texttt{new})
1886: will call \texttt{push/3} and \texttt{pop/3} with correct
1887: input instantiations for one of their given modes,
1888: and each equation must be either a construct or deconstruct.
1889: \end{example}
1890: 
1891: A program is \emph{well-moded} if it is input-output mode-correct
1892: and call mode-correct.
1893: 
1894: %%\pjs{Probably need a link sentence}
1895: We shall now explain mode checking by showing how to 
1896: check whether each program
1897: construct is schedulable for a given ti-state $TI$ and, if so,
1898: what the resulting ti-state $TI'$ is.
1899: The scheduling also returns a goal 
1900: illustrating the order of execution of conjunctions, and the mode
1901: for each equation or predicate call. 
1902: If the program construct is not schedulable for the 
1903: given ti-state it may be
1904: reconsidered after other constructs have been scheduled.
1905: We assume that before
1906: checking each construct for an initial ti-state $TI$, we
1907: extend $TI$ so that any variable of type $t$ local to the construct is
1908: assigned the ti-grammar \nt{new}.
1909: 
1910: \subsection{Equality}\label{sec:eq}
1911: 
1912: Consider the equality $x_1 = x_2$ where $x_1$ and $x_2$ are variables of
1913: type $t$ and the current ti-state is $TI = \{x_1 \mapsto
1914: r_1, x_2 \mapsto r_2\} \cup \emph{RTI}$ (where $\emph{RTI}$ is the ti-state for
1915: the remaining variables).  The two standard modes of usage for such an
1916: equality are \textbf{copy} (\texttt{:=}) and \textbf{unify} (\texttt{==}).
1917: If exactly one of $r_1$ and $r_2$ is \nt{new} (say $r_1$), the 
1918: \textbf{copy}
1919: $x_1 := x_2$ can be performed and the resulting ti-state is $TI'
1920: = \{x_1 \mapsto r_2, x_2 \mapsto r_2\} \cup \emph{RTI}$.  
1921: If both are not \nt{new}
1922: then \textbf{unify} $x_1 == x_2$ is performed and the resulting
1923: instantiation is $TI' = \{x_1 \mapsto r_1 \wedge r_2, x_2 \mapsto r_1
1924: \wedge r_2\} \cup \emph{RTI}$. 
1925: If neither of the two modes of usage apply (i.e.\ 
1926: both variables are new), the literal is not schedulable (although it might
1927: become schedulable after automatic initialization, see Section \ref{sec:init}).
1928: 
1929: 
1930: Consider the equality $x = f(x_1, \ldots, x_n)$ where $x, x_1,\ldots, x_n$
1931: are variables with types $\{x \mapsto t, x_1 \mapsto t_1, \ldots, x_n
1932: \mapsto t_n\}$ and current ti-state $TI = \{x \mapsto r, x_1
1933: \mapsto r_1, \ldots, x_n \mapsto r_n\} \cup \emph{RTI}$. 
1934: The two standard modes
1935: of usage of such an equality are
1936: \textbf{construct} (\texttt{:=}) and \textbf{deconstruct} (\texttt{=:}).
1937: The \textbf{construct} mode applies if $r$ is \nt{new} and none of the
1938: $r_j$ are \nt{new}. 
1939: The resulting ti-state is $TI' = \{x
1940: \mapsto r', x_1 \mapsto r_1, \ldots, x_n \mapsto r_n\} \cup \emph{RTI}$ 
1941: where $r'$ is the ti-grammar 
1942: defined by 
1943: $\{a \rightarrow f(root(r_1), \ldots, root(r_n))\} \cup r_1 \cup 
1944: \cdots  \cup r_n$, where $a$ is a new non-terminal,
1945: (i.e.\ the grammar defining the terms constructible from 
1946: an $f$ with arguments from $r_1,...,r_n$ respectively).
1947: The \textbf{deconstruct} mode applies if each $r_j$ is \nt{new}
1948: and $r$ is not \nt{new} and has no production rule
1949: $root(r) \rightarrow \#\texttt{var}\# $
1950: (which means it is definitely bound to some functor).
1951: The resulting ti-state is 
1952: $TI' = \{x \mapsto r, x_1 \mapsto r_1', \ldots ,
1953: x_n \mapsto r_n'\} \cup \emph{RTI}$ where $r_1', \ldots, r_n'$
1954: are defined below.
1955: If $r$ has a production rule of the form 
1956: $root(r) \rightarrow f(y_1, \ldots, y_n)$, then
1957: the $r'_j = subg(y_j,r), 1 \leq i \leq n$.
1958: If $r$ has no rule of this form, then 
1959: the resulting ti-state is the same but with 
1960: $r'_j = \bot, 1 \leq j \leq n$, 
1961: indicating that the \textbf{deconstruct} must fail.
1962: If some of the variables $x_j$ are
1963: \nt{new} and some are not (say $x_{k_1},
1964: \ldots, x_{k_m}$) the mode checking process decomposes the equality
1965: constraint into a \textbf{deconstruct} followed by new equalities by
1966: introducing fresh variables, e.g.\ $x = f(x_1, \ldots, \fresh_{k_j}, \ldots),
1967: \ldots, x_{k_j} = \fresh_{k_j}, \ldots$.  These new equalities are
1968: handled as above.  
1969: 
1970: Note that if $r = \nt{new}$ and some $r_i = \nt{new}$ 
1971: then the literal is not schedulable
1972: (although it might
1973: become schedulable after automatic initialization, again 
1974: see Section \ref{sec:init}). 
1975: 
1976: 
1977: \begin{example} 
1978: Assume \texttt{X} and \texttt{Y} are ground lists, 
1979: while \texttt{A} is \nt{new}.
1980: Scheduling the goal \texttt{Y = [A|X]} 
1981: results in the code \texttt{Y =:{~}[A|F], X == F}.
1982: \end{example}
1983: 
1984: The above uses  of deconstruct are guaranteed to be safe at run-time 
1985: and correspond to the modes of usage allowed by Mercury.
1986: HAL, in addition to the above,
1987: allows
1988: the use of the \textbf{deconstruct} mode
1989: when $x$ is \texttt{old} (i.e.\ $r$ contains a production rule
1990: $root(r) \rightarrow \#\texttt{var}\#$).  
1991: In this case we check whether 
1992: $r$ has a production rule of the form 
1993: $root(r) \rightarrow f(y_1, \ldots, y_n)$
1994: and we proceed as in
1995: the previous paragraph. Note that this is (the only place)
1996: where the HAL mode system is not completely strong
1997: (i.e.\ run-time mode errors can occur).
1998: % bmd
1999: The following example illustrates the need for this behavior.
2000: 
2001: \begin{example}\label{ex:runtimeerror} 
2002: Consider the types \texttt{abc/0} and \texttt{hlist/1} 
2003: from Example~\ref{ex:labcs}, 
2004: the following use of  \texttt{append/3} 
2005: may not detect a mode error until run-time:
2006: \begin{ttprog}
2007: :- pred append(hlist(abc), hlist(abc), hlist(abc)). \\
2008: :- mode append(oo, oo, no) is nondet. \\
2009: append(X, Y, Z) :- X = [], Y = Z. \\
2010: append(X, Y, Z) :- X = [A|X1], append(X1, Y, Z1), Z = [A|Z1].
2011: \end{ttprog}
2012: The equation \texttt{X = [A|X1]} 
2013: is schedulable as a deconstruct since \texttt{X} is
2014: \texttt{old}. However, if at run-time \texttt{X} is not bound 
2015: when \texttt{append/3} is called, the
2016: deconstruct will generate a run-time error since \texttt{A} is not a 
2017: solver variable and, 
2018: thus, it cannot be initialized. 
2019: Note that if we did not allow
2020: deconstruction on \texttt{old} variables then the above predicate would not 
2021: pass mode checking thus preventing mode-correct goals like
2022: \begin{ttline}
2023: ?- X = [a,b,c], init(Y), append(X,Y,Z).
2024: \end{ttline}
2025: from being compiled.
2026: \end{example}
2027: 
2028: If we never allow Herbrand 
2029: solver types to contain non-solver types (as in the example above), 
2030: the problem cannot occur. 
2031: This gap in mode checking seems unavoidable 
2032: if we are to allow Herbrand solver types to contain non-solver types.
2033: However, it seems that in practice this gap is not problematic: in most
2034: programs, the possibility of a run-time mode error does not exist.
2035: Whenever it does, the compiler emits a warning message. In fact, we
2036: have never detected a run-time mode error.
2037: 
2038: 
2039: 
2040: \subsection{Predicates}
2041: 
2042: In this subsection 
2043: we describe the scheduling of predicate calls so
2044: that the resulting program after scheduling is call mode-correct.
2045: 
2046: 
2047: Consider the predicate call $p(x_1, \ldots, x_n)$ where each
2048: $x_i$ is a variable with type $t_i$.
2049: Assume $p$ has the mode
2050: declaration $p(c_1 \rightarrow s_1, \ldots, c_n \rightarrow s_n)$
2051: where $c_j, s_j$
2052: are the call and success instantiations, respectively, for argument $j$,
2053: and the current ti-state is
2054: $TI = \{x_1 \mapsto r_1, \ldots, x_n \mapsto r_n\} \cup \emph{RTI}$.
2055: 
2056: %%PJS
2057: Note that the handling of polymorphic application is hidden here, since the
2058: type $t_i$ of the variable $x_i$ is  type in the calling literal 
2059: $p(x_1, \ldots, x_n)$, which may be more specific than the declared/inferred
2060: type of argument $i$ of $p$. Because instantiations are separate from
2061: types this is straightforwardly expressed by constructing the
2062: ti-grammar
2063: for the mode specific calling type $t_i$ and the appropriate instantiations.
2064: 
2065: 
2066: The predicate call can be scheduled if for each $1 \leq j \leq n$ the
2067: current ti-state restricts the $j$-th argument more than (defines a subset of) the calling
2068: ti-state required for $p$, i.e.\ $r_j \preceq \func{rt}(t_j,c_j)$.  If the
2069: predicate call 
2070: is schedulable for this mode the new ti-state is $TI' = \{x_1
2071: \mapsto r_1 \wedge \func{rt}(t_1,s_1), \ldots, x_n \mapsto r_n \wedge
2072: \func{rt}(t_n,s_n)\} \cup \emph{RTI}$. 
2073: The predicate call can also be
2074: scheduled if for each $j$ such that $r_j \not\preceq \func{rt}(t_j,c_j)$ then 
2075: $\func{rt}(t_j,c_j) = \nt{new}$. 
2076: For each such $j$, the argument $x_j$ in
2077: predicate call $p(x_1, \ldots,x_{j-1},x_j,x_{j+1},\ldots,x_n)$ is replaced
2078: by $\fresh_j$, where $\fresh_j$ is a fresh new program variable, and the
2079: equation $\fresh_j = x_j$ is added after the predicate call. 
2080: Such ``extra'' modes are usually referred to as \emph{implied modes}.
2081: 
2082: \begin{example}
2083: Consider the goal {\tt empty(S0)} for the program 
2084: of Figure~\ref{fig:stack} where the type of \texttt{S0} is given by 
2085: $\{ \texttt{S0} \mapsto \texttt{list(abc)}\}$ (which is more specific than
2086: the declared type \texttt{list(T)})
2087: and the current ti-state is $TI = \{
2088: \texttt{S0} \mapsto \nt{new}\}$.
2089: The two modes for \texttt{empty} (in expanded form) are 
2090: \begin{ttline}
2091: :- mode empty(ground -> ground) is semidet. \\
2092: :- mode empty(new    -> ground) is det.
2093: \end{ttline}
2094: The first mode of \texttt{empty} cannot be scheduled since
2095: $\nt{new} \not\preceq \func{rt}(\texttt{list(abc)},\texttt{ground})$,
2096: but the second mode can be scheduled, since 
2097: $\nt{new} \preceq \func{rt}(\texttt{list(abc)},\texttt{new}) = \nt{new}$.
2098: \end{example}
2099: 
2100: 
2101: 
2102: If more than one mode of the same predicate is schedulable, in theory the compiler should
2103: try each possibility. Unfortunately, this search may be too expensive. For
2104: this reason, HAL (like Mercury) chooses one schedulable mode and commits to
2105: it. This behavior might lead to the compiler failing to check a
2106: mode-correct procedure (see Example~\ref{non-check}). In order to minimize
2107: this risk, 
2108: we choose a schedulable mode whose 
2109: success ti-state $TI$ defined
2110: as $\{x_1 \mapsto \func{rt}(t_1,s_1), \ldots, x_n \mapsto \func{rt}(t_n,s_n)\}$ is
2111: minimal; that is, for each other schedulable mode with success 
2112: ti-state $TI'$ it is the case that $TI' \not\preceq TI$. Note that 
2113: there may be more than one mode with a minimal success ti-state. In 
2114: the case that we have more than one mode with the same minimal success 
2115: state then we use a mode with a
2116: minimal call ti-state.
2117: 
2118: 
2119: \begin{example}
2120: Consider the scheduling of the goal
2121: \texttt{pop(A,B,C)} where current ti-state is
2122: $TI = \{ A \mapsto r_1, B \mapsto \nt{new}, C \mapsto r_3 \}$
2123: where $r_1$ and $r_3$ are defined by the grammars
2124: \begin{grammar}
2125: r_1 & \rightarrow & [ r_2 ~|~ r_3 ] \\
2126: r_2 & \rightarrow & b \\
2127: r_3 & \rightarrow & []
2128: \end{grammar}
2129: That is $A = [b]$ and $C = []$.  Neither of the declared modes
2130: for \texttt{pop}, shown below, are immediately applicable.
2131: \begin{ttline}
2132: :- mode pop(in,out,out) is semidet. \\
2133: :- mode pop(in(nelist(ground)),out,out) is det. 
2134: \end{ttline}
2135: But both modes fit the conditions for an implied mode. 
2136: Since the second mode has a more specific success ti-state
2137: (the first argument is known to be non-empty)
2138: it is chosen. 
2139: The resulting code is \texttt{pop\_mode2(A,B,Fresh), Fresh = C},
2140: where mode checking will then schedule the new equation appropriately.
2141: \end{example}
2142: 
2143: The idea is to maintain as much instantiation information
2144: as possible, thus restricting as little as possible the number of
2145: schedulable modes for the remaining literals.
2146: In our experience with compiling real programs 
2147: this policy seems adequate to avoid any problems.
2148: It is straightforward, but in practice too expensive,
2149: to implement a complete search for all possible schedules.
2150:  
2151: 
2152: 
2153: \subsection{Conjunctions, Disjunctions and If-Then-Elses}
2154: 
2155: To determine if a conjunction $G_1, \ldots, G_n$ is schedulable for initial
2156: ti-state $TI$ we choose the left-most goal $G_j$ which is schedulable for
2157: $TI$ and compute the new ti-state $TI_j$. This default behavior schedules
2158: goals as close to the programmer given left-to-right order as possible.  If the
2159: state $TI_j$ assigns $\bot$ to any variable, then the subgoal $G_j$ must
2160: fail and hence the whole conjunction is schedulable.  The resulting
2161: ti-state $TI'$ maps all variables to $\bot$, and the final conjunction
2162: contains all previously scheduled goals followed by \texttt{fail}.  
2163: If $TI_j$ does
2164: % not assign any variable to $\bot$ we continue by scheduling the remaining
2165: % bmd
2166: not assign $\bot$ to any variable we continue by scheduling the remaining
2167: conjunction $G_1, \ldots, G_{j-1}, G_{j+1}, \ldots, G_n$ with initial
2168: ti-state $TI_j$.  If all subgoals are eventually schedulable we have
2169: determined both an order of evaluation for the conjunction and a final
2170: ti-state. 
2171: 
2172: \begin{example}
2173: Consider scheduling the goal
2174: \begin{ttline}
2175: Y = [U1|U2], U2 = [], X = [U1|U3].
2176: \end{ttline}
2177: where \texttt{X} is 
2178: initially $\func{rt}(\texttt{list(T)},\texttt{ground})$, and the remaining
2179: variables are \nt{new}.  The first literal is not schedulable and will
2180: remain so until both \texttt{U1} and \texttt{U2} are no longer new. 
2181: We consider then the
2182: second literal, which is schedulable as a construct, thus changing the
2183: type-instantiation of \texttt{U2} to
2184: $\func{rt}(\texttt{list(T)},\texttt{elist})$.  Since the first
2185: literal remains unschedulable, we consider the third literal which is
2186: schedulable as a deconstruct, thus changing the type-instantiation of
2187: \texttt{X},
2188: \texttt{U1} and \texttt{U3} to
2189: $\func{rt}(\texttt{list(T)},\texttt{nelist(ground)})$,
2190: $\func{rt}(\texttt{T},\texttt{ground})$
2191: and $\func{rt}(\texttt{list(T)},\texttt{ground})$, respectively. Since both
2192: \texttt{U1} and \texttt{U2} 
2193: are no longer new, the first literal is now schedulable as a
2194: construct.  The resulting code is
2195: \begin{ttline}
2196: U2 := [], X =:{~}[U1|U3], Y := [U1|U2].
2197: \end{ttline}
2198: In the final ti-state the instantiation of $Y$ is given by the
2199: tree-grammar
2200: \begin{grammar}
2201: \texttt{Y} & \rightarrow & [ ti(T,\nt{ground}) |
2202: ti(\nt{list}(T),\nt{elist}) ] \\
2203: ti(\nt{list}(T),\nt{elist}) & \rightarrow & [] \\ 
2204: ti(T,\ntground) & \rightarrow & \$\ttground(T)\$
2205: \end{grammar}
2206: in other words it is a list of length exactly one.
2207: \end{example}
2208: 
2209: 
2210: To determine if a disjunction $G_1 ; \cdots ; G_n$ is schedulable for
2211: initial ti-state $TI$ we check whether each subgoal $G_j$ is schedulable
2212: for $TI$ and, if so, compute each resulting ti-state $TI_j$, obtaining the
2213: final ti-state $TI' = \bigvee_{j \in \{1 .. n\}} TI_j$. 
2214: If this
2215: ti-state assigns $\ourtop$ to any variable or one of the disjuncts $G_j$
2216: is not schedulable then the whole disjunction
2217: is not schedulable.
2218: 
2219: To determine whether an if-then-else $G_i \rightarrow G_t ; G_e$ is schedulable
2220: for initial ti-state $TI$, we determine first whether $G_i$
2221: is schedulable for $TI$ with resulting ti-state $TI_i$.  If
2222: not, the whole if-then-else is not schedulable. Otherwise, we try to
2223: schedule $G_t$ in state $TI_i$ (resulting in state $TI_t$ say) and $G_e$ in
2224: state $TI$ (resulting in state $TI_e$ say). 
2225: The resulting ti-state is $TI' = TI_t \vee TI_e$. 
2226: If one of $G_t$ or $G_e$ is not schedulable or $TI'$ includes $\ourtop$ the
2227: whole if-then-else is not schedulable.
2228: Note that the analysis of $G_i \rightarrow G_t ; G_e$ is identical to that of
2229: $(G_i,G_t);G_e$ except that all goals of $G_i$ must be scheduled before those
2230: of $G_t$.
2231: 
2232: \subsection{Mode Declarations}
2233: 
2234: In this subsection we discuss how mode-correctness is checked for
2235: each mode declaration.
2236: 
2237: To check that a predicate with head $p(x_1, \ldots, x_n)$ and
2238:  declared (or inferred) type
2239: $\{x_1 \mapsto t_1, \ldots, x_n \mapsto t_n\}$
2240: satisfies the mode declaration $p(c_1 \rightarrow s_1, \ldots, c_n
2241: \rightarrow s_n)$, we build the initial ti-state
2242: $TI = \{x_1 \mapsto \func{rt}(t_1,c_1), \ldots, x_n \mapsto \func{rt}(t_n,c_n)\}$. 
2243: %for all
2244: %argument variables using the declared (or inferred) types and the
2245: %declared calling instantiations.  
2246: The body of the predicate 
2247: is then analyzed starting from
2248: the state $TI$.
2249: The mode declaration is
2250: correct if 
2251: (a) everything is schedulable and
2252: (b) if the final
2253: ti-state is $TI' = \{x_1 \mapsto r_1', \ldots , x_n \mapsto r_n'\}$,
2254: then  for each
2255: argument variable $1 \leq i \leq n$, $r_i' \preceq \func{rt}(t_i,s_i)$.
2256: If the body is not schedulable
2257: or the resulting instantiations are not
2258: strong enough, a mode error results. 
2259: Note that (a) ensures that the predicate is call mode-correct
2260: for that mode
2261: while (b) ensures that it is input-output mode-correct.
2262: 
2263: 
2264: 
2265: \begin{example}
2266: Consider mode checking of the following code from Example~\ref{ex:dup}
2267: which makes use of the code in Figure~\ref{fig:stack}:
2268: \begin{ttprog}
2269: :- pred dupl(list(T), list(T)). \% duplicate top of stack \\
2270: :- mode dupl(in(nelist(ground)), out(nelist(ground))) is det. \\
2271: dupl(S0, S) :- S0 = [], S = []. \\
2272: dupl(S0, S) :- push(S0, A, S), pop(S0, A, S1).
2273: \end{ttprog}
2274: We start by constructing 
2275: the initial ti-state $TI = \{\texttt{S0} \mapsto gnelT,
2276: \texttt{S} \mapsto \nt{new} \}$ where $gnelT = 
2277: \func{rt}(\texttt{list(T)},\texttt{nelist(ground)})$ 
2278: is the ti-grammar shown in Example~\ref{ex:rts}.  
2279: Checking the first disjunct (rule) we have
2280: \texttt{S0 = []} schedulable as a deconstruct.  
2281: The resulting
2282: ti-state assigns $\bot$ to \texttt{S0}, and thus 
2283: the whole conjunction is
2284: schedulable with  $TI_1 = \{\texttt{S0} \mapsto \bot, \texttt{S} \mapsto \bot \}$.
2285: Checking the second disjunct, we first extend $TI$ to map 
2286: \texttt{A} and \texttt{S1} to \nt{new}.
2287: Examining the first literal \texttt{push(S0, A, S)}
2288: we find that it is not schedulable since \texttt{A} has instantiation
2289: \nt{new} and is required to be \ntground.
2290: Examining the second literal \texttt{pop(S0, A,
2291: S1)} we find that both 
2292: modes declared for \texttt{pop/3} are
2293: schedulable. Since the second mode has more specific success
2294: instantiations, it is chosen and the ti-grammars for \texttt{A} and
2295: \texttt{S1} become $\func{rt}(\texttt{T},\texttt{ground})$ and 
2296: $\func{rt}(\texttt{list(T)}, \texttt{ground})$,
2297: respectively.  
2298: Now the first literal is schedulable obtaining for
2299: \texttt{S} the
2300: ti-grammar $gnelT$. 
2301: Restricting to the original variables the
2302: final ti-state is $TI_2 = \{\texttt{S0} \mapsto gnelT,
2303: \texttt{S} \mapsto gnelT \}$.  
2304: Taking the join $TI' = TI_1 \vee TI_2 = TI_2$.  
2305: Checking this against the declared success
2306: instantiations we find the declared mode is correct. The code generated for
2307: the procedure is:
2308: \begin{ttprog}
2309: dupl\_mode1(S0, S) :- fail. \\
2310: dupl\_mode1(S0, S) :- pop\_mode2(S0, A, S1), push\_mode1(S0, A, S).
2311: \end{ttprog}
2312: where \texttt{pop\_mode2/3} and \texttt{push\_mode1/3} are the procedures
2313: associated with the second and first modes of the predicates, respectively.
2314: \end{example}
2315: 
2316: 
2317: 
2318: 
2319: Note that the HAL compiler's current mode analysis 
2320: does not track variable dependencies
2321: and thus it may obtain a final type-instantiation state weaker than
2322: expected.  
2323: \begin{example}\label{ex:lcint}
2324: Consider the solver type \texttt{habc/0} of
2325: Example~\ref{ex:labcs}.
2326: The following program does not pass mode checking:
2327: \begin{ttprog}
2328: :- pred p(list(habc), habc). \\
2329: :- mode p(list(old) -> ground, in) is semidet. \\
2330: p(L, E) :- L = []. \\
2331: p(L, E) :- L = [E1|L1], E = E1, p(L1, E).
2332: \end{ttprog}
2333: The first literal of the second rule 
2334: is a deconstruct. After that deconstruct
2335: variable \texttt{L} is never touched and hence its instantiation is
2336: never updated; in particular it is not updated 
2337: when the instantiation of 
2338: \texttt{E1} and \texttt{L1} change. 
2339: The inferred type-instantiation for \texttt{L} 
2340: at the end of the second rule is thus
2341: $\func{rt}(\texttt{list(habc)},\texttt{nelist(old)})$ rather than 
2342: $\func{rt}(\texttt{list(habc)},\texttt{nelist(ground)})$. 
2343: Hence, mode checking fails.
2344: \end{example}
2345: 
2346: This could be overcome by adding a definite sharing
2347: analysis and/or a dependency based groundness analysis to the 
2348: mode checking phase. 
2349: Whenever a variable which definitely
2350: shares with another (through an equation $e$) is touched, we modify the
2351: resulting ti-state as if the equation $e$ has been
2352: rescheduled to update sharing variables.
2353: This is (partially) 
2354: implemented, for example, in the alias branch of the Mercury
2355: compiler. 
2356: 
2357: % An impediment to adding this analysis is that
2358: % modes that pass the richer analysis may be difficult to compile
2359: % to correct runtime code as shown in the following example.
2360: 
2361: % \kim{I am dubious about the following example: basically all it says is that the 
2362: % analysis of ground dependencies should reflect what the solver means 
2363: % by ground, i.e. the analysis is wrong not the implementation}
2364: 
2365: % \begin{example}
2366: %   Suppose the program in Example~\ref{ex:lcint} used a non-Herbrand
2367: %   solver type instead of \texttt{habc/0}. Consider for example a
2368: %   hidden constrained float implemented by some solver and defined as
2369: %   \begin{ttprog}
2370: %   :- typedef cfloat -> var(cplex) ; value(float). 
2371: %   \end{ttprog}
2372: %   where \texttt{float} is the base type for floats, and \texttt{cplex}
2373: %   is another solver type belonging to an already defined, efficient
2374: %   external solver. 
2375: % %  Now, it is not uncommon for solvers to be good
2376: % %  (i.e., fast) at determining whether a solver variable is ground or
2377: % %  not. Let us assume that is the case of our external solver. 
2378: %   The \texttt{cfloat} solver will find it useful to assume that every variable
2379: %   which is known to be ground is represented as \texttt{value(float)},
2380: %   the rest being non-ground.  In other words, \ttold{}
2381: %   \texttt{cfloat}s have a different internal representation to
2382: %   \ttground{} \texttt{cfloat}s.  This assumption can (and will) affect
2383: %   the definition of many constraints defined by the solver,
2384: %   for example, modes of usage in which variables are known to be
2385: %   ground in the call might only deal with the \texttt{value/1}
2386: %   functor. Unfortunately, this means that 
2387: %   the compiled code of the \texttt{p/2} in Example~\ref{ex:lcint} above 
2388: %   would need to 
2389: %   construct a new list containing the \ttground{} \texttt{cfloat}s to
2390: %   use as the value of \texttt{L} after the call to \texttt{p}.  This
2391: %   would add significant complexity to the compiler.
2392: % \end{example}
2393: 
2394: 
2395: \section{Automatic Initialization}\label{sec:init}
2396: 
2397: As mentioned before, constraint solvers must provide an initialization
2398: procedure (\texttt{init/1}) for their solver type. This procedure takes a
2399: solver variable with instantiation \texttt{new} and returns it with
2400: instantiation \texttt{old}, after initializing whatever data-structures (if
2401: any) the solver needs. 
2402: 
2403: Many of the predicates exported by constraint solvers
2404: (including most constraints) require the solver variables appearing as
2405: arguments to be already initialized.
2406: Thus, explicit initializations for local variables may
2407: need to be introduced. Not only is this a tedious exercise for the programmer,
2408: it may even be impossible for multi-moded predicate definitions
2409: since each mode may require different initialization instructions.  
2410: Therefore, the HAL mode checker
2411: automatically inserts variable initializations. In particular,
2412: whenever a literal cannot be scheduled because there is a
2413: requirement for an argument of type $t$ 
2414: to be $\func{rt}(t,\texttt{old})$ when it is \nt{new} 
2415: and $t$ is a solver type, then the \texttt{init/1} predicate for
2416: type $t$ can be inserted to make the literal schedulable.
2417: 
2418: 
2419: \begin{example}
2420: Assume we have an integer solver with solver type  \texttt{cint/0}.
2421: \begin{ttprog}
2422: :- pred length(list(cint), int). \\
2423: :- mode length(out(list(old)), in) is nondet. \\
2424: :- mode length(in(list(old)), out) is det. \\
2425: length(L, N) :- L = [], N = 0. \\
2426: length(L, N) :- L = [X|L1], +(N1,1,N), N > 0, length(L1, N1).
2427: \end{ttprog}
2428: where the predicate \texttt{+(X,Y,Z)} models $X + Y = Z$ and 
2429: requires at least two arguments to be ground on call and all arguments are
2430: ground on return.
2431: 
2432: For the first mode \texttt{L = [X|L1]} cannot be scheduled as a construct
2433: until \texttt{X} has a ti-grammar 
2434: different from \nt{new}. Hence, \texttt{X}
2435: needs to be initialized.  
2436: In the second mode \texttt{L = [X|L1]} can be scheduled as
2437: a deconstruct and thus no initialization is needed. 
2438: The two resulting procedures are:
2439: 
2440: \begin{ttprog}
2441: length\=\_mode1(L, N) :-  (L := [], N == 0 \\
2442:                   \> ;  +$_{outinin}$(N1, 1, N), N > 0, length\_mode1(L1, N1), init(X), L := [X|L1]). \\
2443: length\_mode2(L, N) :-  (L == [], N := 0 \\
2444:                   \> ;   L =:{~}[X|L1], length\_mode2(L1, N1),
2445:                   +$_{ininout}$(N1, 1, N), N > 0). 
2446: \end{ttprog}
2447: 
2448: \noindent
2449: where we have rewritten the call to \texttt{+/3} to show the mode more
2450: clearly (\texttt{+$_{outinin}$} indicates that the first argument is 
2451: \texttt{out} and the
2452: rest are \texttt{in}, \texttt{+$_{ininout}$} indicates that the 
2453: third argument is \texttt{out}
2454: and the other arguments are \texttt{in}).  
2455: \end{example}
2456: 
2457: Unfortunately, unnecessary initialization may slow down execution and introduce
2458: unnecessary variables (when it interacts with implied modes).  Hence, we
2459: would like to only add those initializations required so that mode checking
2460: will succeed.  The HAL mode checker implements this by first trying to mode
2461: the procedure without allowing initialization. If this fails it will start
2462: from the previous partial schedule looking for the leftmost unscheduled
2463: literal $l$ which can be scheduled by initializing variables which (a) have
2464: either a solver type or a parameter type (e.g.~$v \in V_{type}$) 
2465: and (b) do not appear in an unscheduled
2466: literal to the left 
2467: which equates them to a term (if so, chances are the equation will
2468: become a construct and no initialization is needed). If such an $l$ is
2469: found the appropriate initialization calls are inserted before $l$, and 
2470: then scheduling continues once more trying to schedule
2471: without initialization.  
2472: If no $l$ is found the whole conjunct is not schedulable.  This two phase
2473: approach is applied at each conjunct level individually.
2474: 
2475: 
2476: \begin{example}
2477: Consider the following program where \texttt{cint/0} is a solver type:
2478: \begin{ttprog} 
2479: :- instdef evenlist(T) -> ([] ; [T|oddlist(T)]). \\
2480: :- instdef oddlist(T) -> [T|evenlist(T)]. \\
2481: \\
2482: :- pred pairlist(list(cint),int). \\
2483: :- mode pairlist(out(evenlist(old)),in) is nondet. \\
2484: pairlist(L,N) :- N = 0, L = []. \\
2485: pairlist(L,N) :- N > 0, +(N1,1,N), L = [V|L1], L1 = [V|L2], pairlist(L2,N1).
2486: \end{ttprog}
2487: In the first phase all literals in the second
2488: rule are schedulable except \texttt{L = [V|L1]} and \texttt{L1 =
2489: [V|L2]} which can be neither a 
2490: construct (\texttt{V}, \texttt{L1} and \texttt{L2} are new) nor a 
2491: deconstruct (both \texttt{L} and \texttt{L1} are new). 
2492: In the second phase we examine the two remaining unscheduled
2493: literals: the second literal can be scheduled by initializing \texttt{V}. Once this is done the
2494: first literal can be scheduled obtaining:
2495: \begin{ttprog}
2496: pairlist(L,N) :- N == 0, L := []. \\
2497: pairlist(L,N) :- \= N > 0, +$_{outinin}$(N1,1,N), pairlist(L2,N1), \\
2498: \> init(V), L1 := [V|L2], L := [V|L1].
2499: \end{ttprog}
2500: \end{example}
2501: 
2502: Many other different initialization heuristics could be applied. We are
2503: currently investigating more informed policies which give the right tradeoff
2504: between adding constraints as early as possible, and delaying constraints
2505: until they can become tests or assignments.
2506: 
2507: \section{Higher-Order Objects}\label{sec:higher}
2508: 
2509: Higher-order programming is particularly important
2510: in HAL because it is the mechanism
2511: used to implement dynamic scheduling, which is vital in CLP
2512: languages for extending and combining constraint solvers.  
2513: Higher-order programming 
2514: introduces two new kinds of literals: construction
2515: of higher-order objects and higher-order calls.  
2516: 
2517: A \emph{higher-order object} is constructed
2518: using an equation of the form 
2519: $h = p(x_{1}, \ldots, x_k)$ where
2520: $h, x_{1}, \ldots, x_k$ are variables and 
2521: $p$ is an $n$-ary predicate with
2522: $n \geq k$. 
2523: The variable $h$ is referred to as a \emph{higher-order object}.
2524: \emph{Higher-order calls} are literals of the form \texttt{call}$(h, x_{k+1},
2525: \ldots, x_n)$ where $h, x_{k+1}, \ldots, x_n$ are variables.  Essentially,
2526: the \texttt{call} literal supplies the $n-k$ arguments missing from the
2527: higher-order object $h$.
2528: 
2529: In order to represent types and instantiations for
2530: higher-order objects we need to extend the languages of
2531: type and instantiation expressions.
2532: The \emph{higher-order type} of a higher-order object $h$
2533: constructed in the previous paragraph 
2534: is of the form $pred(t_{k+1}, \ldots, t_n)$ 
2535: where $pred/(n-k)$ is a new special type constructor and
2536: $t_{k+1}, \ldots, t_n$ are types. 
2537: It provides the types of the $n-k$ arguments missing from $h$. 
2538: The \emph{higher-order instantiation} of $h$ 
2539: is of the form $pred(c_{k+1} \rightarrow s_{k+1}, \ldots, c_n
2540: \rightarrow s_n)$\footnote{In reality, the determinism information 
2541: also appears in the
2542: higher-order instantiation; for simplicity we ignore it here.} 
2543: where $pred/(n-k)$ is a new
2544: special instantiation construct and $c_j \rightarrow s_j$ give the call and
2545: success instantiations of argument $j$ respectively. 
2546: It provides the modes of the $n-k$ arguments missing from $h$.
2547: Note that for the first time we allow \ttnew{} instantiations appearing
2548: inside instantiation expressions (since they will often be call
2549: instantiations). But their appearance is restricted to the outermost arguments 
2550: of higher-order instantiations.
2551: 
2552: 
2553: Now we must extend the 
2554: $\func{rt}(t,i)$ operation to handle higher-order types and instantiations.
2555: Let us first consider the case in which 
2556: $i$ is the higher-order instantiation 
2557: $pred(c_{k+1} \rightarrow s_{k+1}, \ldots, c_n \rightarrow s_n)$. 
2558: If $t$ is the higher-order type $pred(t_{k+1}, \ldots,
2559: t_n)$ then $\func{rt}(t,i)$ returns the grammar
2560: \begin{grammar}
2561: ti(t,i) & \rightarrow & \ipred(root(tc_{k+1}), root(ts_{k+1}),
2562: \ldots, root(tc_n), root(ts_n))
2563: \end{grammar}
2564: together with the grammars 
2565: $tc_{k+1}, \ldots, tc_n, ts_{k+1}, \ldots, ts_n$
2566: where $tc_j = \func{rt}(t_j,c_j)$ and $ts_j = \func{rt}(t_j,s_j)$.
2567: If $t$ is not a higher-order type or has the wrong arity 
2568: then $\func{rt}(t,i) = \ourtop$, indicating an error.
2569: The new constant \ipred{} simply collects the
2570: call and success ti-grammars for the higher-order object's 
2571: missing arguments.
2572: 
2573: 
2574: The extension of $\func{rt}(t,i)$ for the case of base
2575: instantiations $i$ is similar to the treatment of type parameters.  
2576: A higher-order object can be \texttt{new} or 
2577: \texttt{ground}, but if it is \texttt{old} this is identical
2578: to \texttt{ground} since higher-order objects never have an
2579: attached solver.
2580: $\func{rt}(pred(t_1,\ldots,t_n), \texttt{new})$ is treated as
2581: before (i.e.~it creates a \nt{new} ti-grammar).
2582: Similarly $\func{rt}(pred(t_1,\ldots,t_n), \texttt{ground})$ 
2583: generates a production rule using a new constant 
2584: \gpred{} of the form
2585: $$
2586: ti(pred(t_1,\ldots,t_n), \texttt{ground}) \rightarrow
2587: \gpred
2588: $$
2589: $\func{rt}(pred(t_1,\ldots,t_n), \texttt{old})$ generates the same
2590: grammar (since it is equivalent).
2591: Since we will only compare the higher-order ti-grammar
2592: against other ti-grammars for the same type we can safely omit
2593: the information about the argument types ($t_1, \ldots, t_n$).
2594: 
2595: The new constant \gpred{} 
2596: acts like $\$\texttt{ground}(v)\$$ 
2597: but it can also be compared with more complicated ti-grammars 
2598: (with production rules for function symbol \ipred)
2599: of the same type.  The full code for $\func{rt}(t,i)$ is given in the appendix.
2600: 
2601: \begin{example}\label{ex:map}
2602: Consider the following code:
2603: \begin{ttprog}
2604: :- pred map(pred(T1,T2), list(T1), list(T2)). \\
2605: :- mode map(in(pred(in,out) is det), in, out) is det. \\
2606: map(H, [], []). \\
2607: map(H, [A|As], [B|Bs]) :- call(H,A,B), map(H,As,Bs). \\
2608: \\
2609: :- typedef sign -> (neg ; zero ; pos). \\
2610: \\
2611: :- pred mult(sign, sign, sign). \\
2612: :- mode mult(in, in, out) is det. \\
2613: %% mult(neg,zero,zero). \\
2614: %% mult(neg,pos,neg). \\
2615: %% mult(neg,neg,pos). \\
2616: %% mult(zero,\_,zero). \\
2617: %% mult(pos,zero,zero). \\
2618: %% mult(pos,pos,pos). \\
2619: %% mult(pos,neg,neg). \\
2620: \\
2621: ?- H1 = mult(pos), map(H1, [neg,zero,pos], L1).
2622: \end{ttprog}
2623: The \texttt{map/3} predicate takes a higher-order predicate with two
2624: missing arguments of parametric types \texttt{T1} and \texttt{T2} 
2625: and modes \texttt{in} and  \texttt{out},
2626: respectively. 
2627: The ti-grammar describing the input instantiation of
2628: the first argument of \texttt{map/3} is the grammar with 
2629: root $a_1 = ti(\nt{pred(T1,T2)}, \nt{pred(in,out)})$, defined by
2630: \begin{grammar}
2631: a_1 & \rightarrow & 
2632: \ipred(ti(\nt{T1},\ntground),ti(\nt{T1},\ntground),\ntnew,ti(\nt{T2},\ntground)) \\
2633: ti(\nt{T1},\ntground) & \rightarrow & \$\ttground(T1)\$ \\
2634: ti(\nt{T2},\ntground) & \rightarrow & \$\ttground(T2)\$ \\
2635: \ntnew & \rightarrow & \#\texttt{fresh}\#
2636: \end{grammar}
2637: 
2638: This predicate is applied to a list of \texttt{T1}s, returning a list
2639: of \texttt{T2}s. 
2640: The literal \texttt{H1 = mult(pos)} 
2641: constructs a higher-order object which 
2642: multiplies the sign of its first argument 
2643: by \texttt{pos}, returning the result in its
2644: second argument.
2645: The type-instantiation of \texttt{H1},
2646: $\func{rt}(\texttt{pred(sign,sign)}, \texttt{pred(in,out)})$,
2647: is the grammar with root $a_2 = ti(\nt{pred(sign,sign)},
2648: \nt{pred(in,out)})$ and rules:
2649: \begin{grammar}
2650: a_2 & \rightarrow &
2651: \ipred(ti(\nt{sign},\ntground),ti(\nt{sign},\ntground),\nt{new}, 
2652: ti(\nt{sign},\ntground)) \\
2653: ti(\nt{sign},\ntground) & \rightarrow & neg ~;~ zero ~;~ pos \\
2654: \ntnew & \rightarrow & \#\texttt{fresh}\#
2655: \end{grammar}
2656: \end{example}
2657: 
2658: 
2659: We need to extend the ordering $\preceq$ to 
2660: higher-order type-instantiations
2661: as well as the operations $\wedge$ and $\vee$.  
2662: Two higher-order ti-grammars $r$ and $r'$ defined with rules
2663: \begin{grammar}
2664: root(r) & \rightarrow & \ipred(xc_1, xs_1, \ldots, xc_n, xs_n)
2665: \end{grammar}
2666: and
2667: \begin{grammar}
2668: root(r') & \rightarrow & \ipred(xc'_1, xs'_1, \ldots, xc'_n, xs'_n)
2669: \end{grammar}
2670: satisfy $r \preceq r'$ iff for $i=1,\ldots,n$, 
2671: $subg(xc'_i, r') \preceq subg(xc_i, r)$ and 
2672: $subg(xs_i, r) \preceq subg(xs'_i, r')$.
2673: Intuitively, if $r \preceq r'$, then any higher-order $\texttt{call}(r',\ldots)$ should 
2674: be replaceable by 
2675: $\texttt{call}(r,\ldots)$. 
2676: For this to work, two conditions must be fulfilled. First,
2677: $r$ must be able to deal with any
2678: values that $r'$ can deal with (and perhaps more). Thus, 
2679: $subg(xc'_i, r') \preceq subg(xc_i, r)$. 
2680: And second, $r$ must return the same values
2681: as $r'$ or less. Thus, $subg(xs_i, r) \preceq subg(xs'_i, r')$. 
2682: For more details see 
2683: the example below.
2684: 
2685: We define $r \preceq \func{rt}(\texttt{pred}(t_1,\ldots,t_n),\ttground)$ 
2686: for any ti-grammar $r$ of the appropriate type except \ntnew.
2687: The full definition of $\preceq$ is given in the appendix.
2688: The $\wedge$ and $\vee$ operations follow naturally from the
2689: ordering, and are given in the appendix.
2690: 
2691: \begin{example}
2692: Consider the following code and goal:
2693: \begin{ttprog}
2694: :- typedef abc -> a ; b ; c. \\
2695: :- instdef ab -> a ; b. \\
2696: \\
2697: :- pred ho1(abc,abc). \\
2698: :- mode ho1(in(ab),out(ab)) is det.\\
2699: ho1(A,B) :- A = B.\\
2700: \\
2701: :- pred ho2(abc,abc). \\
2702: :- mode ho2(in,out) is det.\\
2703: %ho2(A,B) :- A = B.\\
2704: ho2(A,B) :- A = a, B = b.\\
2705: ho2(A,B) :- A = b, B = c.\\
2706: ho2(A,B) :- A = c, B = a.\\\\
2707: ?- HO1 = ho1, HO2 = ho2, (HO = HO1 ; HO = HO2).
2708: \end{ttprog}
2709: During scheduling of the disjunction, 
2710: %after scheduling the first 
2711: % disjunct 
2712: %two equalities 
2713: the ti-grammar for
2714: \texttt{HO1} is \func{rt}(\texttt{pred(abc,abc)},
2715: \texttt{pred(in(ab),out(ab))}), i.e.:
2716: \begin{grammar}
2717: \nt{ho1}& \rightarrow &  \ipred(\nt{gndab},\nt{gndab},\nt{new},\nt{gndab}) \\
2718: \nt{gndab} & \rightarrow & a ~;~ b \\
2719: \nt{new}   & \rightarrow & \#\texttt{fresh}\#
2720: \end{grammar}
2721: and the ti-grammar for \texttt{HO2} is
2722: \begin{grammar}
2723: \nt{ho2}& \rightarrow &  \ipred(\nt{gndabc},\nt{gndabc},\nt{new},\nt{gndabc}) \\
2724: \nt{gndab} & \rightarrow & a ~;~ b ~;~ c\\
2725: \nt{new}   & \rightarrow & \#\texttt{fresh}\#
2726: \end{grammar}
2727: The abstract disjunction of these two grammars to build the
2728: ti-grammar for \texttt{HO}  gives
2729: \begin{grammar}
2730: \nt{ho} & \rightarrow &  \ipred(\nt{gndab},\nt{gndabc},\nt{new},\nt{gndabc}) \\
2731: \end{grammar}
2732: Notice the call ti-grammars have been abstractly conjoined.  This
2733: illustrates the contravariant nature of calling instantiations
2734: of higher-order predicates.
2735: The higher-order object in \texttt{HO} can only be safely applied to
2736: an input \texttt{a} or \texttt{b} since it may be predicate \texttt{ho1}.
2737: It can only be guaranteed to give output \texttt{a},\texttt{b} or \texttt{c}
2738: since it may be predicate \texttt{ho2}.
2739: \end{example}
2740: 
2741: 
2742: 
2743: \subsection{Scheduling Higher-Order}\label{sec:sched-ho}
2744: 
2745: Intuitively, a higher-order equation $h = p(x_{1}, \ldots, x_k)$ is
2746: schedulable if $h$ is \nt{new} and $x_{1}, \ldots, x_k$ are at least as
2747: instantiated as the call instantiations of one of the modes declared
2748: for $p/n$. If this is true for more than one mode, we again choose one schedulable 
2749: mode (using the same criteria used for calls to first order predicates) 
2750: and commit to it. 
2751: If it is not true for any mode, the equation is delayed until the
2752: arguments become more instantiated.  Formally,
2753: let the current ti-state be 
2754: $TI = \{h \mapsto r, x_1
2755: \mapsto r_1, \ldots, x_k \mapsto r_k\} \cup \emph{RTI}$
2756: and the types $\{x_{1} \mapsto t_1, \ldots, x_k \mapsto t_k\}$.
2757: Let the (declared or inferred) predicate 
2758: type of $p/n$ be $p(dt_1, \ldots, dt_n)$, then (because of type correctness)
2759: we have that there exists $\theta$ such that $\theta(dt_j) = t_j$.
2760: 
2761: Consider the declared mode 
2762: $p(c_1 \rightarrow s_1, \ldots, c_n \rightarrow s_n)$. 
2763: The higher-order equation is schedulable if $r = \nt{new}$ 
2764: and for each $1 \leq j \leq k,~ r_j \preceq
2765: \func{rt}(t_j,c_j) ~\wedge~ r_j \neq \nt{new}$.  
2766: The resulting ti-state is
2767: $$
2768: \{h \mapsto r', 
2769: x_1 \mapsto r_1, \ldots, x_k \mapsto r_k\} \cup \emph{RTI}.
2770: $$
2771: where $tc_j = \func{rt}(\theta(dt_{j}),c_{j})$ and 
2772: $ts_j = \func{rt}(\theta(dt_{j}),s_{j})$
2773: for $k+1 \leq j \leq n$ and 
2774: $r' = \{ a \rightarrow 
2775: \ipred(root(tc_{k+1}), root(ts_{k+1}), \ldots, root(tc_{n}),
2776: root(ts_{n}))\}$, where $a$ is a new non-terminal,
2777: together with the grammars for 
2778: $tc_{k+1}, ts_{k+1}, \ldots, tc_{n}, ts_{n}$.
2779: 
2780: % Note that the instantiation of each $x_j$ is
2781: % unchanged and, in fact, will not be updated after $h$ is
2782: % used. This is because we cannot ensure that a 
2783: % call has been actually made. 
2784: % As a result, higher-order objects can lose precision of mode
2785: % information. This might lead to erroneous mode information if one of the
2786: % $r_j$ is \nt{new}. For this reason, the call is not schedulable if this
2787: % is the case (hence the $r_j \neq \nt{new}$ condition above).  
2788: Note that the instantiation of each $x_j$ is
2789: unchanged and, in fact, will not be updated even when $h$ is
2790: called. This is because in general we cannot ensure when or if the
2791: call has actually been made. 
2792: As a result, mode checking with higher-order objects can be imprecise.
2793: In particular, if one of the
2794: $r_j$ is \nt{new} we may not know if it becomes initialised or not 
2795: since we do not know if the call to $h$ which will initialise it has been made. Since 
2796: we must be able to precisely track when a variable has become 
2797: initialised, we do not allow a call to be scheduled if this
2798: is the case (hence the $r_j \neq \nt{new}$ condition above).  
2799: 
2800: A higher-order call \texttt{call}$(h, x_{k+1}, \ldots, x_n)$ is schedulable
2801: if $x_{k+1}, \ldots, x_n$ are at least as instantiated as the call
2802: instantiations of the arguments
2803: of the higher-order type-instantiation previously assigned to $h$. If this is
2804: not true, the call is delayed until the arguments become more instantiated.
2805: Formally, let the current ti-state be 
2806: $TI = \{h \mapsto r,
2807: x_{k+1} \mapsto r_{k+1}, \ldots, x_n \mapsto r_n\} \cup \emph{RTI}$. 
2808: The call is
2809: schedulable if $r$ has a production rule of the form 
2810: $$
2811: root(r) \rightarrow 
2812: \ipred(xc_{k+1}, xs_{k+1},  \ldots, xc_n, xs_n)
2813: $$
2814: and for each $j \in k+1, ..., n, ~ r_j \preceq subg(xc_j, r)$.  
2815: The resulting instantiation is 
2816: $TI' = \{h \mapsto r, x_{k+1} \mapsto
2817: r_{k+1} \wedge subg(xs_{k+1},r), \ldots, x_n \mapsto r_n \wedge subg(xs_n,r)\} 
2818: \cup \emph{RTI}$.
2819: Just as for normal predicate calls, implied modes are also possible
2820: where if, for example, $xc_l$ is \nt{new}, 
2821: we can replace $x_l$ with a fresh
2822: variable $\fresh_l$ and a following equation $\fresh_l = x_l$.
2823: And, if necessary, the mode checker will add calls to initialise
2824: solver variables.
2825: 
2826: \begin{example}
2827: Consider the following code and assume all goals are schedulable in the
2828: order written:
2829: \begin{ttprog}
2830: :- instdef only\_a -> a. \\
2831: :- modedef abc2a -> (ground -> only\_a). \\
2832: \\
2833: :- pred p(abc, abc, abc) \\
2834: :- mode p(abc2a, in, out(only\_a)) is semidet. \\
2835: \\
2836: ?- $G_1$, p(A,B,C), $G_2$. \\
2837: ?- $G_1$, H = p(A), call(H,B,C), $G_2$. 
2838: \end{ttprog}
2839: The two queries would appear to have identical 
2840: effects. However, mode checking for the second
2841: goal will not determine that the 
2842: instantiation for \texttt{A} becomes \texttt{only\_a}
2843: by the time it reaches goal $G_2$.
2844: Assuming \texttt{A} was \texttt{ground} before  \texttt{H = p(A)},
2845: then the type-instantiation of \texttt{H} is the grammar with
2846: root $x = ti(pred(\nt{abc},\nt{abc}),pred(\nt{in},\nt{out(only\_a)}))$ and
2847: rules:
2848: \begin{grammar}
2849: x 
2850: & \rightarrow &
2851: \ipred(ti(\nt{abc},\ntground),ti(\nt{abc},\ntground),
2852: \nt{new}, ti(\nt{abc}, \nt{only\_a})) \\
2853: ti(\nt{abc},\ntground) & \rightarrow & a ~;~ b ~;~ c \\
2854: ti(\nt{abc},\nt{only\_a}) & \rightarrow & a \\
2855: \nt{new} & \rightarrow & \#\texttt{fresh}\# 
2856: \end{grammar}
2857: Of course in this case it is obvious that the predicate is being called
2858: before $G_2$, and so it could be inferred that the instantiation of 
2859: \texttt{A}
2860: was \nt{only\_a} at that point.
2861: However, in the usual case such analysis is harder, since the construction
2862: of a higher-order term and its eventual execution are usually performed
2863: in different predicates.
2864: Indeed, in general it is impossible to know at compile time whether at
2865: a given program point the higher-order predicate has been executed or not.
2866: \end{example}
2867: 
2868: 
2869: \section{Polymorphism and Modes}\label{sec:poly}
2870: 
2871: Polymorphic predicates are very useful because they can be
2872: used for different types. Unfortunately, mode information can be lost since
2873: only the base instantiations \texttt{ground}, \texttt{new}, and
2874: \texttt{old} can be associated with type parameters. 
2875: 
2876: \begin{example}
2877: Consider the interface to the stack data type defined in Figure
2878: \ref{fig:stack} and the following program:
2879: \begin{ttprog}
2880: :- pred q(abc). \\
2881: :- mode q(in) is semidet. \\
2882: :- mode q(in(only\_a)) is det. \\
2883: \\
2884: ?- empty(S0), I0 = a, push(S0, I0, S1), pop(S1, I, S2), q(I). 
2885: \end{ttprog}
2886: Although list \texttt{S1} is indeed a list only containing items \texttt{a}
2887: this information is lost after executing \texttt{push} since the output
2888: instantiation declared for this predicate is simply ground. Because of
2889: this, the first mode of predicate \texttt{q/1} will be selected for
2890: literal \texttt{q(I)}, thus losing the information 
2891: that \texttt{q(I)} could not fail.
2892: \end{example}
2893: 
2894: This loss of instantiation information for arguments to polymorphic 
2895: predicates may have severe consequences for higher-order objects
2896: because the base instantiation \texttt{ground} applied to polymorphic
2897: code does not contain enough information for the higher-order object to be
2898: used (called). 
2899: \begin{example}
2900: Consider the following goal using code from Figure~\ref{fig:stack}
2901: and Example~\ref{ex:map}:
2902: \begin{ttprog}
2903: ?- empty(S0), I0 = mult(pos), push(S0,I0,S1), pop(S1,I,S2), map(I,[neg],S). 
2904: \end{ttprog}
2905: When item \texttt{I} is extracted from the list its ti-grammar is
2906: $\func{rt}(t,\nt{ground})$ where $t$ is type $pred(\nt{sign},\nt{sign})$.
2907: As a result, it cannot be used in
2908: \texttt{map} since its mode and determinism
2909: information has been lost, i.e.~the check
2910: $\func{rt}(t,\nt{ground}) \preceq \func{rt}(t,\nt{pred(in,out)})$ fails.
2911: \end{example}
2912: 
2913: We could overcome the above problem by having a special version of each
2914: stack predicate to handle the higher-order predicate case. But this requires
2915: modifying the \texttt{stack} module, defeating the idea of an abstract data
2916: type. Also, this modification is required for each mode of the higher-order
2917: object that the programmer wishes to make use of.  
2918: Clearly, this is not an attractive proposition.
2919: 
2920: Our approach is to use polymorphic type information to recover the lost mode 
2921: information.  This is an example of ``Theorems for
2922: Free''~\cite{wadler}: since the polymorphic code can only
2923: equate terms with polymorphic
2924: type, it cannot create instantiations and, thus, the output instantiations of
2925: polymorphic arguments must result from the calling instantiations of
2926: non-output arguments. Hence, they have to be at least as instantiated as the
2927: join of the input instantiations.
2928: 
2929: 
2930: \subsection{Polymorphic Mode Checking}
2931: 
2932: 
2933: To recover instantiation information we extend mode checking for procedures
2934: with polymorphic types to take into account the extra mode information that
2935: is implied by the polymorphic type.  
2936: Consider the predicate call $p(x_1,
2937: \ldots, x_n)$ where $x_1, \ldots, x_n$ are variables with type $\{x_1
2938: \mapsto t_1, \ldots, x_n \mapsto t_n\}$ and 
2939: current ti-state 
2940: $TI = \{x_1 \mapsto r_1, \ldots, x_n \mapsto r_n\} \cup \emph{RTI}$.  
2941: Suppose the predicate 
2942: type declared (or inferred) for $p$ is $p(dt_1, \ldots, dt_n)$.  Note that
2943: because of type correctness there exists the
2944: type substitution $\theta$ where $\theta(dt_j) = t_j$.
2945: 
2946: \begin{figure}[t]
2947: 
2948: \begin{small}
2949: \begin{tabbing} 
2950: xx \= xx \= xx \= xx \= xx \= \kill
2951: \func{collect\_set}($r_1$,$r_2$,$P$) \\
2952: \> $x_1$ := $root(r_1)$; $x_2$ := $root(r_2)$ \\
2953: \> \textbf{if} ($(x_1,x_2) \in P$) \textbf{return} $\emptyset$\\
2954: \> \textbf{if} ($x_1 = \nt{new}$)  \textbf{return} $\emptyset$ \\
2955: \> \textbf{if} ($x_1 \rightarrow \svars{v} \in r_1$) 
2956:         \textbf{return} $\{(\ttold,v,r_2)\}$ \\
2957: \> \textbf{if} ($x_1 \rightarrow \sgrounds{v} \in r_1$)
2958:         \textbf{return} $\{(\ttground,v,r_2)\}$ \\
2959: \> $M$ := $\emptyset$ \\
2960: \> \textbf{foreach} rule $x_1 \rightarrow f(x_{11}, \ldots, x_{1n})$ in $r_1$ \\
2961: \> \> \textbf{if} exists rule $x_2 \rightarrow f(x_{21}, \ldots, x_{2n})$ in
2962: $r_2$ \\
2963: \> \> \> \textbf{for} $i$ := $1 .. n$ \\
2964: \> \> \> \> $M$ := $M~ \cup$ \func{collect\_set}($subg(x_{1i},r_1)$, $subg(x_{2i},r_2)$, $P \cup \{(x_1,x_2)\}$) \\
2965: \> \textbf{return} $M$ \\
2966: \end{tabbing}
2967: \end{small}
2968: \caption{Algorithm for collecting the type-instantiations that match
2969:  type parameters.\label{fig:collect}}
2970: \label{collect_set}
2971: 
2972: \end{figure}
2973: 
2974: Assume the literal is schedulable for mode declaration 
2975: $p(c_1 \rightarrow s_1, \ldots, c_n \rightarrow s_n)$.  
2976: We proceed by matching the ti-trees $\func{rt}(dt_j, c_j)$ 
2977: against the current instantiations $r_j$ in a
2978: process analogous to the matching that occurs in the meet function.
2979: Note that $\func{rt}(dt_j, c_j)$ is the ti-grammar which contains information
2980: on the positions of type parameters in the declared type of $p$.
2981: 
2982: Consider the function \func{collect\_set}($r_1,r_2,\emptyset$),
2983: defined in Figure~\ref{collect_set}, which returns the set of triples
2984: $(\ttold,v,r')$ and $(\ttground,v,r')$ obtained by collecting
2985: each ti-grammar, $r'$, in $r_2$ 
2986: matching occurrences of $\svars{v}$ and  $\sgrounds{v}$ in 
2987: $r_1$. 
2988: Let 
2989: $M = \cup_{j=1}^n$ \func{collect\_set}($\func{rt}(dt_j,c_j),r_j,\emptyset$).  
2990: We will use this 
2991: information to compute the success instantiations as follows: since the
2992: only success type-instantiation information for elements of parametric type
2993: $v$ must come from its call type-instantiations, we can safely assume that
2994: any success type-instantiation is at least as instantiated as the join
2995: (upper bounds) of the calls.
2996: 
2997: 
2998: Note that when determining ground success information, we need only
2999: consider ground calling instantiations, since ground success instantiations
3000: cannot result from old call instantiations.
3001: On the other hand, for old success information, we need to consider both old
3002: and ground calling instantiations, since old success instantiations can
3003: result from either.
3004: Hence the following definitions for $ground(v,M)$ and $old(v,M)$, which
3005: compute upper bounds on success instantiations for $v$ based on the call
3006: instantiation information collected in $M$:
3007: \begin{eqnarray*}
3008: ground(v,M) & = & \bigvee \{ r ~|~ (\ttground,v,r) \in M \} \\
3009: old(v,M) & = & \bigvee \{ r ~|~ (\ttground,v,r) \in M \mbox{~or~}
3010: (\ttold,v,r) \in M\}
3011: \end{eqnarray*}
3012: Because the literal is schedulable for the given mode we know
3013: that no $r_i$ contains \nt{new} for any $t$. 
3014: Thus,  the abstract disjunctions in  
3015: $ground(v,M)$ and $old(v,M)$ never lead to $\ourtop$.
3016: 
3017: Let $ps_j$ be the result of replacing in 
3018: $\func{rt}(dt_j, s_j)$ 
3019: each non-terminal $x$ with productions of the form
3020: $$
3021: x \rightarrow \sgrounds{v} ~;~ \svars{v}
3022: $$
3023: by $root(old(v,M))$ and removing the rules for
3024: $x$, and replacing each non-terminal $x$ with productions of the form
3025: $$
3026: x \rightarrow \sgrounds{v}
3027: $$
3028: by $root(ground(v,M))$ and removing the rules for $x$, and finally adding
3029: the rules in $old(v,M)$ and $ground(v,M)$.
3030: The new ti-state
3031: resulting after scheduling the polymorphic literal is $TI' = \{x_1 \mapsto
3032: r_1 \wedge ps_1, \ldots, x_n \mapsto r_n \wedge ps_n\} \cup \emph{RTI}$.
3033: 
3034: \begin{example} \label{ex:hopush}
3035: Assume we are scheduling the \texttt{push/3} literal in the
3036: goal using code from Figure~\ref{fig:stack} and Example~\ref{ex:map}:
3037: \begin{ttline}
3038: ?- empty(S0), I0 = mult(pos), push(S0,I0,S1), pop(S1,I,S2), map(I,[neg],S). 
3039: \end{ttline}
3040: for current ti-state 
3041: $\{\texttt{S0} \mapsto r_3, \texttt{I0} \mapsto r_4\}$,
3042: the remaining variables being \nt{new},
3043: where $r_3$ is the grammar
3044: \begin{grammar}
3045: ti(\nt{list(sign)},\nt{elist}) & \rightarrow & [] \\
3046: \end{grammar}
3047: and $r_4$ is the grammar with root 
3048: $a = ti(pred(\nt{sign},\nt{sign}),pred(\nt{in},\nt{out}))$ defined by
3049: \begin{grammar}
3050: a & \rightarrow &
3051:         \ipred(ti(\nt{sign},\ntground),ti(\nt{sign},\ntground),
3052:                           \ntnew,ti(\nt{sign},\ntground)) \\
3053: ti(\nt{sign},\ntground) & \rightarrow & neg ~;~ zero ~;~ pos
3054: \end{grammar}
3055: The ti-grammars 
3056: defined by the declared type and mode declarations 
3057: for the first two arguments of
3058: \texttt{push/3} are: $r_5 = \func{rt}(\texttt{list(T)},\texttt{ground})$ 
3059: or the grammar
3060: \begin{grammar}
3061: ti(\nt{list(T)},\nt{ground}) & \rightarrow & [] ~;~ [ ti(\nt{T},\ntground)
3062: ~|~ti(\nt{list(T)},\nt{ground}) ] \\
3063: ti(\nt{T},\ntground) & \rightarrow & \$\texttt{ground}(T)\$ 
3064: \end{grammar}
3065: and, $r_6 = \func{rt}(\nt{T},\nt{ground})$, the grammar
3066: \begin{grammar}
3067: ti(\nt{T},\ntground) & \rightarrow & \$\texttt{ground}(T)\$ 
3068: \end{grammar}
3069: The literal is schedulable and the matching process determines that
3070: $\func{collect\_set}(r_5,r_3) = \emptyset$, 
3071: $\func{collect\_set}(r_6,r_4) = \{(\nt{ground},\nt{T}, r_4)\}$
3072: and $M = \{(\nt{ground},\nt{T}, r_4)\}$. 
3073: The improved analysis 
3074: determines that extra success instantiation 
3075: ($ps_3$) 
3076: for the third argument (\texttt{S1}) by improving
3077: $\func{rt}(\texttt{list(T)},\texttt{nelist(ground)})$ which is
3078: \begin{grammar}
3079: ti(\nt{list(T)},\nt{nelist(ground)} & \rightarrow & [ ti(\nt{T},\ntground)
3080: ~|~ ti(\nt{list(T)},\nt{list(ground)} ] \\
3081: ti(\nt{list(T)},\nt{list(ground)} & \rightarrow & [] ~;~ [ ti(\nt{T},\ntground)
3082: ~|~ ti(\nt{list(T)},\nt{list(ground)} ] \\
3083: ti(\nt{T},\ntground) & \rightarrow & \$\texttt{ground}(T)\$ 
3084: \end{grammar}
3085: replacing the last rule by $r_4$ and occurrences of
3086: $ti(\nt{list(T)},\nt{list(ground)}$ by $root(r_4) = a$ obtaining
3087: \begin{grammar}
3088: ti(\nt{list(T)},\nt{nelist(ground)} & \rightarrow & [ {a}
3089: ~|~ ti(\nt{list(T)},\nt{list(ground)} ] \\
3090: ti(\nt{list(T)},\nt{list(ground)} & \rightarrow & [] ~;~ [ a
3091:  ~|~ ti(\nt{list(T)},\nt{list(ground)} ] \\
3092: a & \rightarrow &
3093:         \ipred(ti(\nt{sign},\ntground),ti(\nt{sign},\ntground),
3094:                           \ntnew,ti(\nt{sign},\ntground)) \\
3095: ti(\nt{sign},\ntground) & \rightarrow & neg ~;~ zero ~;~ pos
3096: \end{grammar}
3097: Note that the mode information of the higher-order term has been preserved.
3098: The mode checking for the call to \texttt{pop/3} 
3099: will similarly preserve the higher-order mode information, and
3100: the original goal will be schedulable.
3101: \end{example}
3102: 
3103: 
3104: The interaction between polymorphic mode analysis and higher-order
3105: constructs and calls is in fact slightly more complicated than discussed
3106: previously. This is because higher-order objects allow us to give arguments
3107: to a predicate in a piecewise manner. This affects the execution of
3108: \func{collect\_set} which was collecting the set $M$ over all predicate
3109: arguments simultaneously. In order to handle these accurately we need to
3110: store the information from $M$ found during the higher-order object
3111: construction, to be used in the higher-order call. That is, we need to store
3112: $ground(v,M)$ and $old(v,M)$ for each type parameter $v$ appearing in the
3113: remaining arguments as part of the ti-grammar for the higher-order object.
3114: 
3115: An alternative approach used by the HAL compiler is to update the success
3116: instantiations stored in the ti-grammar of the higher-order object based on
3117: the extra information from polymorphism.  When the call to the higher-order
3118: polymorphic predicate is analyzed, the matching process also matches the
3119: success instantiations of the higher-order object to recover the previous
3120: matching information.
3121: 
3122: \section{Conclusions and Future Work}\label{sec:concl}
3123: 
3124: The ultimate aim of mode checking is to ensure that the compiler has correct
3125: instantiation information at every program point in order to
3126: allow program optimization.  It is reasonably 
3127: straightforward (but laborious) 
3128: to show that the mode checking defined in this paper
3129: ensures that the resulting program has input-output 
3130: and call correctness.
3131: Some subtle points that arise are as follows. 
3132: First, 
3133: it is an invariant that any ti-grammar (or sub-grammar) $r$ occurring in the
3134: mode checking process that contains rule
3135: $root(r) \rightarrow \#\texttt{var}\#$ must be equivalent 
3136: to $\func{rt}(t,\ttold)$ for some $t$, 
3137: which means that when variables are 
3138: bound indirectly (through shared variables) the 
3139: correctness of the ti-state is maintained. 
3140: Second, if a procedure is input-output correct for
3141: its declared type, then it is also input-output correct for any
3142: instance of the type. This follows from the limited possibilities
3143: for manipulating objects of variable type (essentially copying
3144: and testing equality).
3145: 
3146: 
3147: This means that compiler optimizations can safely be applied.
3148: The only mode error that may be detected at run-time
3149: arises from situations explained in Section~\ref{sec:eq} and
3150: Example~\ref{ex:runtimeerror}.\footnote{Note this does not invalidate the
3151: input-output or call correctness for the remainder of the program.}
3152: The compiler emits warnings when such a possibility exists.
3153: 
3154: 
3155: We have described for the first time 
3156: mode checking for CLP languages, such as HAL,
3157: which have strong typing and re-orderable clause bodies, 
3158: and described the algorithms currently used in the HAL compiler.
3159: The actual implementation of these algorithms in the HAL 
3160: compiler is considerably more
3161: sophisticated than the simple presentation here.  Partial schedules are
3162: computed and stored and accessed only when enough new instantiation
3163: information has been created to reassess them. Operations such as $\preceq$ 
3164: are tabled and hence many operations are simply a 
3165: lookup in a table. 
3166: We have found mode checking is efficient enough for a practical compiler.
3167: For the compiler compiling itself 
3168: (29000 lines of HAL code in 27 highly interdependent 
3169: modules compiled in 15 mins 20 secs)
3170: mode checking requires 
3171: 16.4\% of overall compile time.
3172: While compiling the libraries
3173: (4600 lines of HAL code in 12 almost independent modules compiled in 47 secs)
3174: it takes 13.1\% of overall compile time.
3175: And compiling a suite of small to medium size benchmarks 
3176: (6200 lines of HAL code in 67 modules compiled in 183 secs)
3177: it takes 13.0\% of overall compile time,
3178: 
3179: There is considerable scope for future work. 
3180: One aim is to strengthen mode checking. 
3181: We plan to add tracking of aliasing and groundness dependencies. 
3182: Another problem is that currently
3183: HAL (like Mercury) never undoes a feasible choice of ordering the literals.
3184: This can lead to correctly moded programs not being checkable as in
3185: Example \ref{non-check}.  In practice this behavior is rare, but we
3186: would like to explore more complete strategies.
3187: 
3188: \begin{example}
3189: \label{non-check}
3190: Consider the following declarations and goal:
3191: \begin{ttprog}
3192: :- pred p(list(int),list(int)). \\
3193: :- mode p(out,out) is det. \\
3194: :- mode p(in(evenlist(ground)),out(evenlist(ground))) is det. \\
3195: \\
3196: :- pred q(list(int)). \\
3197: :- mode q(out(evenlist(ground))) is det. \\
3198: \\
3199: :- pred r(list(int)). \\
3200: :- mode r(in(evenlist(ground))) is det. \\
3201: \\
3202: ?- p(L0, L1), q(L0), r(L1).
3203: \end{ttprog}
3204: The first two literals of the goal are schedulable in the 
3205: order given, as 
3206: \texttt{p\_mode1(L0, L1), q\_mode1(L2), L2 = L0}
3207: but then \texttt{r(L1)} is not schedulable (the list \texttt{L1}
3208: may not be of even length). There is a feasible schedule:
3209: \texttt{q\_mode1(L0), p\_mode2(L0, L1), r\_mode1(L1)}
3210: which is missed by both HAL and Mercury, since they don't undo the
3211: feasible schedule for the first two literals.
3212: In order to avoid this problem HAL allows the user to name modes
3213: of a predicate and hence specify exactly which mode is required.
3214: \end{example}
3215: 
3216: A second aim is to improve the efficiency of the reordered code,
3217: by, for instance, reducing the number of initializations.
3218: The final aim is to provide mode inference as well as mode 
3219: checking---the ability to reorder body literals  makes this a potentially very expensive
3220: process.
3221: 
3222: 
3223: 
3224: \subsection*{Acknowledgements}
3225: 
3226: We would like to thank Fergus Henderson and Zoltan Somogyi for discussions
3227: of the Mercury mode system, and their help in modifying Mercury to support
3228: HAL features. We would also like to thank
3229: the anonymous referees whose suggestions have enormously improved the paper.
3230: 
3231: %\bibliography{paper}
3232: %\bibliographystyle{acmtrans}
3233: \begin{thebibliography}{}
3234: 
3235: \bibitem[\protect\citeauthoryear{Boye and Ma\l{}uszy\'{n}ski}{Boye and
3236:   Ma\l{}uszy\'{n}ski}{1997}]{mal}
3237: {\sc Boye, J.} {\sc and} {\sc Ma\l{}uszy\'{n}ski, J.} 1997.
3238: \newblock Directional types and the annotation method.
3239: \newblock {\em {JLP}\/}~{\em 33,\/}~3, 179--220.
3240: 
3241: \bibitem[\protect\citeauthoryear{Bueno, Cabeza, Carro, Hermenegildo,
3242:   L\'{o}pez-Garc\'{\i}a, and Puebla}{Bueno et~al\mbox{.}}{2002}]{CIAO}
3243: {\sc Bueno, F.}, {\sc Cabeza, D.}, {\sc Carro, M.}, {\sc Hermenegildo, M.},
3244:   {\sc L\'{o}pez-Garc\'{\i}a, P.}, {\sc and} {\sc Puebla, G.} 2002.
3245: \newblock The {Ciao} {Prolog} system reference manual.
3246: \newblock Tech. rep., Technical University of Madrid (UPM).
3247: \newblock http://clip.dia.fi.upm.es/Software/Ciao.
3248: 
3249: \bibitem[\protect\citeauthoryear{Charlier and Hentenryck}{Charlier and
3250:   Hentenryck}{1994}]{LeCharlier94:toplas}
3251: {\sc Charlier, B.~L.} {\sc and} {\sc Hentenryck, P.~V.} 1994.
3252: \newblock {E}xperimental {E}valuation of a {G}eneric {A}bstract
3253:   {I}nterpretation {A}lgorithm for {P}rolog.
3254: \newblock {\em {ACM} Transactions on Programming Languages and Systems\/}~{\em
3255:   16,\/}~1, 35--101.
3256: 
3257: \bibitem[\protect\citeauthoryear{Codish and Lagoon}{Codish and
3258:   Lagoon}{2000}]{mike}
3259: {\sc Codish, M.} {\sc and} {\sc Lagoon, V.} 2000.
3260: \newblock Type dependencies for logic programs using {ACI}-unification.
3261: \newblock {\em Theoretical Computer Science\/}~{\em 238,\/}~1--2, 131--159.
3262: 
3263: \bibitem[\protect\citeauthoryear{Codognet, Codognet, and Corsini}{Codognet
3264:   et~al\mbox{.}}{1990}]{Codognet90}
3265: {\sc Codognet, C.}, {\sc Codognet, P.}, {\sc and} {\sc Corsini, M.} 1990.
3266: \newblock Abstract interpretation of concurrent logic languages.
3267: \newblock In {\em {N}orth {A}merican Conference on Logic Programming}.
3268:   215--232.
3269: 
3270: \bibitem[\protect\citeauthoryear{Comon, Dauchet, Gilleron, Jacquemard, Lugiez,
3271:   Tison, and Tommasi}{Comon et~al\mbox{.}}{1997}]{tata97}
3272: {\sc Comon, H.}, {\sc Dauchet, M.}, {\sc Gilleron, R.}, {\sc Jacquemard, F.},
3273:   {\sc Lugiez, D.}, {\sc Tison, S.}, {\sc and} {\sc Tommasi, M.} 1997.
3274: \newblock Tree automata techniques and applications.
3275: \newblock Available on: \texttt{http://www.grappa.univ-lille3.fr/tata}.
3276: 
3277: \bibitem[\protect\citeauthoryear{Debray}{Debray}{1989}]{deb-acm89}
3278: {\sc Debray, S.~K.} 1989.
3279: \newblock Static inference of modes and data dependencies in logic programs.
3280: \newblock {\em {ACM} Transactions on Programming Languages and Systems\/}~{\em
3281:   11,\/}~3, 418--450.
3282: 
3283: \bibitem[\protect\citeauthoryear{Demoen, {Garc\'{\i}a de la Banda}, Harvey,
3284:   Marriott, and Stuckey}{Demoen et~al\mbox{.}}{1999a}]{iclp99}
3285: {\sc Demoen, B.}, {\sc {Garc\'{\i}a de la Banda}, M.}, {\sc Harvey, W.}, {\sc
3286:   Marriott, K.}, {\sc and} {\sc Stuckey, P.} 1999a.
3287: \newblock Herbrand constraint solving in {HAL}.
3288: \newblock In {\em Logic Programming: Proceedings of the 16th International
3289:   Conference}, {D.~D. Schreye}, Ed. MIT Press, 260--274.
3290: 
3291: \bibitem[\protect\citeauthoryear{Demoen, {Garc\'{\i}a de la Banda}, Harvey,
3292:   Marriott, and Stuckey}{Demoen et~al\mbox{.}}{1999b}]{cp99}
3293: {\sc Demoen, B.}, {\sc {Garc\'{\i}a de la Banda}, M.}, {\sc Harvey, W.}, {\sc
3294:   Marriott, K.}, {\sc and} {\sc Stuckey, P.} 1999b.
3295: \newblock An overview of {HAL}.
3296: \newblock In {\em Proceedings of the Fourth International Conference on
3297:   Principles and Practices of Constraint Programming}, {J.~Jaffar}, Ed. LNCS.
3298:   Springer-Verlag.
3299: 
3300: \bibitem[\protect\citeauthoryear{Demoen, {Garc\'{\i}a de la Banda}, and
3301:   Stuckey}{Demoen et~al\mbox{.}}{1999}]{typec}
3302: {\sc Demoen, B.}, {\sc {Garc\'{\i}a de la Banda}, M.}, {\sc and} {\sc Stuckey,
3303:   P.} 1999.
3304: \newblock Type constraint solving for parametric and ad-hoc polymorphism.
3305: \newblock In {\em Proceedings of the 22nd {A}ustralian Computer Science
3306:   Conference}, {J.~Edwards}, Ed. Springer-Verlag, 217--228.
3307: 
3308: \bibitem[\protect\citeauthoryear{{Garc\'{\i}a de la Banda}, Demoen, Marriott,
3309:   and Stuckey}{{Garc\'{\i}a de la Banda} et~al\mbox{.}}{2002}]{flops2002}
3310: {\sc {Garc\'{\i}a de la Banda}, M.}, {\sc Demoen, B.}, {\sc Marriott, K.}, {\sc
3311:   and} {\sc Stuckey, P.} 2002.
3312: \newblock To the gates of {HAL}: a {HAL} tutorial.
3313: \newblock In {\em Proceedings of the Sixth International Symposium on
3314:   Functional and Logic Programming}. Number 2441 in LNCS. Springer-Verlag,
3315:   47--66.
3316: 
3317: \bibitem[\protect\citeauthoryear{Gecseg and Steinby}{Gecseg and
3318:   Steinby}{1984}]{gs}
3319: {\sc Gecseg, F.} {\sc and} {\sc Steinby, M.} 1984.
3320: \newblock {\em Tree Automata}.
3321: \newblock Academei Kaido, Budapest.
3322: 
3323: \bibitem[\protect\citeauthoryear{Henderson, Somogyi, and Conway}{Henderson
3324:   et~al\mbox{.}}{1996}]{mercury-switch}
3325: {\sc Henderson, F.}, {\sc Somogyi, Z.}, {\sc and} {\sc Conway, T.} 1996.
3326: \newblock Determinism analysis in the {Mercury} compiler.
3327: \newblock In {\em Proceedings of the {Australian} Computer Science Conference}.
3328:   337--346.
3329: 
3330: \bibitem[\protect\citeauthoryear{Hermenegildo, Puebla, Bueno, and
3331:   L\'{o}pez-Garc\'{\i}a}{Hermenegildo et~al\mbox{.}}{2003}]{SAS-CIAO}
3332: {\sc Hermenegildo, M.}, {\sc Puebla, G.}, {\sc Bueno, F.}, {\sc and} {\sc
3333:   L\'{o}pez-Garc\'{\i}a, P.} 2003.
3334: \newblock Program development using abstract interpretation (and the {Ciao}
3335:   system preprocessor).
3336: \newblock In {\em Proceedings of the 10th International Symposium on Static
3337:   Analysis}, {R.~Cousot}, Ed. LNCS, vol. 2694. Springer.
3338: 
3339: \bibitem[\protect\citeauthoryear{Janssens and Bruynooghe}{Janssens and
3340:   Bruynooghe}{1993}]{gerda}
3341: {\sc Janssens, G.} {\sc and} {\sc Bruynooghe, M.} 1993.
3342: \newblock Deriving descriptions of possible values of program variables by
3343:   means of abstract interpretation.
3344: \newblock {\em {JLP}\/}~{\em 13}, 205--258.
3345: 
3346: \bibitem[\protect\citeauthoryear{Marriott and Stuckey}{Marriott and
3347:   Stuckey}{1992}]{3r}
3348: {\sc Marriott, K.} {\sc and} {\sc Stuckey, P.} 1992.
3349: \newblock The 3 {R}'s of optimizing constraint logic programs: Refinement,
3350:   removal, and reordering.
3351: \newblock In {\em 19th. Annual {ACM} Conference on Principles of Programming
3352:   Languages}. {ACM}, 334--344.
3353: 
3354: \bibitem[\protect\citeauthoryear{Marriott and Stuckey}{Marriott and
3355:   Stuckey}{1998}]{CLPbook}
3356: {\sc Marriott, K.} {\sc and} {\sc Stuckey, P.} 1998.
3357: \newblock {\em Programming with Constraints: an Introduction}.
3358: \newblock MIT Press.
3359: 
3360: \bibitem[\protect\citeauthoryear{Mellish}{Mellish}{1987}]{Mellish87}
3361: {\sc Mellish, C.} 1987.
3362: \newblock {A}bstract {I}nterpretation of {P}rolog {P}rograms.
3363: \newblock {\em Abstract Interpretation of Declarative Languages\/}, 181--198.
3364: 
3365: \bibitem[\protect\citeauthoryear{Mulkers, Simoens, Janssens, and
3366:   Bruynooghe}{Mulkers et~al\mbox{.}}{1995}]{mulkers-iclp95}
3367: {\sc Mulkers, A.}, {\sc Simoens, W.}, {\sc Janssens, G.}, {\sc and} {\sc
3368:   Bruynooghe, M.} 1995.
3369: \newblock {O}n the {P}racticality of {A}bstract {E}quation {S}ystems.
3370: \newblock In {\em International Conference on Logic Programming}. MIT Press,
3371:   781--795.
3372: 
3373: \bibitem[\protect\citeauthoryear{Mycroft}{Mycroft}{1984}]{mycroft}
3374: {\sc Mycroft, A.} 1984.
3375: \newblock Polymorphic type schemes and recursive definitions.
3376: \newblock In {\em International Symposium on Programming}. LNCS, vol. 167.
3377:   Springer-Verlag, 217--228.
3378: 
3379: \bibitem[\protect\citeauthoryear{Okasaki}{Okasaki}{1998}]{okasaki}
3380: {\sc Okasaki, C.} 1998.
3381: \newblock {\em Purely Functional Data Structures}.
3382: \newblock Cambridge University Press.
3383: 
3384: \bibitem[\protect\citeauthoryear{Overton}{Overton}{2003}]{dmo}
3385: {\sc Overton, D.} 2003.
3386: \newblock Precise and expressive mode systems for typed logic programming
3387:   languages.
3388: \newblock Ph.D. thesis, Department of Computer Science and Software
3389:   Engineering, University of Melbourne.
3390: \newblock
3391:   http://www.cs.mu.oz.au/research/mercury/information/papers/dmo-thesis.ps.gz.
3392: 
3393: \bibitem[\protect\citeauthoryear{Ridoux, Boizumault, and Malesieux}{Ridoux
3394:   et~al\mbox{.}}{1999}]{rid-boiz99}
3395: {\sc Ridoux, O.}, {\sc Boizumault, P.}, {\sc and} {\sc Malesieux, F.} 1999.
3396: \newblock Typed static analysis: Application to groundness analysis of prolog
3397:   and lambda-prolog.
3398: \newblock In {\em Proceedings of the International Symposium on Functional and
3399:   Logic Programming}. LNCS. Springer-Verlag, 267--283.
3400: 
3401: \bibitem[\protect\citeauthoryear{Smaus, Hill, and King}{Smaus
3402:   et~al\mbox{.}}{2000}]{smaus}
3403: {\sc Smaus, J.-G.}, {\sc Hill, P.}, {\sc and} {\sc King, A.} 2000.
3404: \newblock Mode analysis domains for typed logic programs.
3405: \newblock In {\em Proceedings of the 9th International Workshop on Logic-based
3406:   Program Synthesis and Transformation}, {A.~Bossi}, Ed. LNCS. Springer-Verlag,
3407:   82--101.
3408: 
3409: \bibitem[\protect\citeauthoryear{Somogyi}{Somogyi}{1987}]{Zoltan87}
3410: {\sc Somogyi, Z.} 1987.
3411: \newblock A system of precise modes for logic programs.
3412: \newblock In {\em Logic Programming: Proceedings of the 4th International
3413:   Conference}. 769--787.
3414: 
3415: \bibitem[\protect\citeauthoryear{Somogyi, Henderson, and Conway}{Somogyi
3416:   et~al\mbox{.}}{1996}]{mercury}
3417: {\sc Somogyi, Z.}, {\sc Henderson, F.}, {\sc and} {\sc Conway, T.} 1996.
3418: \newblock The execution algorithm of {Mercury}: an efficient purely declarative
3419:   logic programming language.
3420: \newblock {\em {JLP}\/}~{\em 29}, 17--64.
3421: 
3422: \bibitem[\protect\citeauthoryear{Wadler}{Wadler}{1989}]{wadler}
3423: {\sc Wadler, P.} 1989.
3424: \newblock Theorems for free.
3425: \newblock In {\em Conference on Functional Programming Languages and Computer
3426:   Architecture}. ACM Press, 347--359.
3427: 
3428: \end{thebibliography}
3429: 
3430: 
3431: 
3432: 
3433: \small
3434: 
3435: \appendix
3436: \section{Algorithms}
3437: 
3438: In this appendix we give full versions of
3439: the tree operations mentioned in the paper.  The basic tree operations are
3440: relatively straightforward, but new kinds of nodes for solver variables,
3441: polymorphic types and higher-order terms complicate this somewhat.
3442: Recall that we assume we are dealing with type correct programs, hence the
3443: operations make use of this to avoid many redundant comparisons. For example
3444: when comparing the order of two ti-grammars, then if one is a predicate type,
3445: the other must be an identical predicate type.
3446: 
3447: The ordering relation $r_1 \preceq r_2$ on two ti-grammars 
3448: is defined as the result of $\func{lt}(r_1, r_2, \emptyset)$. 
3449: 
3450: \begin{small}
3451: \begin{tabbing} 
3452: xx \= xx \= xx \= xx \= xx \= xx \= xxxxxxxxxxxxxxxxxxxxx \= xxxxxxx \= \kill
3453: \func{lt}($p_1$, $p_2$, $P$) \\
3454: \> \textbf{if} ($p_2 = \ourtop$) \textbf{return true} \\
3455: \> \textbf{if} ($p_1 = \ourtop$) \textbf{return false} \\
3456: \> \textbf{if} ($(root(p_1),root(p_2)) \in P$) \textbf{return true} \\
3457: \> \textbf{if} ($p_2 = \nt{new} \mbox{ and } p_1 \neq \nt{new}$) \textbf{return false} \\
3458: \> \textbf{case}: \\
3459: \> \> $p_1 = \ntnew$: \textbf{return} $(p_2 = \nt{new})$ \\
3460: \> \> $root(p_1) \rightarrow \svars{v} \in p_1$: 
3461:         \> \> \> \> \> \%\% $p_1 =  \func{base}(v,\ttold)$ \\
3462: \> \> \> \textbf{return} $root(p_2) \rightarrow \svars{v} \in p_2$ \\
3463: \> \> $root(p_1) \rightarrow \sgrounds{v} \in p_1$: 
3464:         \> \> \> \> \> \%\% $p_1 = \func{base}(v,\ttground)$ \\
3465: \> \> \> \textbf{return} $root(p_2) \rightarrow \sgrounds{v} \in p_2$ \\
3466: \> \> $root(p_1) \rightarrow \gpred \in p_1$: 
3467:         \> \> \> \> \> \%\% $p_1 = \func{base}(pred(t_1,\ldots,t_n),\ttground)$ \\
3468: \> \> \>  \textbf{return} $root(p_2) \rightarrow \gpred \in p_2$ \\
3469: \> \> $root(p_1) \rightarrow \ipred(tc_1, ts_1, \ldots, tc_n, ts_n)
3470:          \in p_1$:  \> \> \> \> \> \> \%\% non-base higher-order ti \\
3471: \> \> \> \textbf{if} ($root(p_2) \rightarrow \gpred \in p_2$) 
3472:         \textbf{return} \textbf{true} \\
3473: \> \> \> \textbf{let} $root(p_2) \rightarrow 
3474:         \ipred(tc'_1, ts'_1, \ldots, tc'_n, ts'_n) \in p_2$ \\
3475: \> \> \> \textbf{for} $i$ := $1..n$ \\
3476: \> \> \> \> \textbf{if} ($\neg
3477: \func{lt}(subg(tc'_i,p_2),subg(tc_i,p_1),P \cup \{(root(p_1),root(p_2))\})$)
3478: \textbf{return} \textbf{false} \\
3479: \> \> \> \> \textbf{if} ($\neg
3480: \func{lt}(subg(ts_i,p_1),subg(ts'_i,p_2),P \cup \{(root(p_1),root(p_2))\})$)
3481: \textbf{return} \textbf{false} \\
3482: \> \> \> \textbf{endfor} \\
3483: \> \> \> \textbf{return true} \\
3484: \> \> \textbf{default}: \\
3485: \> \> \> \textbf{foreach} $root(p_1) \rightarrow f(x_1, \ldots, x_n)
3486: \in p_1$ \\
3487: \> \> \> \> \textbf{if} ($\exists root(p_2) \rightarrow f(x'_1, \ldots,
3488: x'_n) \in p_2$) \\
3489: \> \> \> \> \> \textbf{for} $i$ := $1 ..n$ \\
3490: \> \> \> \> \> \> \textbf{if} ($\neg \func{lt}(subg(x_i, p_1), subg(x'_i,
3491: p_2), P \cup \{(root(p_1),root(p_2))\})$) \textbf{return false} \\
3492: \> \> \> \> \> \textbf{endfor} \\
3493: \> \> \> \> \textbf{else} \textbf{return} \textbf{false} \\
3494: \> \> \> \textbf{endfor} \\
3495: \> \> \> \textbf{return true} 
3496: \end{tabbing}
3497: \end{small}
3498: 
3499: 
3500: The abstract conjunction operation
3501: $r_1 \wedge r_2$ on two ti-grammars
3502: is defined as the first element of the pair returned by
3503: $\func{conj}(r_1,r_2, \emptyset)$.
3504: 
3505: \begin{small}
3506: \begin{tabbing} 
3507: xx \= xx \= xx \= xx \= xx \= xx \= xxxxxxxxxxxxxxxxxxxxx \= xxxxxxx \= \kill
3508: \func{conj}($p_1$,$p_2$,$P$) \\
3509: %%\> \textbf{if} $p_1 = \bot$ \textbf{return} $(\bot,\bot)$\\ 
3510: %%\> \textbf{if} $p_2 = \bot$ \textbf{return} $(\bot,\bot)$\\ 
3511: \> \textbf{if} ($p_1 = \ourtop$) \textbf{return} $(\ourtop,\_)$\\ 
3512: \> \textbf{if} ($p_2 = \ourtop$) \textbf{return} $(\ourtop,\_)$\\ 
3513: \> \textbf{if} ($p_2 = \nt{new}$) \textbf{return} $(p_1,root(p_1))$\\ 
3514: \> \textbf{if} ($meet(root(p_1),root(p_2)) \in P$) 
3515: \textbf{return} $(\emptyset,meet(root(p_1),root(p_2)))$\\
3516: \> \textbf{case}: \\
3517: \> \> $p_1 = \nt{new}$: \textbf{return} ($p_2$, $root(p_2)$)  \\
3518: \> \> $root(p_1) \rightarrow \svars{v} \in p_1$: 
3519:         \> \> \> \> \> \%\% $p_1 =  \func{base}(v,\ttold)$ \\
3520: \> \> \>  \textbf{return} ($p_2$,$root(p_2)$) \\
3521: \> \> $root(p_1) \rightarrow \sgrounds{v} \in p_1$: 
3522:         \> \> \> \> \> \%\% $p_1 = \func{base}(v,\ttground)$ \\
3523: \> \> \>  \textbf{return} ($p_1$,$root(p_1)$) \\
3524: \> \> $root(p_1) \rightarrow \gpred \in p_1$: 
3525:      \> \> \> \> \> \%\% $p_1 = \func{base}(pred(t_1,\ldots,t_n),\ttground)$ \\
3526: \> \> \>  \textbf{return} ($p_2$,$root(p_2)$) \\
3527: \> \> $root(p_1) \rightarrow \ipred(tc_1, ts_1, \ldots, tc_n, ts_n)
3528:          \in p_1$:  \> \> \> \> \> \> \%\% non-base higher-order ti \\
3529: \> \> \> \textbf{if} ($root(p_2) \rightarrow \gpred \in p_2$) \textbf{return} ($p_1$,$root(p_1)$) \\
3530: \> \> \> \textbf{let} $root(p_2) \rightarrow 
3531:         \ipred(xc'_1, xs'_1, \ldots, xc'_n, xs'_n) \in p_2$ \\
3532: \> \> \> \textbf{for} $i$ := $1..n$ \\
3533: \> \> \> \> $(tc_i, xc''_i)$ := $\func{disj}(subg(xc_i,p_1),subg(xc'_i,
3534: p_2),P)$ \\
3535: \> \> \> \> $(ts_i, xs''_i)$ := $\func{conj}(subg(xs_i,p_1),subg(xs'_i,
3536: p_2),P)$ \\
3537: \> \> \> \> \textbf{if} ($tc_i = \ourtop \mbox{ or } ts_i = \ourtop$)
3538:                 \textbf{return} $(\ourtop,\_)$ \\
3539: \> \> \> \textbf{endfor} \\
3540: \> \> \> $p$ := \= $\{ meet(root(p_1), root(p_2))
3541: \rightarrow \ipred(xc''_1, xs''_1, \ldots, xc''_n, xs''_n)\} \cup$ \\
3542: \> \> \> \> $tc_1  \cup \cdots \cup tc_n \cup ts_1 \cup \cdots \cup ts_n$ \\
3543: \> \> \> \textbf{return} $(p,  meet(root(p_1),root(p_2)))$ \\
3544: \> \> \textbf{default}: \\
3545: \> \> \> $p$ := $\emptyset$ \\
3546: \> \> \> \textbf{foreach} $root(p_1) \rightarrow f(x_1, \ldots, x_n)
3547: \in p_1$ \\
3548: \> \> \> \> \textbf{if} ($\exists root(p_2) \rightarrow f(x'_1, \ldots,
3549: x'_n) \in p_2$) \\
3550: \> \> \> \> \> \textbf{for} $i$ := $1 .. n$ \\
3551: \> \> \> \> \> \> $(p''_i, x''_i)$ := $\func{conj}(subg(x_i,p_1),
3552: subg(x'_i, p_2),  P \cup \{meet(root(p_1),root(p_2))\})$) \\
3553: \> \> \> \> \> \> \textbf{if} ($p''_i = \ourtop$)
3554:                 \textbf{return} $(\ourtop,\_)$  \\
3555: \> \> \> \> \> \textbf{endfor} \\
3556: \> \> \> \> \> $p$ := $meet(root(p_1),root(p_2)) \rightarrow f(x''_1,
3557: \ldots, x''_n) \cup p \cup p''_1 \cup \cdots \cup p''_n$ \\
3558: \> \> \> \textbf{endfor} \\
3559: \> \> \> \textbf{return} $(p, meet(root(p_1),root(p_2)))$  
3560: \end{tabbing}
3561: \end{small}
3562: 
3563: The abstract disjunction operation
3564: $r_1 \vee r_2$ on two ti-grammars, 
3565: is defined as the first element of the pair returned by
3566: $\func{disj}(r_1,r_2, \emptyset)$.
3567: 
3568: \begin{small}
3569: \begin{tabbing} 
3570: xx \= xx \= xx \= xx \= xx \= xx \= xxxxxxxxxxxxxxxxxxxxx \= xxxxxxx \= \kill
3571: \func{disj}($p_1$,$p_2$,$P$) \\
3572: \> \textbf{if} ($p_1 = \ourtop$) \textbf{return} $(\ourtop,\_)$\\ 
3573: \> \textbf{if} ($p_2 = \ourtop$) \textbf{return} $(\ourtop,\_)$\\ 
3574: %% \> \textbf{if} $p_1 = \bot$ \textbf{return} $(p_2,root(p_2))$ \\ 
3575: %% \> \textbf{if} $p_2 = \bot$ \textbf{return} $(p_1,root(p_1))$ \\ 
3576: \> \textbf{if} ($p_1 = \nt{new} \mbox{ and } p_2 = \nt{new}$) 
3577:         \textbf{return} $(\{\nt{new} \rightarrow
3578: \#\texttt{fresh}\#\},\nt{new})$\\ 
3579: \> \textbf{if} ($p_2 = \nt{new}$) \textbf{return} $(\ourtop,\_)$\\ 
3580: \> \textbf{if} ($\exists join(root(p_1),root(p_2)) \in P$) 
3581: \textbf{return} $(\emptyset,join(root(p_1),root(p_2)))$\\
3582: \> \textbf{case}: \\
3583: \> \> $p_1 = \nt{new}$: \textbf{return} ($\ourtop$, $\_$)  \\
3584: \> \> $root(p_1) \rightarrow \svars{v} \in p_1$: 
3585:         \> \> \> \> \> \%\% $p_1 =  \func{base}(v,\ttold)$ \\
3586: \> \> \>   \textbf{return} ($p_1$,$root(p_1)$) \\
3587: \> \> $root(p_1) \rightarrow \sgrounds{v} \in p_1$: 
3588:         \> \> \> \> \> \%\% $p_1 = \func{base}(v,\ttground)$ \\
3589: \> \> \> \textbf{return} ($p_2$,$root(p_2)$) \\
3590: \> \> $root(p_1) \rightarrow \gpred \in p_1$:  
3591:      \> \> \> \> \> \%\% $p_1 = \func{base}(pred(t_1,\ldots,t_n),\ttground)$ \\
3592: \> \> \>   \textbf{return} ($p_1$,$root(p_1)$) \\
3593: \> \> $root(p_1) \rightarrow \ipred(xc_1, xs_1, \ldots, xc_n, xs_n)
3594:          \in p_1$:  \> \> \> \> \> \> \%\% non-base higher-order ti \\
3595: \> \> \> \textbf{if} ($root(p_2) \rightarrow \gpred \in p_2$) 
3596:         \textbf{return} ($p_2$,$root(p_2)$) \\
3597: \> \> \> \textbf{let} $root(p_2) \rightarrow 
3598:         \ipred(xc'_1, xs'_1, \ldots, xc'_n, xs'_n) \in p_2$ \\
3599: \> \> \> \textbf{for} $i$ := $1..n$ \\
3600: \> \> \> \> $(tc_i, xc''_i)$ := $\func{conj}(subg(xc_i,p_1),subg(xc'_i,
3601: p_2),P)$ \\
3602: \> \> \> \> $(ts_i, xs''_i)$ := $\func{disj}(subg(xs_i,p_1),subg(xs'_i,
3603: p_2),P)$ \\
3604: \> \> \> \> \textbf{if} ($tc_i = \ourtop \mbox{ or } ts_i = \ourtop$)
3605:                 \textbf{return} $(\ourtop,\_)$ \\
3606: \> \> \> \textbf{endfor} \\
3607: \> \> \> $p$ := \= $\{join(root(p_1), root(p_2))
3608: \rightarrow \ipred(xc''_1, xs''_1, \ldots, xc''_n, xs''_n)\} \cup$ \\
3609: \> \> \> \> $tc_1 \cup \cdots \cup tc_n \cup ts_1 \cup \cdots \cup ts_n$ \\
3610: \> \> \> \textbf{return} $(p,  join(root(p_1),root(p_2)))$ \\
3611: \> \> \textbf{default}: \\
3612: \> \> \> $p$ := $\emptyset$ \\
3613: \> \> \> \textbf{foreach} $root(p_1) \rightarrow f(x_1, \ldots, x_n)
3614: \in p_1$ \\
3615: \> \> \> \> \textbf{if} ($\exists root(p_2) \rightarrow f(x'_1, \ldots,
3616: x'_n) \in p_2$) \\
3617: \> \> \> \> \> \textbf{for} $i$ := $1 .. n$ \\
3618: \> \> \> \> \> \> $(p''_i, x''_i)$ := $\func{disj}(subg(x_i,p_1),
3619: subg(x'_i, p_2),  P \cup \{join(root(p_1),root(p_2))\})$) \\
3620: \> \> \> \> \> \> \textbf{if} ($p''_i = \ourtop$)
3621:                 \textbf{return} $(\ourtop,\_)$  \\
3622: \> \> \> \> \> \textbf{endfor} \\
3623: \> \> \> \> \> $p$ := $\{join(root(p_1),root(p_2)) \rightarrow f(x''_1,
3624: \ldots, x''_n)\} \cup p \cup p''_1 \cup \cdots \cup p''_n$ \\
3625: \> \> \> \> \textbf{else} \\
3626: \> \> \> \> \> $p$ := \= $\{join(root(p_1),root(p_2) \rightarrow f(x_1, \ldots,
3627: x_n)\} \cup p \cup$  \\
3628: \> \> \> \> \> \>  $subg(x_1, p_1) \cup \cdots \cup subg(x_n,p_1)$ \\
3629: \> \> \> \> \textbf{endif} \\
3630: \> \> \> \textbf{endfor} \\
3631: \> \> \> \textbf{foreach}  $root(p_2) \rightarrow f(x'_1, \ldots, x'_n)
3632: \in p_2$ \\
3633: \> \> \> \> \textbf{if} ($\neg \exists root(p_1) \rightarrow f(x_1, \ldots,
3634: x_n) \in p_1$) \\
3635: \> \> \> \> \> $p$ := $\{join(root(p_1),root(p_2) \rightarrow f(x'_1, \ldots,
3636: x'_n)\} \cup p \cup$  \\
3637: \> \> \> \> \> \>  $subg(x'_1, p_2) \cup \cdots \cup subg(x'_n,p_2)$ \\
3638: \> \> \> \textbf{endfor} \\
3639: \> \> \> \textbf{return} $(p, join(root(p_1),root(p_2)))$  
3640: \end{tabbing}
3641: \end{small}
3642: 
3643: The \func{rt} operation constructs a ti-grammar
3644: from a type $t$ and instantiation $i$ and 
3645: is defined as the first element in the pair resulting from
3646: $\func{rt}(t, i, \emptyset)$. 
3647: 
3648: \begin{small}
3649: \begin{tabbing}
3650: xx \= xx \= xx \= xx \= xx \= xx \= xx \= xx \= \kill
3651: \func{rt}($t$,$i$,$P$) \\
3652: \> \textbf{if} ($\exists ti(t,i) \in P$) 
3653:                 \textbf{return} $(\emptyset,ti(t,i)))$\\
3654: \> \textbf{case}: \\
3655: \> \> $i$ is a base instantiation: \textbf{return}  
3656:         \func{base}($t$,$i$,$P$) \\
3657: \> \> $i = pred(c_1 \rightarrow s_1, \ldots, c_n \rightarrow s_n)$: \\
3658: \> \> \> \textbf{if} ($t \neq pred(t_1, \ldots, t_n)$) \textbf{return}
3659: $(\ourtop, \_)$ \\
3660: \> \> \> \textbf{let} $t$ be of the form $pred(t_1, \ldots, t_n)$ \\
3661: \> \> \> \textbf{for} $j$ = $1 .. n$ \\
3662: \> \> \> \> $(tc_j, xc_j)$ := \func{rt}($t_j$, $c_j$, $P$) \\ 
3663: \> \> \> \> $(ts_j, xs_j)$ := \func{rt}($t_j$, $s_j$, $P$) \\ 
3664: \> \> \> \> \textbf{if} ($tc_j = \ourtop \mbox{ or } ts_j = \ourtop$)
3665:                 \textbf{return} $(\ourtop, \_)$ \\
3666: \> \> \> \textbf{endfor} \\
3667: \> \> \> $r$ := $\{ti(t,i) \rightarrow \ipred(xc_1,xs_1,
3668: \ldots, xc_n, xs_n)\} \cup$  \\
3669: \> \> \> \> \> $tc_1 \cup \cdots \cup tc_n \cup ts_1 \cup \cdots \cup ts_n$ \\
3670: \> \> \> \textbf{return} $(r, ti(t,i))$ \\ 
3671: \> \> \textbf{default}: \\
3672: \> \> \> \textbf{if} ($t \in V_{type}$) \textbf{return} $(\ourtop, \_)$ \\
3673: \> \> \> $r$ := $\emptyset$ \\
3674: \> \> \> \textbf{foreach} $x \rightarrow f(x_{i1}, \ldots, 
3675:                 x_{in}) \in rules(i)$ \\
3676: \> \> \> \> \textbf{if} ($\exists x' \rightarrow f(x_{t1}, \ldots, 
3677:                 x_{tn}) \in rules(t)$) \\
3678: \> \> \> \> \> \textbf{for} $j$ = $1 .. n$ \\
3679: \> \> \> \> \> \> $(r_j, x_j)$ := \func{rt}($x_{tj}$, $x_{ij}$, 
3680:                 $P \cup \{ti(x_{tj},x_{ij})\}$) \\
3681: \> \> \> \> \> \> \textbf{if} ($r_j = \ourtop$) \textbf{return} $(\ourtop,\_)$ \\
3682: \> \> \> \> \> \textbf{endfor} \\
3683: \> \> \> \> \> $r$ := $\{ti(t,i) \rightarrow f(x_1, \ldots, x_n)\} \cup r
3684:  \cup r_1 \cup \cdots \cup r_n$ \\
3685: \> \> \> \> \textbf{endif} \\
3686: \> \> \> \textbf{endfor} \\
3687: \> \> \> \textbf{return} $(r, ti(t,i))$ 
3688: \end{tabbing}
3689: 
3690: \begin{tabbing}
3691: xx \= xx \= xx \= xx \= xx \= xx \= xx \= xx \= \kill
3692: \func{base}($t$,$base$,$P$) \\
3693: \> \textbf{if} ($base = \ttnew$) \textbf{return} ($\{\nt{new} \rightarrow
3694: \#\texttt{fresh}\#\}, \nt{new}$) \\
3695: \> \textbf{if} ($ti(t,base) \in P$) \textbf{return} ($\emptyset$,$ti(t,base)$) \\
3696: \> \textbf{if} ($t \in V_{type}$): \\
3697: \> \> \textbf{if} ($base = \ttground$) 
3698:         \textbf{return} 
3699:         ($\{ti(t,\ntground) \rightarrow \sgrounds{t}\}, ti(t,\ntground)$) \\
3700: \> \> \textbf{else} \textbf{return} ($\{ti(v,\ntold) \rightarrow
3701: \sgrounds{v} ~;~ \svars{v}\}, 
3702:                 ti(v, \ntold)$) \\ 
3703: \> \textbf{else if} ($t$ is of the form $pred(t_1, \ldots, t_n)$) \\
3704: \> \>  \textbf{return} (\{$ti(pred(t_1, \ldots, t_n),\ttground) \rightarrow \gpred$\}, 
3705:            $ti(pred(t_1, \ldots, t_n),\ttground)$ ) \\
3706: \> \textbf{else} \\
3707: \> \> $r$ := $\emptyset$ \\
3708: \> \> \textbf{foreach} $x \rightarrow f(t_{1}, \ldots, 
3709:                 t_{n})$ in $rules(t)$ \\
3710: \> \> \> \textbf{for} $j \in 1 .. n$ \\
3711: \> \> \> \> $(r_j,x_j)$ := \func{base}($t_j$,$base$,$P \cup \{
3712: ti(t,base) \}$) \\
3713: \> \> \> \textbf{endfor} \\
3714: \> \> \> $r$ := $\{ti(t,base) \rightarrow f(x_1, \ldots, x_n)\} \cup r
3715:  \cup r_1 \cup \cdots \cup r_n$ \\
3716: \> \> \textbf{endfor} \\
3717: \> \> \textbf{if} ($base = \ttold$ and $t$ is a solver type) \textbf{then} $r$ := $\{ti(t,base)
3718: \rightarrow \#\texttt{var}\#\} \cup r$ \\
3719: \> \> \textbf{return} $(r,ti(t,base))$
3720: 
3721: \end{tabbing}
3722: \end{small}
3723: 
3724: 
3725: 
3726: \end{document}
3727: 
3728: 
3729: 
3730: 
3731: 
3732: 
3733: 
3734: 
3735: 
3736: 
3737: 
3738: 
3739: 
3740: % LocalWords:  instdef nelist modedef pred det semidet elist deconstruct nondet
3741: % LocalWords:  ti dupl cint groundness moded init int
3742: % LocalWords:  evenlist oddlist pairlist CLP ary
3743: