1: \documentclass{article}
2:
3: \usepackage{euscript}
4: \usepackage{amssymb}
5: \usepackage{epsfig}
6:
7: \textwidth=1.1\textwidth
8: %\textheight=1.1\textheight
9: %\def\baselinestretch{1.2}
10:
11:
12: %\newtheorem{Theorem}{Theorem}
13: \newtheorem{lem}{Lemma}
14: \newenvironment{pf}{{\bf Proof}}{~~{Q.E.D.}}
15:
16:
17:
18:
19: \newcommand{\ERA}{{\bf ERA}}
20:
21: \newlength{\opencomm}
22: \settowidth{\opencomm}{$(*$}
23: \newcommand{\opn}{\hspace*{\opencomm}}
24: \newlength{\gammalen}
25: \settowidth{\gammalen}{$\hspace*{1mm}\gamma(\tilde{\pi}\setminus S^\prime)=$}
26: \newcommand{\gammaspc}{\hspace*{\gammalen}}
27:
28:
29: \newcommand{\No}{\mbox{${N}^{\circ}$}}
30:
31:
32: \newcommand{\ET}[1]{\underline{#1}}
33: \newcommand{\C}[1]{\mbox{$\EuScript #1$}}
34: \newcommand{\GC}[2]{\mbox{#1\hspace*{-0.5em}\makebox[2em][c]{%
35: \raisebox{-1pt}{$\rightharpoondown$}\hspace*{-1em}%
36: \raisebox{1pt}{$\leftharpoonup$}\hspace*{-0.75em}%
37: \raisebox{-2pt}{{\tiny$\alpha$}}\hspace*{-0.4em}%
38: \raisebox{5pt}{{\tiny$\gamma$}}%
39: }#2}}
40:
41: \newcommand{\notsim}{\mbox{$\not\sim$}}
42: \newcommand{\BIGSQCAP}{\mbox{{\LARGE$\sqcap$}}}
43: \newcommand{\DERIV}[2]{\mbox{$\stackrel{\hspace*{-1mm}\mbox{{\tiny#1}}}{\Rightarrow}
44: \hspace*{-1.1em}
45: \raisebox{-1.25mm}{{\tiny #2}}
46: \hspace*{0.4em}$}}
47: \newcommand{\IMP}[1]{\mbox{${\EuScript ALS}(#1)$}}
48: \newcommand{\SEM}[2]{\mbox{$#1[\![#2]\!]$}}
49: \newcommand{\SOP}[2]{\mbox{$#1\lefteqn{\{}\hspace*{2.7pt}\vert#2\lefteqn{\vert}\}$}}
50: \newcommand{\CSEM}[2]{\mbox{$Sem[\![#2]\!]#1$}}
51:
52: \newcommand{\TrS}{\mbox{$\langle{\EuScript S},\tau,\iota,\varsigma\rangle$}}
53: \newcommand{\TAU}{\mbox{$\wp({\EuScript S}\times{\EuScript S})$}}
54: \newcommand{\PSS}{\mbox{$\wp({\EuScript S})$}}
55: \newcommand{\CBOT}{\mbox{$\bot$}}
56: \newcommand{\CTOP}{\mbox{$\top$}}
57: \newcommand{\CINCL}{\mbox{$\subseteq$}}
58: \newcommand{\CCUP}{\mbox{$\cup$}}
59: \newcommand{\CSD}{\mbox{$\langle\PSS;\CINCL,\CBOT,\CCUP\rangle$}}
60: \newcommand{\CSF}[1]{\mbox{{\it sp}$_{\iota}^{\tau^{#1}}$}}
61:
62: \newcommand{\APSS}{\mbox{$\overline{\PSS}$}}
63: \newcommand{\ABOT}{\mbox{$\bot^\prime$}}
64: \newcommand{\ATOP}{\mbox{$\top^\prime$}}
65: \newcommand{\AINCL}{\mbox{$\supseteq$}}
66: \newcommand{\ACAP}{\mbox{$\cap$}}
67: \newcommand{\ACUP}{\mbox{$\cup$}}
68: \newcommand{\CV}{\mbox{${\EuScript C\EuScript V}$}}
69: \newcommand{\FP}{\mbox{${\EuScript F\EuScript P}$}}
70: \newcommand{\TERM}{\mbox{${\EuScript T\EuScript R\EuScript S}$}}
71: \newcommand{\TYPES}{\mbox{${\EuScript T\EuScript Y\EuScript P\EuScript E\EuScript S}$}}
72: \newcommand{\EQS}{\mbox{${\EuScript E\!\EuScript Q\EuScript S}$}}
73: \newcommand{\PSEQS}{\mbox{$\wp(\EQS)$}}
74: \newcommand{\PPSEQS}{\mbox{$\wp(\wp(\EQS))$}}
75: \newcommand{\PDEQS}{\mbox{$\wp^2(\EQS)$}}
76: \newcommand{\CSDE}{\mbox{$\PDEQS(\subseteq,\emptyset,\cup)$}}
77: \newcommand{\ASDE}{\mbox{$\PSEQS(\AINCL,\EQS,\ACAP)$}}
78: \newcommand{\INF}{\mbox{$\lefteqn{\bigcirc}\infty$}}
79: \newcommand{\ACC}[1]{\mbox{${\EuScript A}(#1)$}}
80:
81: \newcommand{\Red}[1]{\mbox{\bf Red(#1)}}
82: \newcommand{\Co}[1]{\mbox{\bf Co(#1)}}
83: \newcommand{\Know}[2]{\mbox{$#1\DERIV{+}{G}#2$}}
84: \newcommand{\DWID}{\mbox{$\lefteqn{\mbox{\raisebox{1.1ex}[0pt][0pt]{\hspace*{0.5pt}$\sim$}}}\bigtriangledown$}}
85: \newcommand{\MFVS}[1]{\mbox{$#1\backslash_{\mbox{\tiny\bf FVS}}$}}
86: \newcommand{\mfvs}{\mbox{{\small\bf FVS}}}
87: \newcommand{\OPR}[1]{\mbox{\tt\bf #1}}
88: \newcommand{\DEREF}{\mbox{$^{\wedge}$}}
89:
90: \newcounter{numline}
91: \newcommand{\NM}{\vspace*{-1pt}\stepcounter{numline}\lefteqn{\mbox{\tiny\bf\thenumline:}}}
92: \newcommand{\PHC}{\lefteqn{\phantom{\mbox{$\displaystyle\bigcap_i$}}}}
93:
94: \newcommand{\Z}{\hspace*{5mm}}
95:
96: \newcommand{\ADT}{\mbox{\,\raisebox{3pt}[0pt][0pt]{\fontsize{5}{5}\selectfont$\swarrow\!\searrow$}\,}}
97:
98: \newcommand{\KW}[1]{{\bf #1}}
99:
100:
101: \begin{document}
102:
103: \title{Analysis of Equality Relationships \\ for Imperative Programs}
104:
105: \author{Pavel Emelianov%
106: \footnote{This work was partly done when the author was in {\em
107: Laboratoire d'informatique, Ecole polytechnique}\/ (Palaiseau,
108: France) and {\em Ecole normale sup\'erieure d'ingenieur}
109: (Bourges, France).} \\
110: Institute of Informatics Systems, \\
111: 6 avenue Lavrentiev \\
112: 630090 Novosibirsk, Russia \\
113: emelianov@iis.nsk.su}
114:
115: %\author{Pavel Emelyanov}
116: %\institute{Institute of Informatics Systems\thanks{This work was
117: %done when the author was in Laboratoire d'informatique, Ecole
118: %polytechnique (Palaiseau, France) and Ecole normale sup\'erieure
119: %d'ingenieur (Bourges, France) in 2000-2002.}, 6 avenue Lavrentiev,
120: %630090 Novosibirsk, Russia \\ \email{emelianov@iis.nsk.su}}
121:
122:
123: %\date{19 December 2005}
124:
125:
126: \maketitle
127:
128: \begin{abstract}
129: In this article, we discuss a flow--sensitive analysis of equality
130: relationships for imperative programs. We describe its semantic
131: domains, general purpose operations over abstract computational
132: states (term evaluation and identification, semantic completion,
133: widening operator, etc.) and semantic transformers corresponding
134: to program constructs. We summarize our experiences from the last
135: few years concerning this analysis and give attention to
136: applications of analysis of automatically generated code. Among
137: other illustrating examples, we consider a program for which the
138: analysis diverges without a widening operator and results of
139: analyzing residual programs produced by some automatic partial
140: evaluator. An example of analysis of a program generated by this
141: evaluator is given.
142:
143: \vskip\baselineskip
144:
145: \noindent{\bf Keywords}: abstract interpretation, value numbering,
146: equality relationships for program terms, formal grammars,
147: semantic transformers, widening operator, automatically generated
148: programs.
149:
150: \end{abstract}
151:
152:
153:
154:
155:
156:
157: \section*{Introduction}
158:
159: {\em Semantic analysis}\/ is a powerful technique for building
160: effective and reliable programming systems. In
161: \cite{EmSa/94,Em/96,SAS-4/97} we presented a new kind of a
162: semantic flow--sensitive analysis designed in the framework of
163: {\em abstract interpretation}\/ \cite{CC/77,CC/92b,CC/92a}. This
164: analysis which determines an approximation of sets of invariant
165: term equalities $t_1=t_2$\/ was called {\em the analysis of
166: equality relationships for program terms} (hereinafter referred to
167: as \ERA).
168:
169: Most traditional static analyses of imperative programs are
170: interested in finding the (in)equalities of a specific kind
171: (so--called {\em value analyses}; only they are discussed here)
172: describing regular approximations (i.e. they have simple
173: mathematical descriptions and machine representations) of sets of
174: values: convex polyhedrons/oc\-ta\-hed\-rons/oc\-ta\-gons
175: \cite{CouHal/78,HalbwachsProyRoumanoff/97,Mine/2001:pado,ClarisoCortadella-SAS04},
176: affine \cite{Karr/76,GulwaniNecula-POPL03} and congruent
177: \cite{Gra/91b,Mas/92} hyper-planes, their non-relational
178: counterparts \cite{CC/77,WZ/91,Gra/89,Mas/93} as well, etc. They
179: are carefully designed to be reasonable (i.e. they express
180: non-trivial semantic properties) and effectively computed (i.e.
181: there are polynomial
182: algorithms to handle them\footnote{%
183: An example of this sort is an approximation of value sets by conic
184: shapes that has only one ``computational disadvantage'': semantic
185: transformers involve algorithms known to be ${\mathcal NP}$-hard.
186: Proposed in \cite{BerMar/76} many years ago it did not gain
187: ground.}) but ``regular'' nature does not allow them to treat well
188: programs with irregular control/data-flows. Hence of special
189: interest is investigations of approximations based on sets of
190: terms which can have potentially arbitrary nature, i.e. they could
191: be powerful (due to their irregularity) but effectively computed.
192: One well known example is the set--based analysis
193: \cite{HeiJaf/94,CC/95}.
194:
195: In our case, terms represent all expressions computed in programs.
196: This enables the analysis to take into account different aspects
197: of program behavior in a unified way. A such unified treatment of
198: all semantic information allows the analysis to improve its
199: accuracy. This does not mean that \ERA\/ is a generalization of
200: all other value analyses (except the {\em constant propagation}
201: one), because they use different approaches (semantic domains and
202: transformers) to extract effectively and precisely the limited
203: classes of semantic properties. In general, the results of the
204: analyses are not comparable.
205:
206: \ERA\/ provides interesting possibilities for gathering and
207: propagating different invariant information about programs in a
208: unified way. This information can be used both for verification
209: and optimization purposes. The second is especially interesting
210: for automatically generated programs: residual, i.e. obtained in
211: the process of the partial evaluation, and synthesized from
212: high--level specifications. Due to nature of automatic generation
213: processes, such programs have specific control flows (for example,
214: hierarchy of nested conditional statements with specific
215: conditions; in the case of residual programs this hierarchy is
216: more deep as ``degree'' of the partial evaluation increases) that
217: can be successfully optimized on base of gathered invariant
218: information.
219:
220: Besides the peculiarity of \ERA\/ mentioned above, let us discuss
221: some common properties of the semantic analyses. Such taxonomic
222: properties of the analysis algorithms as the attribute
223: (in)dependence, context (in)sensitivity, flow (in)sensitivity,
224: scalability and some other properties are well known. However, it
225: is the author's opinion that a notion of ``{\em interpretability
226: of a semantic analysis}''\/ has not been considered adequately
227: yet. Here the interpretability of analysis means how extensively
228: the properties of primitive operations of the language
229: (arithmetical, logical, etc.) and type information are allowed for
230: analyzing and can be handled when the analysis works.
231:
232: One extreme point of view on the interpretability is an approach
233: accepted in the ``pure'' program scheme theory where no
234: interpretations of functional symbols or type information are allowed\footnote{%
235: More precisely, there exist some works in the program scheme
236: theory where some semantic interpretations of functional symbols
237: (like commutativity of superposition $f\circ g\equiv g\circ f$,
238: etc.) are considered.}. Unfortunately, the results obtained under
239: this approach are not reasonably strong. Nevertheless, it must be
240: underscored that \ERA\/ dates back to V. Sabelfeld's works in the
241: program scheme theory \cite{Sa/79,Sa/80}. Another extreme leads to
242: the complete description of the program behavior that is also not
243: workable. Obviously, it is closely allied to its flow sensitivity
244: (ignoring some part of semantic information does not allow us to
245: treat precisely some control flow constructs of analyzed programs)
246: and its scalability (attempts to take into account large quantity
247: of semantic properties, for example, using some theorem prover
248: which is invoked while an semantic analyzer works and deduces new
249: properties, can lead to combinatorial explosion for abstract
250: computational states).
251:
252:
253: It is possible that the interpretability has not been highlighted
254: enough, because most of analysis algorithms take into account the
255: limited classes of primitive operations and type information and
256: they cannot be enriched in some natural way. For example, an
257: interval analysis is not able to incorporate congruence properties
258: in some natural way, etc\footnote{Of course, it is possible to use
259: sophisticated approaches for combinations of analyses but it
260: introduces complicated problems under implementations and it is
261: not an enrichment of original ones.}. Essentially another case is
262: \ERA\/ where we have a choice to handle expressiveness of the
263: analysis. We intend to illustrate the notion of ``interpretability
264: of analysis'', its importance and usefulness on this example of
265: analysis.
266:
267: Among the analyses closely related to ours we would like to point
268: out the following. A semantic analysis for detection of equalities
269: between program variables (and simple relationships among them)
270: was described in \cite{AWZ/88}. It makes a list of sets of
271: variables discovered to be equal by using the Hopcroft's
272: partitioning algorithm for finite--state automata. This algorithm
273: being quite efficient is not however precise enough. Further
274: value-numbering techniques were developed in
275: \cite{RuthingKnoopSteffen-SAS99,Gargi-PLDI02,GulwaniNecula-SAS04}.
276: These algorithms demonstrate that adequacy of value numbering is
277: resource-consuming. For example, in the last case time complexity
278: of the algorithm is $O(k^3jN)$\/ where $k$\/ is a number of
279: program variables, $j$\/ is a number join-points, and $N$\/ is
280: program size.
281:
282: Another important example is the set-based analysis
283: \cite{HeiJaf/94} mentioned above. Here approximating sets of terms
284: are found with resolving some system of set-theoretical equations.
285: Formal grammars were used for an analysis of recursive data
286: structures of functional languages (see, for example,
287: \cite{Jo/87}). Formal languages were applied to coding of memory
288: access paths in \cite{Deu/94,Venet/99} and values of program
289: variables in the set-based analysis. \cite{CC/95} established
290: common foundations connecting and generalizing different
291: approaches using formal languages to represent semantic
292: properties. Of course, we should mention techniques from the
293: automatic proof theory and the term rewriting theory which can be
294: widely applied both at the analysis stage to improve its accuracy
295: and the post-processing stage to present its results to the user.
296:
297: This article is organized as follows: In {\bf Sections
298: \ref{PConcrete}} and {\bf\ref{PAbstract}} we describe the semantic
299: properties, concrete and abstract, respectively, which are
300: considered in \ERA. In {\bf Section \ref{OpBasic}} we discuss some
301: basic operations over the semantic properties used to define the
302: semantic transformers which are next presented in {\bf Section
303: \ref{STransformers}}. In {\bf Section \ref{WideningOp}} we
304: consider a widening operator and the complexity of \ERA\/ is
305: discussed. Finally, {\bf Section \ref{Experiments}} describes
306: processing of \ERA\/ invariants and presents some results of our
307: experiments with \ERA. In {\bf Appendix} an example of analysis of
308: some residual program is considered.
309:
310:
311:
312: \section{Properties of interest}
313:
314: \subsection{Concrete properties}\label{PConcrete}
315:
316: A usual choice for the description of the operational semantics is
317: a specification of some transition relationship on the pairs
318: $<${\em control point, state of program memory}$>$ (see, for
319: example, \cite{PFA/81,HOOTS/98}) where the states of program
320: memory are described by mapping the cells of memory into a
321: universe of values. Here variables (groups of cells) and their
322: values (constants) are in asymmetric roles. Another example of
323: ``asymmetry'': manipulations over the structured objects of
324: programs (arrays, records etc.) are not so transparent as over the
325: primary ones. To describe the operational semantics for \ERA, we
326: used another approach. All objects of a program are considered to
327: be ``identical'' in the following meaning.
328:
329: Let \CV\/ be a set of $0$--ary symbols representing variables and
330: constants. The last ones may be of the following kinds: scalars,
331: compositions over scalars (i.e., constant arrays, records, etc.),
332: names of record fields, and {\em indefiniteness}. Let \FP\/ be a
333: set of n--ary ({\em functional}) symbols which represent primitive
334: operations of programming languages: arithmetic, logic, type
335: casting, and all the kinds of memory addressing, as well. Let
336: \TERM\/ be a set of well-formed terms over \CV\/ and \FP,
337: hereinafter referred to as {\em program terms}. They represent
338: expressions computed during execution of a program. So, as a state
339: of program memory we take a reflexive, symmetrical, and transitive
340: relationship (i.e., {\em equivalence relationship}) over \TERM.
341: The relationship defines some set of term equalities which we use
342: to describe the operational semantics and call a {\em computation
343: state}.
344: %
345: %This approach should be compared to "Algebaric data types"
346: %
347: %Notice that this set is always infinite because there exists infinitely consistent
348: %equalities of terms in contrast to the finite mapping {\em cells$\rightarrow$values}
349: %where it is always possible for convergent programs.
350:
351:
352:
353:
354: \begin{table}%[ht]
355: \begin{center}
356: \fbox{\fontsize{8}{10}\selectfont
357: \begin{tabular}{lcc}
358: &&\\
359: & {\sc then}-branch & {\sc else}-branch\\
360: &&\\
361: \hline
362: &&\\
363: \sc entry
364: & $\!\!\!\!\left\{\rule[10mm]{0pt}{0pt}\!\!\left\{\!\!\mbox{\begin{tabular}{l}
365: a[1]=1,a[2]=j=2,a[3]=i=3,\\
366: a=\fbox{1}\fbox{2}\fbox{3},ODD(x)=TRUE\\
367: \end{tabular}}\!\!\right\}\!\!\right\}$
368: & $\left\{\rule[10mm]{0pt}{0pt}\!\!\left\{\!\!\mbox{\begin{tabular}{l}
369: a[1]=1,a[2]=j=2,a[3]=i=3,\\
370: a=\fbox{1}\fbox{2}\fbox{3},ODD(x)=FALSE\\
371: \end{tabular}}\!\!\right\}\!\!\right\}$
372: \\
373: &&\\
374: \sc exit
375: & $\!\!\!\!\left\{\rule[10mm]{0pt}{0pt}\!\!\left\{\!\!\mbox{\begin{tabular}{l}
376: a[1]=i=j=1,a[2]=2,a[3]=3,\\
377: a=\fbox{1}\fbox{2}\fbox{3},ODD(x)=TRUE\\
378: \end{tabular}}\!\!\right\}\!\!\right\}$
379: & $\left\{\rule[10mm]{0pt}{0pt}\!\!\left\{\!\!\mbox{\begin{tabular}{l}
380: a[1]=i=j=3,a[2]=2,a[3]=1,\\
381: a=\fbox{3}\fbox{2}\fbox{1},ODD(x)=FALSE\\
382: \end{tabular}}\!\!\right\}\!\!\right\}$
383: \\
384: &&\\
385: \hline
386: &&\\
387: & \hfill \sc exit ~of~ $\lefteqn{\mbox{\footnotesize\sc if--statement}}$ & \\
388: &&\\
389: {\Large$\pi=$}
390: & $\!\!\!\!\left\{~\rule[10mm]{0pt}{0pt}\!\!\left\{\!\!\mbox{\begin{tabular}{l}
391: a[1]=i=j=1,a[2]=2,a[3]=3,\\
392: a=\fbox{1}\fbox{2}\fbox{3},ODD(x)=TRUE
393: \smash{\lefteqn{\mbox{\hspace*{3mm}\huge,}}}\\
394: \end{tabular}}\!\!\right\}\!\!\right.$
395: & \hspace*{-2mm}$\!\!\left.\!\!\left\{\mbox{\begin{tabular}{l}
396: a[1]=i=j=3,a[2]=2,a[3]=1,\\
397: a=\fbox{3}\fbox{2}\fbox{1},ODD(x)=FALSE\\
398: \end{tabular}}\!\!\right\}\rule[10mm]{0pt}{0pt}~\!\!\right\}$
399: \\
400: &&\\
401: \end{tabular}
402: }
403: \end{center}
404: \caption{Description of collecting semantics for {\bf Example 1}
405: (here the constant \fbox{$c_1$}\fbox{$c_2$}\fbox{$c_3$}
406: represents constant arrays).}\label{Collecting}
407: \end{table}
408:
409:
410:
411:
412:
413: Suppose that the following code
414:
415: \begin{tabbing}
416: \quad \=\quad \=\quad \kill
417: \KW{var} x,i,j : \KW{integer}; a : \KW{array} [1..3] \KW{of integer}=\{1,2,3\};\\
418: ...\\
419: i := 3;\\
420: j := i-1;\\
421: \KW{if} \KW{odd}(x) \KW{then}\\
422: \> i := i \KW{mod} j;\\
423: \> j := 1;\\
424: \KW{else}\\
425: \> j := a[i];\\
426: \> a[i] := a[1];\\
427: \> a[1] := j\\
428: \KW{end}\\
429: ...
430: \end{tabbing}
431: \begin{center}{\bf Example 1}\end{center}
432:
433: \noindent is executed at least twice for the different parities of
434: the variable {\sc x}. In {\bf Table \ref{Collecting}} static
435: semantics for five control points is given. We present a minimum%
436: \footnote{A set of equalities can be completed with any number of
437: consistent equalities.} subset of term equalities concerning
438: dynamic behavior of the piece. We shall use the property
439: {\large$\pi$}\/ (see {\bf Table \ref{Collecting}}) to illustrate
440: our further reasoning.
441:
442: Formally it is described as follows. Let \EQS\/ be a set of all
443: equalities of the terms from \TERM, i.e.,
444: $\EQS=\{t_1=t_2~\vert~t_1, t_2\in\TERM\}$. A set $S\in\PSEQS$\/ is
445: a computation state interpreted in the following way. For each
446: equality $t_i=t_j\in S$\/ values of the expressions represented by
447: $t_i$\/ and $t_j$\/ must be equal at this point for some execution
448: trace. We take the set \PPSEQS\/ as a set for a concrete semantic
449: domain describing {\em the collecting semantics}\/ of \ERA. So, an
450: element of the concrete semantic domain is a set of computation
451: states. For a particular point in a particular program it is a set
452: of computation states each of them corresponds to some execution
453: trace in the program that reaches this point.
454:
455:
456: Properties considered in \ERA\/ are presented by means of
457: context--free grammars ${\bf G}=({\EuScript N},{\EuScript
458: T},{\EuScript P},S)$\/ where ${\EuScript N}$\/ is a finite set of
459: nonterminals denoted by capital letters, $S\in{\EuScript N}$\/ is
460: the initial symbol of the grammar ${\bf G}$, ${\EuScript
461: T}=\CV\cup\FP\cup\{~(,~),~=,$~\raisebox{1mm}{{\Huge ,}}$\}$\/ is a
462: finite set of terminal symbols, and ${\EuScript P}$\/ is a finite
463: set of grammar rules. We do not give their precise description
464: because we shall use quite simple machinery from formal languages
465: theory that is not an object of considerations itself and serves
466: for demonstrations. We could use functional nets language as well
467: but it is rather machine--oriented and is not widely used. We
468: expect that all these descriptive ways become apparent from the
469: examples on {\bf Figure \ref{SemPrps}} and further ones.
470:
471:
472: In that way, a state of computation is represented by a language
473: $L{\bf(G)}$\/ generated by some grammar {\bf G} of the described
474: form. If for $A\in{\EuScript N} ~:~ \linebreak A\DERIV{+}{G}t_1
475: ~\wedge~ A\DERIV{+}{G}t_2$, i.e. $t_{1}=t_{2}\in L$({\bf G}), we
476: say that the nonterminal $A$\/ and the language $L$({\bf G}) know
477: the terms $t_{1},~t_{2}$. Obviously, such a grammar representation
478: has some superfluous ``syntactic sugaring''. We can use
479: $S\rightarrow A$\/ rules only and say about $A$--nonterminals as
480: classes of equal values.
481:
482:
483: \begin{figure}%[ht]
484: \centering
485: \unitlength=0.75mm
486: \linethickness{0.4pt}
487: \begin{picture}(170,61.50)
488: \put(15.00,52.50){\makebox(0,0)[cc]{Equality}}
489: \put(15.00,47.00){\makebox(0,0)[cc]{relationships}}
490: \put(82.00,52.50){\makebox(0,0)[cc]{Grammar}}
491: \put(82.00,47.00){\makebox(0,0)[cc]{representation}}
492: \put(148.00,52.50){\makebox(0,0)[cc]{Functional net}}
493: \put(148.00,47.00){\makebox(0,0)[cc]{representation}}
494: \put(0,8){\shortstack[l]{$a=h(y)$\\$x=f(a,y)$\\
495: $x=f(h(y),y)$\\$f(a,y)=f(h(y),y)$}}
496: \put(52,8){\shortstack[l]{$S\rightarrow A_1=A_1|A_2=A_2|A_3=A_3$\\
497: $A_1\rightarrow x|f(A_2,A_3)$\\
498: $A_2\rightarrow a|h(A_3)$\\
499: $A_3\rightarrow y$}}
500: \put(148.50,36.50){\oval(27.00,10.00)[]}
501: \put(140.00,34.00){\framebox(5.00,5.00)[cc]{$x$}}
502: \put(151.00,34.00){\framebox(5.00,5.00)[cc]{$f$}}
503: \put(153.00,34.00){\vector(-3,-4){13.67}}
504: \put(139.50,10.50){\oval(27.00,10.00)[]}
505: \put(131.00,8.00){\framebox(5.00,5.00)[cc]{$a$}}
506: \put(142.00,8.00){\framebox(5.00,5.00)[cc]{$h$}}
507: \put(166.00,10.50){\circle{11.0}}
508: \put(163.50,8.00){\framebox(5.00,5.00)[cc]{$y$}}
509: \put(154.00,34.00){\vector(2,-3){12.0}}
510: \put(147.00,10.00){\vector(1,0){13.50}}
511: \end{picture}
512: \caption{Semantic properties and their representations. The set of equalities
513: does not contain trivial equalities like $x=x$\/ and equalities given by
514: the symmetry of the equality relationship.}\label{SemPrps}
515: \end{figure}
516:
517: Evidently we may suppose that the set of rules ${\EuScript P}$\/
518: does not contain rules having identical right parts. It is
519: convenient to consider the grammars which do not contain useless
520: and redundant nonterminals and rules. A rule is useless if it
521: produces only one term (the language knows only a trivial equality
522: like $t=t$) and this term is not an argument of other terms. A
523: nonterminal is useless if it does not participate in derivations
524: of sentential forms \cite{AU/72}. Such grammars can arise as
525: result of operations on grammars. If nonterminals and rules are
526: useless or redundant it is possible to remove them (see {\bf Lemma
527: \ref{DeleteTerms}}). This operation called {\bf state reduction}
528: consist in detecting and removing a set of useless/redundant
529: nonterminals/rules. They are revealed with the well--known
530: incremental markup algorithms (see, for example, \cite{AU/72}).
531:
532:
533: For illustrating purposes we shall use special functional nets for
534: these grammars. Here nonterminals are represented by ovals
535: containing $0$--ary and functional symbols from right parts of
536: rules. Arcs from functional symbols to ovals represent argument
537: dependencies ordered from left to right (see {\bf Figure
538: \ref{SemPrps}}).
539:
540:
541:
542:
543: \subsection{Abstract properties}\label{PAbstract}
544:
545: It is an interesting peculiarity of \ERA\/ that abstract (i.e.
546: approximate) properties have the same nature as the computation
547: states of the operational semantics. Formally this approximation
548: is defined as follows.
549:
550:
551: \subsubsection{Functions of abstraction and concretization}
552:
553: Given a concrete property $\pi\in\PPSEQS$\/ and an abstract
554: property $\tilde{\pi}\in\PSEQS$, the abstraction function
555: $\alpha:\PPSEQS\rightarrow\PSEQS$\/ and the concretization one
556: $\gamma:\PSEQS\rightarrow\PPSEQS$\/ are defined in the following
557: way
558: \[
559: \begin{array}{lcl}
560: \alpha(\pi)~=~\left\{
561: \begin{array}{lcl}
562: \EQS, & ~ & \mbox{if } \pi = \emptyset,\\
563: \mbox{$\displaystyle\bigcap_{S\in\pi}$}S
564: & & \mbox{otherwise}
565: \end{array}
566: \right.& \mbox{~~and~~} &
567: \gamma(\tilde{\pi}) = \CCUP\{~\pi~\vert~\alpha(\pi)~\AINCL~\tilde{\pi}~\}\\
568: \end{array}
569: \]
570: where \CCUP\/ is the set--theoretical union on \PPSEQS\/ and
571: \AINCL\/ and \ACAP\/ are the set--theoretical inclusion and
572: intersection of the languages (i.e. on \PSEQS) respectively. We
573: take the empty language as infimum \ABOT\/ (there are no computed
574: expressions) of the semi-lattice of abstract semantic properties.
575: The supremum \ATOP\/ (an inaccessible computation state) is the
576: language containing all possible equalities of program terms. Also
577: \CINCL\/ is the set--theoretical inclusion on \PPSEQS\/
578:
579:
580: \begin{lem}
581: The abstraction function $\alpha$\/ is monotonic.
582: \end{lem}
583:
584: \begin{pf}
585: The function $\alpha$\/ is monotonic iff $\forall
586: \pi_1,\pi_2\in\PPSEQS : \pi_1\CINCL\pi_2 \Rightarrow \linebreak
587: \alpha(\pi_1)\AINCL\alpha(\pi_2)$. Because $\pi_1\CINCL\pi_2$,
588: then $\pi_2=\pi_1\CCUP(\pi_2\setminus\pi_1)$. So, we have
589: $\alpha(\pi_1)\AINCL \linebreak
590: \alpha(\pi_1\CCUP(\pi_2\setminus\pi_1))=
591: \alpha(\pi_1)\cap\alpha(\pi_2\setminus\pi_1)=\alpha(\pi_1)\cap(\cap_{S\in\pi_2\setminus\pi_1}S)$.
592: %\qed
593: \end{pf}
594:
595: \noindent $\gamma(\tilde{\pi})$\/ is the most imprecise element of \PPSEQS\/
596: that can be soundly approximated by $\tilde{\pi}\in\PSEQS$.
597: % Because for each abstract property $\gamma$\/ gives a unique set of languages
598: % all of which contain a sublanguage corresponding to this abstract property as a common
599: % part and not more we {\bf\large can} conclude that \ERA\/ provides the Galois surjection.
600: For the example of {\bf Table \ref{Collecting}}, the best approximation
601: of the concrete property {\large$\pi$} is
602: \[
603: \alpha(\pi)=\left\{\rule{0pt}{5mm}a[1]=i=j,a[2]=2\right\}.~~~~~~~~~~~\mbox{{\bf$(*)$}}
604: \]
605:
606:
607: %
608: %{\Huge Why semi-lattice and not complete lattice?}
609: %
610:
611: \subsubsection{Intersection of \ERA--languages}
612:
613: Finding of the intersection of context--free languages is an
614: undecidable problem in the general case. In our case, for the
615: languages of term equalities, an algorithm exists (see an example
616: at {\bf Fig. \ref{Intersection}}). It is similar to constructing a
617: Cartesian product of automata.
618:
619: \vskip 0.75\baselineskip
620: \noindent{\bf Algorithm.} Intersection of two languages of term equalities.\\
621: Input: grammars ${\bf G_1}=({\EuScript N}_1,{\EuScript T},{\EuScript P}_1,S_1)$
622: and ${\bf G_2}=({\EuScript N}_2,{\EuScript T},{\EuScript P}_2,S_2)$.\\
623: Output: grammar ${\bf G}=({\EuScript N},{\EuScript T},{\EuScript
624: P},S)$ such that
625: $L({\bf G})=L({\bf G_1})\cap L({\bf G_2})$.\\
626: Description:
627: \begin{enumerate}
628: \item Let ${\EuScript N}=\{ \langle N_1,N_2\rangle \vert%
629: N_1\in{\EuScript N}_1~\&~N_2\in{\EuScript N}_2 \}={\EuScript N}_1\times{\EuScript N}_2$.
630:
631: \item The set of rules ${\EuScript P}$ is defined as follows:
632: \begin{itemize}
633: \item The rule $\langle N_1,N_2\rangle\rightarrow t$ is introduced
634: if and only if
635: $t\in\CV~~\&~~N_1\rightarrow t\in{\EuScript P}_1~~\&~~
636: N_2\rightarrow t\in{\EuScript P}_2$.
637: \item The rule $\langle N_1,N_2\rangle\rightarrow t(%
638: \langle N_1^1,N_2^1\rangle,\ldots,\langle N_1^k,N_2^k\rangle)$
639: is introduced if and only if
640: $t\in\FP~~\&~~N_1\rightarrow t(N_1^1,\ldots,N_1^k)\in{\EuScript P}_1
641: ~~\&~~N_2\rightarrow t(N_2^1,\ldots,N_2^k)\in{\EuScript P}_2$.
642: \end{itemize}
643:
644: \item Add rules $S\rightarrow\langle N_1,N_2\rangle=\langle N_1,N_2\rangle$
645: for the initial nonterminal $S$ of {\bf G} and for all
646: $N_1\in{\EuScript N}_1\setminus\{S_1\}, N_2\in{\EuScript N}_2\setminus\{S_2\}$.
647:
648: \item Apply {\em state reduction}.
649: \end{enumerate}
650:
651:
652: \begin{figure}[t]
653: {\small
654: \centering
655: \unitlength=0.70mm
656: %\special{em:linewidth 0.4pt}
657: \linethickness{0.4pt}
658: \begin{picture}(150.00,103.00)
659: \put(26.50,95.50){\oval(25.00,9.00)[]}
660: \put(19.00,93.00){\framebox(5.00,5.00)[cc]{$f$}}
661: \put(29.00,93.00){\framebox(5.00,5.00)[cc]{$g$}}
662: \put(11.50,70.50){\oval(25.00,9.00)[]}
663: \put(4.00,68.00){\framebox(5.00,5.00)[cc]{$z$}}
664: \put(14.00,68.00){\framebox(5.00,5.00)[cc]{$f$}}
665: \put(34.00,68.00){\framebox(5.00,5.00)[cc]{$x$}}
666: \put(44.00,68.00){\framebox(5.00,5.00)[cc]{$a$}}
667: \put(54.00,68.00){\framebox(5.00,5.00)[cc]{$y$}}
668: \put(46.50,70.50){\oval(35.00,9.00)[]}
669: \put(30.00,93.00){\vector(-2,-3){12.00}}
670: \put(21.00,93.00){\vector(-2,-3){12.00}}
671: \put(33.00,93.00){\vector(3,-4){13.67}}
672: \put(32.00,61.50){\oval(30.00,3.00)[b]}
673: \put(17.00,68.00){\line(0,-1){7.00}}
674: \put(47.00,61.00){\vector(0,1){5.00}}
675: \put(103.50,95.50){\oval(25.00,9.00)[]}
676: \put(96.00,93.00){\framebox(5.00,5.00)[cc]{$f$}}
677: \put(106.00,93.00){\framebox(5.00,5.00)[cc]{$g$}}
678: \put(88.50,70.50){\oval(25.00,9.00)[]}
679: \put(81.00,68.00){\framebox(5.00,5.00)[cc]{$x$}}
680: \put(91.00,68.00){\framebox(5.00,5.00)[cc]{$f$}}
681: \put(111.00,68.00){\framebox(5.00,5.00)[cc]{$f$}}
682: \put(121.00,68.00){\framebox(5.00,5.00)[cc]{$a$}}
683: \put(131.00,68.00){\framebox(5.00,5.00)[cc]{$y$}}
684: \put(107.00,93.00){\vector(-2,-3){12.00}}
685: \put(98.00,93.00){\vector(-2,-3){12.00}}
686: \put(110.00,93.00){\vector(3,-4){13.50}}
687: \put(141.00,68.00){\framebox(5.00,5.00)[cc]{$z$}}
688: \put(128.50,70.50){\oval(45.00,9.00)[]}
689: \put(111.00,70.00){\vector(-1,0){10.00}}
690: \put(111.50,61.50){\oval(35.00,3.00)[b]}
691: \put(94.00,68.00){\line(0,-1){7.00}}
692: \put(129.00,61.00){\vector(0,1){5.00}}
693: \put(129.00,86.00){\makebox(0,0)[cc]{$L_{2}$}}
694: \put(51.00,86.00){\makebox(0,0)[cc]{$L_{1}$}}
695: \put(44.50,47.50){\oval(25.00,9.00)[]}
696: \put(37.00,45.00){\framebox(5.00,5.00)[cc]{$f$}}
697: \put(47.00,45.00){\framebox(5.00,5.00)[cc]{$g$}}
698: \put(48.00,45.00){\vector(-2,-3){12.00}}
699: \put(39.00,45.00){\vector(-2,-3){12.00}}
700: \put(51.00,45.00){\vector(3,-4){13.67}}
701: \put(29.00,20.00){\framebox(5.00,5.00)[cc]{$f$}}
702: \put(31.50,22.50){\oval(19.00,9.00)[]}
703: \put(64.50,22.50){\oval(25.00,9.00)[]}
704: \put(57.00,20.00){\framebox(5.00,5.00)[cc]{$a$}}
705: \put(67.00,20.00){\framebox(5.00,5.00)[cc]{$y$}}
706: \put(48.50,13.50){\oval(33.00,3.00)[b]}
707: \put(65.00,13.00){\vector(0,1){5.00}}
708: \put(32.00,20.00){\line(0,-1){7.00}}
709: \put(98.50,47.50){\oval(25.00,9.00)[]}
710: \put(91.00,45.00){\framebox(5.00,5.00)[cc]{$f$}}
711: \put(101.00,45.00){\framebox(5.00,5.00)[cc]{$z$}}
712: \put(96.00,20.00){\framebox(5.00,5.00)[cc]{$x$}}
713: \put(98.50,22.50){\oval(19.00,9.00)[]}
714: \put(94.00,45.00){\vector(1,-4){4.50}}
715: \put(133.00,36.00){\makebox(0,0)[cc]{$L_{1}\ACAP L_{2}$}}
716: \end{picture}
717: \caption{Intersection of computation states.}\label{Intersection}
718: }
719: \end{figure}
720:
721:
722:
723: The described algorithm of intersection is very ``na\"{\i}ve'' and
724: impractical. To improve it we should choose a more efficient
725: strategy for generating functional symbols. To do this we first do
726: a topological sorting of the functional symbols appearing in the
727: right parts of rules; intersect the $0$--ary symbol sets; and then
728: generate the next functional symbol in conformity with its
729: topological order if and only if all arguments of this symbol
730: already exist in the new grammar. For practical cases this
731: intersection can be done in an linear average time with respect to
732: grammar size and linear space. It demands quadratic time in the
733: worst case.
734:
735:
736:
737:
738:
739:
740:
741: \subsection{Operations over semantic properties}\label{OpBasic}
742:
743: Now we shall discuss some basic operations over semantic properties \linebreak
744: $\SOP{}{\centerdot}:\PSEQS\rightarrow\PSEQS$\/ used to define the semantic
745: transformers of \ERA. For all \SOP{}{\centerdot}~ the notation
746: \SOP{\SOP{L}{\centerdot}}{\centerdot} means
747: \SOP{(\SOP{L}{\centerdot})}{\centerdot}.
748:
749:
750: \subsubsection{Removing terms}
751:
752: Operations over abstract computation states use certain common
753: transformation of the sets of term equalities which consists in {\bf removing}
754: some subset $L^\prime$. The following statement holds.
755:
756: \begin{lem}\label{DeleteTerms}
757: Removing any subset of term equalities preserves correctness of an
758: approximation.
759: \end{lem}
760:
761: \begin{pf}
762: It easy to see that
763: \[
764: \begin{array}{lcl}
765: \CCUP\{~\pi~\vert~\alpha(\pi)~\AINCL~\tilde{\pi}~\}=\gamma(\tilde{\pi})
766: &~\CINCL~&\gamma(\tilde{\pi}\setminus L^\prime)=\CCUP\{~\pi~\vert~\alpha(\pi)~\AINCL~(\tilde{\pi}\setminus L)~\}=\\
767: &&\gammaspc\CCUP\{~\pi~\vert~\mbox{$\displaystyle\bigcap_{L\in\pi}$}(L\ACUP
768: L^\prime)~\AINCL~\tilde{\pi}~\},
769: \end{array}
770: \]
771: which states that removing term equalities makes the approximation more rough
772: but it does preserve its correctness.
773: %~\qed
774: \end{pf}
775:
776: \noindent For (${\fontsize{17}{17}\selectfont*}$), for example,
777: $\gamma(\{\rule{0pt}{5mm}a[1]=i=j,a[2]=2\})\CINCL \gamma(\{a[1]=i\})$.
778:
779: We shall write \SOP{L}{\downarrow t} and \SOP{L}{\downarrow T} for
780: single term and term set removing followed by the state reduction
781: operation defined above.
782:
783:
784:
785:
786: \subsubsection{Term evaluation}\label{Term-evaluation}
787:
788: We define the abstract semantics for an {\bf evaluation of
789: a term} in an abstract computation state. The result of the
790: term evaluation is a state knowing the evaluated term.
791:
792: \vskip\baselineskip
793: \noindent{\bf Term evaluation} \SOP{L}{t}.
794:
795: \begin{enumerate}
796: \item If $t=t\in L$, then $\SOP{L}{t}=L$.
797: \item Otherwise, if $t\in\CV$, then add the new rules
798: $S\rightarrow A=A$ and $A\rightarrow t$\/ to the
799: grammar ${\bf G}$, where $A$\/ is a nonterminal
800: which does not exist in ${\bf G}$.
801: \item Otherwise, if $t=f(t_{1},\ldots,t_{n})$\/ where
802: $f\in\FP$\/ is a functional $n$--ary symbol and
803: the sub-terms $t_{1},\ldots,t_{n}$\/ have been calculated
804: (i.e. there exist derivations
805: $A_1\DERIV{+}{G}t_1,\ldots,A_n\DERIV{+}{G}t_n$),
806: then add the new rules $S\rightarrow A=A$\/ and
807: $A\rightarrow f(A_1,\ldots,A_n)$\/ to the grammar
808: ${\bf G}$, where $A$\/ is a nonterminal which
809: still does not exist in ${\bf G}$.
810: \end{enumerate}
811: To improve an accuracy of analysis we can take into account, for example, the
812: commutativity of primitive operations.
813: %\footnote{This observation is not expanding to the real computations}.
814: If $L$\/ knows $f(t_1,t_2)$\/ and $f$\/ is commutative then $\SOP{L}{f(t_2,t_1)}=L$.
815:
816: \begin{figure}[t]
817: \centering
818: {\small
819: \unitlength=0.85mm
820: \linethickness{0.4pt}
821: \begin{picture}(118.00,70.00)
822: \put(12.50,15.00){\oval(25.00,10.00)[]}
823: \put(4.00,12.00){\framebox(6.00,6.00)[cc]{$y$}}
824: \put(15.00,12.00){\framebox(6.00,6.00)[cc]{$a$}}
825: \put(12.50,40.00){\oval(25.00,10.00)[]}
826: \put(4.00,37.00){\framebox(6.00,6.00)[cc]{$x$}}
827: \put(15.00,37.00){\framebox(6.00,6.00)[cc]{$h$}}
828: \put(18.00,37.00){\vector(-1,-3){5.67}}
829: \put(82.50,15.00){\oval(25.00,10.00)[]}
830: \put(74.00,12.00){\framebox(6.00,6.00)[cc]{$y$}}
831: \put(85.00,12.00){\framebox(6.00,6.00)[cc]{$a$}}
832: \put(82.50,40.00){\oval(25.00,10.00)[]}
833: \put(74.00,37.00){\framebox(6.00,6.00)[cc]{$x$}}
834: \put(85.00,37.00){\framebox(6.00,6.00)[cc]{$h$}}
835: \put(88.00,37.00){\vector(-1,-3){5.67}}
836: \put(79.33,62.00){\framebox(6.00,6.00)[cc]{$f$}}
837: \put(52.00,37.00){\framebox(6.00,6.00)[cc]{$g$}}
838: \put(107.00,37.00){\framebox(6.00,6.00)[cc]{$f$}}
839: \put(107.00,12.00){\framebox(6.00,6.00)[cc]{$b$}}
840: \put(81.00,62.00){\vector(-3,-2){25.00}}
841: \put(83.67,62.00){\vector(3,-2){25.67}}
842: \put(99.50,33.33){\oval(19.00,6.67)[b]}
843: \put(109.00,37.00){\line(0,-1){4.00}}
844: \put(90.00,33.00){\vector(0,1){2.00}}
845: \put(111.00,37.00){\vector(0,-1){17.00}}
846: \put(65.83,33.33){\oval(19.00,6.67)[b]}
847: \put(56.33,37.00){\line(0,-1){4.00}}
848: \put(75.33,33.00){\vector(0,1){2.00}}
849: \put(57.17,18.33){\oval(7.67,6.67)[lb]}
850: \put(57.00,15.00){\vector(1,0){13.00}}
851: \put(53.33,37.00){\line(0,-1){19.00}}
852: \put(12.33,0.00){\makebox(0,0)[cc]{$L$}}
853: \put(82.33,0.00){\makebox(0,0)[cc]{$\SOP{L}{f(g(a,x),f(h(y),b))}$}}
854: \put(110.00,15.00){\oval(16.00,10.00)[]}
855: \put(110.00,40.00){\oval(16.00,10.00)[]}
856: \put(82.33,65.00){\oval(16.00,10.00)[]}
857: \put(55.00,40.00){\oval(16.00,10.00)[]}
858: \end{picture}
859: \caption{Term evaluation.}\label{TermEval}
860: }
861: \end{figure}
862:
863:
864: \subsubsection{Identification of terms}
865:
866: %{\bf Identification of terms}\/ corresponds to a situation when, during execution
867: %of a program, it turns out that the values of computed expressions represented
868: %by the terms become equal.
869:
870: When the standard semantics defines that during program execution
871: values of computed expressions are equal, we can incorporate this
872: information in the computation state (but it can also be left
873: out). {\bf Identification of terms}\/ transforms the state into a
874: new one incorporating this information. For example, we know that
875: a value of a term representing the conditional expression of an
876: {\bf IF}-statement coincides with 0-ary terms representing the
877: constants {\bf TRUE} or {\bf FALSE} when, respectively, {\bf
878: THEN}-branch or {\bf ELSE}-branch is being executed. So,
879: identification of terms along with {\em semantic completion}
880: considered below provides powerful facilities to take into account
881: real control flow in programs.
882:
883: \vskip\baselineskip
884: \noindent{\bf Identification of terms} \SOP{L}{t_1\equiv t_2}.
885:
886: \begin{enumerate}
887: \item If $t_1=t_2\in L$ then $\SOP{L}{t_1\equiv t_2}=L$.
888:
889: \item Let $A_{1}\DERIV{+}{G}t_1$\/ and $A_2\DERIV{+}{G}t_2$.
890: We replace the nonterminal $A_2$\/ by the nonterminal
891: $A_1$\/ in all rules of ${\EuScript P}$.
892: If rules with an identical right side
893: $B_1\rightarrow w,\ldots,B_k\rightarrow w$ have appeared, then
894: a certain nonterminal from the left sides of the rules (for example $B_1$)
895: must be taken and all nonterminals $B_2,\ldots,B_k$\/
896: in the grammar must be replaced by it.
897:
898: \item Repeat step 2 until stabilization.
899: If after that we have a state $L^\prime$\/ containing inconsistent
900: term equalities\footnote{There exists a wide spectrum of
901: inconsistency conditions. The simplest of them is an
902: equality of two different constants (see {\bf Section
903: \ref{Semantic-completion}}
904: for the further discussion).} then the result is \ATOP\/
905: else it is a reduction of~$L^\prime$.
906: \end{enumerate}
907:
908: \noindent An example of identification is given in {\bf Figure \ref{Identification}}.
909:
910: \begin{figure}%[ht]
911: \centering
912: {%\small
913: \unitlength=0.85mm
914: %\special{em:linewidth 0.4pt}
915: \linethickness{0.4pt}
916: \begin{picture}(140.00,70.00)
917: \put(19.00,59.00){\framebox(6.0,6.0)[cc]{$g$}}
918: \put(29.00,59.00){\framebox(6.0,6.0)[cc]{$a$}}
919: \put(27.00,62.00){\oval(24.00,12.00)[]}
920: \put(9.00,40.00){\framebox(6.0,6.0)[cc]{$f$}}
921: \put(12.00,43.00){\circle{12.00}}
922: \put(27.00,40.00){\framebox(6.0,6.0)[cc]{$g$}}
923: \put(37.00,40.00){\framebox(6.0,6.0)[cc]{$f$}}
924: \put(35.00,43.00){\oval(24.00,12.00)[]}
925: \put(21.00,59.00){\vector(-3,-4){7.33}}
926: \put(23.00,59.00){\vector(1,-1){10.00}}
927: \put(27.00,43.00){\vector(-1,0){8.50}}
928: \put(9.00,21.00){\framebox(6.0,6.0)[cc]{$x$}}
929: \put(12.00,24.00){\circle{12.00}}
930: \put(32.00,21.00){\framebox(6.0,6.0)[cc]{$b$}}
931: \put(35.00,24.00){\circle{12.00}}
932: \put(12.00,40.00){\vector(0,-1){10.00}}
933: \put(30.00,40.00){\vector(1,-3){3.33}}
934: \put(40.00,40.00){\vector(-1,-3){3.33}}
935: \put(89.00,59.00){\framebox(6.0,6.0)[cc]{$g$}}
936: \put(99.00,59.00){\framebox(6.0,6.0)[cc]{$a$}}
937: \put(97.00,62.00){\oval(24.00,12.00)[]}
938: \put(89.00,40.00){\framebox(6.0,6.0)[cc]{$g$}}
939: \put(99.00,40.00){\framebox(6.0,6.0)[cc]{$f$}}
940: \put(97.00,43.00){\oval(24.00,12.00)[]}
941: \put(91.00,59.00){\vector(0,-1){10.00}}
942: \put(93.00,59.00){\vector(3,-4){7.33}}
943: \put(89.00,21.00){\framebox(6.0,6.0)[cc]{$x$}}
944: \put(99.00,21.00){\framebox(6.0,6.0)[cc]{$b$}}
945: \put(97.00,24.00){\oval(24.00,12.00)[]}
946: \put(93.00,40.00){\vector(0,-1){10.00}}
947: \put(102.00,40.00){\vector(0,-1){10.00}}
948: \put(23.00,11.00){\makebox(0,0)[cc]{$L$}}
949: \put(97.00,11.00){\makebox(0,0)[cc]{\SOP{L}{x\equiv b}}}
950: \put(84.00,37.00){\oval(14.00,8.00)[b]}
951: \put(79.50,39.50){\oval(5.00,7.00)[lt]}
952: \put(77.00,40.00){\line(0,-1){4.00}}
953: \put(91.00,40.00){\line(0,-1){4.00}}
954: \put(79.00,43.00){\vector(1,0){6.00}}
955: \end{picture}
956: \caption{Identification of values of terms.}\label{Identification}
957: }
958: \end{figure}
959:
960:
961: \begin{lem}
962: Identification of values of terms is a correct transformation and
963: the resulting state is unique.
964: %
965: %More formal definition involving \alpha
966: %
967: \end{lem}
968:
969: \begin{pf}
970:
971: Let $\pi=\{~L_i~\vert~L_i\in\PSEQS~\}$\/ be a concrete semantic
972: property which holds before identification of terms $t_1$\/ and
973: $t_2$. If the values of $t_1$\/ and $t_2$\/ are equal in the
974: concrete semantics $\forall L_i ~:~ t_1=t_2\in L_i$, then they are
975: equal in the abstract semantics $t_1=t_2\in\alpha(\pi)$, too. If
976: their values are not equal, then identification gives us an
977: inconsistent computation state which obviously includes
978: \SOP{L}{t_1\equiv t_2}\/ for all $t_1$\/ and $t_2$. So, this
979: transformation is correct.
980:
981: Identification is done in finite steps because the size of
982: grammar decreases at each step. Uniqueness of the resulting state
983: is explained by the following observation. If we have two pairs
984: of terms which are candidates for identification, then
985: identification of one of them does not close a possibility of it
986: for another, because we remove a duplication of the functional
987: symbols only. In fact, after identification of a pair of terms we
988: obtain a new state, including the source one, and thus other
989: existing identification possibilities remain. So, the order of
990: ``merging'' of term pairs is not important for the resulting
991: state.
992: %~\qed
993: \end{pf}
994:
995:
996:
997: \subsubsection{Semantic completion}\label{Semantic-completion}
998:
999: We have not yet considered any interpretation of constants and
1000: functional symbols. We could continue developing \ERA\/ in the
1001: same way. As a result we shall obtain a noninterpretational
1002: version of the analysis likewise analysis algorithms in the
1003: program scheme theory. However, it is natural to use semantics of
1004: primitive operations of the programming language of interest in
1005: order to achieve better accuracy.
1006:
1007: \ERA\/ provides us with possibilities of taking into account
1008: properties of language constructs, and, what is especially
1009: important, we can easily handle complexity of these manipulations.
1010: In fact, inclusion of these properties corresponds to carrying out
1011: some finite part of completion of the computation states by
1012: consistent equalities. This manipulation is called {\bf semantic
1013: completion} (about ``conjunctive/disjunctive completion'' see
1014: \cite{CC/92a}).
1015:
1016:
1017:
1018: \begin{figure}%[ht]
1019: \centering
1020: {%\small
1021: \unitlength=0.85mm
1022: \linethickness{0.4pt}
1023: \begin{picture}(131.33,60.00)
1024: \put(12.50,45.00){\oval(25.00,10.00)[]}
1025: \put(4.00,42.00){\framebox(6.00,6.00)[cc]{$x$}}
1026: \put(15.00,42.00){\framebox(6.00,6.00)[cc]{$+$}}
1027: \put(52.50,45.00){\oval(25.00,10.00)[]}
1028: \put(44.00,42.00){\framebox(6.00,6.00)[cc]{$y$}}
1029: \put(55.00,42.00){\framebox(6.00,6.00)[cc]{{\large$\ast$}}}
1030: \put(33.33,37.67){\oval(27.33,5.33)[b]}
1031: \put(47.00,37.67){\vector(0,1){2.50}}
1032: \put(19.67,42.00){\line(0,-1){4.67}}
1033: \put(13.67,15.00){\framebox(6.00,6.00)[cc]{$z$}}
1034: \put(16.33,42.00){\vector(0,-1){18.50}}
1035: \put(16.67,18.17){\oval(17.33,10.33)[]}
1036: \put(57.00,42.00){\vector(-4,-3){31.50}}
1037: \put(53.33,15.00){\framebox(6.00,6.00)[cc]{$0$}}
1038: \put(56.33,18.17){\oval(17.33,10.33)[]}
1039: \put(58.67,42.00){\vector(0,-1){18.50}}
1040: \put(34.33,5.00){\makebox(0,0)[cc]{$L$}}
1041: %%
1042: \put(119.67,45.00){\oval(34.00,10.00)[]}
1043: \put(106.67,42.00){\framebox(6.00,6.00)[cc]{$+$}}
1044: \put(126.67,42.00){\framebox(6.00,6.00)[cc]{$z$}}
1045: \put(116.67,42.00){\framebox(6.00,6.00)[cc]{$x$}}
1046: \put(119.78,18.00){\oval(34.00,10.00)[]}
1047: \put(106.78,15.00){\framebox(6.00,6.00)[cc]{$y$}}
1048: \put(126.78,15.00){\framebox(6.00,6.00)[cc]{{\large$\ast$}}}
1049: \put(116.78,15.00){\framebox(6.00,6.00)[cc]{$0$}}
1050: \put(103.83,53.00){\oval(9.67,8.33)[t]}
1051: \put(103.83,37.50){\oval(9.67,8.33)[b]}
1052: \put(99.00,37.50){\line(0,1){15.67}}
1053: \put(108.65,42.00){\line(0,-1){5.00}}
1054: \put(108.67,54.0){\vector(0,-1){4.00}}
1055: \put(111.00,42.00){\vector(1,-3){6.3}}
1056: \put(136.00,11.50){\oval(10.67,9.00)[b]}
1057: \put(138.50,42.00){\oval(5.67,6.67)[rt]}
1058: \put(141.33,42.00){\line(0,-1){31.67}}
1059: \put(138.5,45.40){\vector(-1,0){1.67}}
1060: \put(130.67,15.00){\line(0,-1){4.33}}
1061: \put(124.17,10.33){\oval(9.67,6.67)[b]}
1062: \put(129.00,15.00){\line(0,-1){5.33}}
1063: \put(119.33,9.67){\vector(0,1){3.40}}
1064: \put(119.33,0.00){\makebox(0,0)[cc]{{\bf Co}($L$)}}
1065: \end{picture}
1066: \caption{Semantic completion.}\label{C-Cosure}
1067: }
1068: \end{figure}
1069:
1070:
1071: As a basic version of {\bf semantic completion} {\bf Co} we take
1072: computations over constant and equal arguments.
1073: %
1074: %and computations involving ``{\em additive}~{\bf 0}/{\em multiplicative}~{\bf 1}''
1075: %properties of primitive operations of the language.
1076: %
1077: When we detect that some term $t$\/ has some specific value $v$\/
1078: then {\bf Co}($L$)$=$\SOP{L}{t\equiv v}. Also, it is possible to
1079: apply identification involving dependencies among result of an
1080: operation and its arguments: if $(t_1$ AND $t_2)=TRU\!E\in L$\/
1081: then \linebreak {\bf Co}($L$)$=$\SOP{\SOP{L}{t_1\equiv
1082: TRU\!E}}{t_2\equiv TRU\!E} etc. This identification process is
1083: iterative because new possibilities for identification can appear
1084: at the next steps. It is conceivable that in doing so we shall
1085: detect inconsistency of a computation state. In this case result
1086: of semantic completion is \ATOP.
1087:
1088: This version can be extended by some intelligent theorem prover
1089: inferring new reasonable equalities and checking inconsistency of
1090: computation states. Such combining of analysis with proofs offers
1091: powerful facilities to the analyzer (see
1092: \cite{HeintzeJafVoi00,CousotCousot-CAV02}) and, as it was
1093: mentioned in the previous works, this prover is reusable for
1094: consequent (semi-)automatic processing of results of the analysis.
1095: ``Size'' of used completion can be tuned by options of
1096: interpretability of the analyzer.
1097:
1098: Some arithmetical errors (such as division by zero, out of type
1099: range etc.) will appear during the semantic completion. In this
1100: case the analyzer tells us about the error and sets the current
1101: computation state to \ATOP. Notice that for the languages where
1102: incomplete Boolean evaluation is admissible semantic completion
1103: over Boolean expressions should be carefully designed especially
1104: in presence of pointers.
1105:
1106: %The following example demonstrates a potential problem:
1107: %\begin{center}
1108: %\OPR{(p$\neq$NIL) AND (p\DEREF.f=a)}.
1109: %\end{center}
1110:
1111: An example of semantic completion is presented in {\bf Figure
1112: \ref{C-Cosure}}. Turning back to the identification example at
1113: {\bf Figure \ref{Identification}}, we consider the following
1114: interpretation of constants and functional symbols: $g$\/ is the
1115: exclusive disjunction, $f$\/ is the negation, $a$\/ is the
1116: constant {\bf TRUE} and $b$\/ is the constant {\bf FALSE}. It is
1117: easy to see that in this case application of semantic completion
1118: gives us $\SOP{L}{x\equiv b}=\ATOP$.
1119:
1120: In our analyzer we implemented an interpretational version of
1121: \ERA\/ which uses semantic completion \Co{$L$}. Under this
1122: approach, the definitions of the basic transformations mentioned
1123: above are chan\-ged to the following:
1124: \[
1125: \begin{array}{l}
1126: \SOP{L}{t}^\prime=\Co{\SOP{L}{t}},\\
1127: \SOP{L}{t_1\equiv t_2}^\prime=\Co{\SOP{L}{t_1\equiv t_2}}.
1128: \end{array}
1129: \]
1130: In short, we shall omit this ``interpretability'' prime.
1131:
1132:
1133:
1134: \section{Semantic transformers}\label{STransformers}
1135:
1136: In this section we describe semantic transformers
1137: $\SEM{}{\centerdot}:\PSEQS\rightarrow\PSEQS$\/ corresponding to
1138: common statements existing in imperative programming languages. If
1139: for a statement {\bf S} an input computation state is $L$\/ then
1140: \SEM{L}{S} is its output computation state. For all
1141: \SEM{}{\centerdot}~ the notation
1142: \SEM{\SEM{L}{\centerdot}}{\centerdot} means
1143: \SEM{(\SEM{L}{\centerdot})}{\centerdot}.
1144:
1145:
1146: \subsection{Assignment statement}
1147:
1148: Among all program terms considered in \ERA\/ we can pick out {\em
1149: access program terms} including array {\em elm}, record {\em fld},
1150: and pointer {\em val} referencing and playing an important role in
1151: determination of effect of an assignment statement. As in
1152: \cite{Deu/94,Venet/99} our abstraction of program memory
1153: manipulations is storeless and based on notion of memory access
1154: paths represented by access program terms. For example, for an
1155: address expression {\bf bar[i][j]\^{}.foo} the access term is
1156: $\mbox{\em fld}(\mbox{\em val}(\mbox{\em elm}(\mbox{\em
1157: elm}(bar,i),j)),foo)$.
1158:
1159: We shall assume that no operations other than memory addressing
1160: (for example comparisons) are allowed for structured variables
1161: such as arrays and records. So, for the previous example neither
1162: $a$\/ and $\mbox{\em elm}(a,i)$ ({\bf a[i]}) nor \linebreak
1163: $\mbox{\em val}(\mbox{\em elm}(\mbox{\em elm}(a,i),j))$ ({\bf
1164: a[i][j]\^{}}) can appear as arguments for operations other than
1165: {\em elm} and {\em fld} respectively. This limitation allows us to
1166: simplify definition of our assignment statement
1167: abstraction%
1168: \footnote{Otherwise we have either to accept that each assignment
1169: to some structured variables destroys all equalities involving
1170: other components of it or to implement some strategy (for example
1171: copying) preserving useful and safe access terms.}.
1172:
1173: To preserve safety of the analysis we have to take into account
1174: memory aliasing appearing in programs. Two access terms are alias
1175: if they address the same memory location. In the general case
1176: \ERA\/ is inadequate itself to handle precisely all kinds of
1177: aliasing and we should use other analyses. Next it is assumed
1178: that for each access term $t_a$\/ we know a set \ACC{t_a} of
1179: access terms covering a set of aliases for $t_a$\/ (may--alias
1180: information about $t_a$). Let
1181: \[\overline{\ACC{t_a,L({\bf G})}}=
1182: (\ACC{t_a}\cap\CV)\cup\{f\in\FP~\vert~ A\DERIV{+}{G}f(\dots) ~\wedge~ f(\dots)\!\in\!\ACC{t_a}\}
1183: \]
1184: (``roots'' of memory access terms in \ACC{t_a}). Notice that
1185: flow--insensitive approximations of alias information may cause
1186: conservative results of \ERA. Therefore \ERA\/ and alias analyses
1187: used in its implementation should have the same sensitivity to the
1188: control flow.
1189:
1190:
1191: \vskip\baselineskip
1192: \noindent{\bf Assignment statement} \SEM{L}{v:=exp}.
1193:
1194: \begin{enumerate}
1195: \item In the state $L$\/ evaluate $exp$ using evaluation transformer formally defined
1196: earlier in {\bf Section \ref{Term-evaluation}}. Let $L^\prime$\/ be a result of the evaluation
1197: and let $E$\/ be a nonterminal such that $E\DERIV{}{}~exp$.
1198: \item Perform \SOP{L^\prime}{\downarrow \overline{\ACC{v,L^\prime}}}. Do not remove $E$.
1199: \item Add the term $v$\/ so that $E\DERIV{}{}~v$.
1200: \end{enumerate}
1201:
1202:
1203: Unfortunately, in some cases this abstraction of the assignment
1204: statement fails as before. For example, this assignment
1205: transformer corresponding to {\bf x:=x+1} and being applied to the
1206: state $L=\{(x>0)=TRU\!E\}$\/ gives only the trivial identity
1207: $\SEM{L}{x:=x+1}=\{x=x\}$. To improve accuracy of the analysis in
1208: these cases we can consider ``artificial'' variables associated
1209: with scalar variables of the program which will store previous
1210: values of the original ones. Under this approach between first and
1211: second steps of the assignment statement effect definition we
1212: should insert the step
1213: \begin{description}
1214: \item[\dots~] Let $A\rightarrow v$\/ and $B\rightarrow v^\prime$\/ where $v^\prime$\/
1215: is associated with $v$. Remove the second rule and add $A\rightarrow v^\prime$\/
1216: if it is needed.
1217: \end{description}
1218: Under this approach we shall have $\SEM{L}{x:=x+1}=\{(x^\prime>0)=TRU\!E,x=x^\prime+1\}$\/
1219: from where we can deduce that $x>0$\/ also.
1220: %Such tracking of previous values of program variables is useful
1221:
1222:
1223: \subsection{Other transformers}
1224:
1225: \begin{itemize}
1226:
1227: \item {\it Program}\/.
1228:
1229: Given a program
1230:
1231: {\bf
1232: \begin{tabular}{ll}
1233: PROGRAM; & \\
1234: ~VAR~~x : T;& $(*$ variables $*)$ \\
1235: BEGIN & \\
1236: ~S & $(*$ statements $*)$\\
1237: END.
1238: \end{tabular}
1239: }
1240:
1241: \noindent we can define the following transformer corresponding to
1242: it
1243:
1244: \[
1245: \SEM{\ABOT}{PROGRAM}=\SEM{\SEM{\ABOT}{x:=\omega}}{S}
1246: \]
1247: where $\omega$\/ represents the indefinite value. Notice that
1248: $\omega$\/ is not a constant.
1249:
1250:
1251:
1252: \item {\it Empty statement}\/.
1253: \[
1254: \SEM{L}{~} = L
1255: \]
1256:
1257:
1258: \item {\it Sequence of statements}\/.
1259: \[
1260: \SEM{L}{S_{1}; S_{2}} = \SEM{\SEM{L}{S_{1}}}{S_{2}}
1261: \]
1262:
1263: \item {\it Read statement}\/.
1264: \[
1265: \SEM{L}{ READ(x) } = \SOP{\SOP{L}{x}}{\downarrow\overline{\ACC{x}}}
1266: \]
1267: Notice that if for {\em read statement} as well as for other statements
1268: some set of user's pre-- or post--assertions represented in the form of
1269: equalities of program terms is supplied then the analyzer can take
1270: them into consideration to check consistency and to include in the current
1271: computation state.
1272:
1273:
1274: \item {\it Write statement}\/.
1275: \[
1276: \SEM{L}{ W\!RITE(x) } = \SOP{L}{x}
1277: \]
1278:
1279:
1280:
1281: \item {\it Conditional statement}\/.
1282: \[
1283: {\bf IF~~p~~THEN~~~S_{t}~~~ELSE~~~S_{f}~~~END}.
1284: \]
1285: If $L^\prime=\SOP{L}{p}$\/ then
1286: %\newpage
1287: \[
1288: \SEM{L}{IF} = \SEM{\SOP{L^\prime}{p\equiv T\!RU\!E}}{S_{t}}\ACAP
1289: \SEM{\SOP{L^\prime}{p\equiv F\!ALSE}}{S_{f}}.
1290: \]
1291:
1292:
1293: \item {\it Cycle statement}\/.
1294:
1295:
1296: {\bf
1297: \begin{tabular}{lll}
1298: CYCLE & & \\
1299: & S & $(*$ body of cycle $*)$\\
1300: END & & \\
1301: \end{tabular}
1302: }
1303:
1304: where {\bf S} is a composed statement that possibly contains occurrences of
1305: exit-of-cycle statements {\bf EXIT$_k$}. When the sequence
1306: \[
1307: L_{0}=L\mbox{,~~}L_{n} = \SEM{L_{n-1}}{S}\mbox{~~for~~} n > 0
1308: \]
1309: becomes stabilize
1310: \[
1311: \SEM{L}{CYCLE} = \ACAP_{k}E_{k}
1312: \]
1313: where $E_{k}$\/ is a stationary entry state for ${\bf EXIT_{k}}$.
1314: If this process does not become stabilize then some widening
1315: operator should be used (see Section \ref{WideningOp}).
1316:
1317:
1318:
1319: \item {\it Halt, exit and return statements}\/.
1320: \[
1321: \SEM{L}{EXIT}~=~\SEM{L}{RETURN}~=~\ATOP
1322: \]
1323:
1324:
1325:
1326:
1327: \item {\it Call of function}\/. We assume that return of results
1328: of function calls is implemented as an assignment to variables
1329: having the same names as invoked functions (connection with their
1330: call sites should be taken into account). The function bodies may
1331: contain {\bf RETURN} statements as well.
1332:
1333: {\bf
1334: \begin{tabular}{ll}
1335: FUNCTION F(x: T$_1$) : T; & \\
1336: ~~VAR~~y: T$_2$; & $(*$ local variables $*)$ \\
1337: BEGIN & \\
1338: ~~S & $(*$ statements $*)$\\
1339: END.
1340: \end{tabular}
1341: }
1342: \[
1343: \SEM{L}{FUNCTION}=\SEM{\SEM{L}{x:=e;y:=\omega;F:=\omega}}{S}\ACAP
1344: R
1345: \] where $e$\/ is a factual parameter of the function (see the
1346: note for {\bf READ} statement) and $R=\ACAP R_k$\/ is intersection
1347: of stationary entry states for return statements in {\bf S} if
1348: they exist and $R=\ATOP$\/ otherwise. In the term being evaluated
1349: the result of a function call is represented by $F$.
1350:
1351: %Also, we
1352: %should take into account side effects of computations of the
1353: %function concerning modification of access terms that have been
1354: %evaluated before this function call in the same expression. In
1355: %context--sensitive versions of \ERA\/ different call sites of the
1356: %function $F$\/ can be distinguished by introducing supplementary
1357: %variables $F^{(i)}$\/ corresponding to these sites and
1358: %pseudo--assignments $F^{(i)}:=F:=e$\/ replacing the original ones.
1359:
1360:
1361:
1362: \begin{figure}[t]
1363: \centering {\small \unitlength=0.70mm
1364: %\special{em:linewidth 0.4pt}
1365: \linethickness{0.4pt}
1366: \begin{picture}(100.00,50.00)
1367: \put(10.00,30.00){\oval(22.00,12.00)[]}
1368: \put(3.0,27.90){\framebox(4.2,4.2)[cc]{$x$}}
1369: \put(12.80,27.90){\framebox(4.2,4.2)[cc]{$y$}}
1370: \put(10.00,20.00){\makebox(0,0)[cc]{$L$}}
1371: \put(75.00,40.00){\oval(30.00,12.00)[]}
1372: \put(65.0,37.90){\framebox(4.2,4.2)[cc]{$x$}}
1373: \put(72.90,37.90){\framebox(4.2,4.2)[cc]{\tiny $+$}}
1374: \put(81.20,37.90){\framebox(4.2,4.2)[cc]{\tiny $F$}}
1375: \put(74.00,37.90){\vector(-1,-1){12.00}}
1376: \put(76.00,37.90){\vector(1,-1){12.00}}
1377: \put(60.00,20.00){\circle{12.00}}
1378: \put(90.00,20.00){\circle{12.00}}
1379: \put(57.9,17.90){\framebox(4.2,4.2)[cc]{$y$}}
1380: \put(87.90,17.90){\framebox(4.2,4.2)[cc]{$1$}}
1381: \put(75.0,5.00){\makebox(0,0)[cc]{$\SEM{L}{x := F(x)}$}}
1382: \end{picture}
1383: \caption{Function call for $F(a)=a+1$.} }
1384: \end{figure}
1385:
1386:
1387:
1388:
1389: \end{itemize}
1390:
1391:
1392:
1393:
1394:
1395: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1396:
1397: %Let us consider an example of a system of semantic equations.
1398: %$L_i$\/ in comments are used to refer computation
1399: %states corresponding to these program points. The program is
1400: %
1401: %\begin{tabular}{lll}
1402: %&PROGRAM;\\
1403: %&VAR~ x: INTEGER;\\
1404: %&BEGIN\\
1405: %&\Z READ(x);\\
1406: %(*$L_0$*)&\Z CYCLE&\\
1407: %(*$L_1$*)&\Z\Z IF~ c$_1$ ~THEN&\\
1408: %(*$L_2$*)&\Z\Z\Z EXIT&(*$L_3$*)\\
1409: % &\Z\Z ELSE&\\
1410: %(*$L_4$*)&\Z\Z\Z IF c$_2$ THEN x:=e$_1$ ELSE x:=e$_2$ END&(*$L_5$*)\\
1411: % &\Z\Z END&(*$L_6$*)\\
1412: % &\Z END&(*$L_7$*)\\
1413: % &END.&
1414: %\end{tabular}
1415:
1416: %\vskip\baselineskip
1417: %\noindent and the system of semantic equations built for is
1418:
1419: %\[
1420: %\begin{array}{ll}
1421: %L_0=\SEM{\ABOT}{READ(x)}\\
1422: %L_1=L_0\ACAP L_6 \\
1423: %L_2=\SEM{L_1}{c_1\!\equiv\!TRU\!E} \\
1424: %L_3=\ATOP \\
1425: %L_4=\SEM{L_1}{c_1\!\equiv\!FALSE} \\
1426: %L_5=\SEM{L_4}{IF~c_2~THEN~x\!:=\!e\mbox{$_1$}~ELSE~x\!:=\!e\mbox{$_2$}~END} \\
1427: %L_6=L_3\ACAP L_5 \\
1428: %L_7=L_2.
1429: %\end{array}
1430: %\]
1431:
1432: %\noindent If not all states are interested for some reasons we can simplify
1433: %this system into the following one:
1434:
1435: %\[
1436: %\begin{array}{l}
1437: %L_1=\SOP{\ABOT}{READ(x)}\ACAP L_5\\
1438: %L_4=\SOP{L_1}{c_1\equiv FALSE}\\
1439: %L_5=\SEM{\SOP{L_4}{c_2\equiv TRU\!E}}{x:=e\mbox{$_1$}}\ACAP
1440: % \SEM{\SOP{L_4}{c_2\equiv FALSE}}{x:=e\mbox{$_2$}}\\
1441: %L_7=\SOP{L_1}{c_1\equiv TRU\!E}.\\
1442: %\end{array}
1443: %\]
1444:
1445:
1446:
1447: \section{Widening operator and convergence of the analysis}\label{WideningOp}
1448:
1449: Our abstract semantic domain does not satisfy the {\em
1450: (descending) chain condition} and therefore it requires some {\em
1451: widening operator} \cite{CC/77,CC/92b,CC/92a}. To guarantee the
1452: convergence of the abstract interpretation we should use a dual
1453: widening operator:
1454: \begin{itemize}
1455: \item $\forall x,y\in\PSEQS~~\Rightarrow~~
1456: x~\AINCL~x\tilde{\bigtriangledown}y~~\&~~
1457: y~\AINCL~x\tilde{\bigtriangledown}y$,
1458:
1459: \item for all decreasing chains $x_0\AINCL x_1\AINCL\ldots$,
1460: the decreasing chains defined by
1461: $y_0=x_0,\ldots,y_{i+1}=y_i\tilde{\bigtriangledown}x_{i+1},\dots$\/
1462: are not strictly decreasing.
1463: \end{itemize}
1464: The iteration sequence with widening is convergent and
1465: its limit is a sound approximation of the fixpoint.
1466:
1467:
1468:
1469: \subsection{Widening operator for \ERA--grammars}
1470:
1471:
1472:
1473:
1474: Infinite chains can appear because corresponding languages have
1475: common infinite subsets generated by cyclic derivations in
1476: grammars. The source of that in \ERA\/ is {\bf term
1477: identification}. We can avoid this problem by imposing the
1478: constraint that grammars must be acyclic. Within the semilattice
1479: \ASDE, the subsemilattice of finite languages\footnote{Notice that
1480: sets of term equalities of a special form corresponding to these
1481: languages were used by V. Sabelfeld to develop effective
1482: algorithms of recognizing equivalence for some classes of program
1483: schemata \cite{Sa/79,Sa/80}.} generated by such grammars satisfies
1484: the chain condition, but such languages are not expressive enough.
1485: Our solution is the following. Grammars are not originally
1486: restricted but if in course of abstract interpretation the grammar
1487: size becomes greater than some parameter, then ``harmful'' cycles
1488: must be destroyed. To this end we remove grammar rules
1489: participating in cyclic derivations. Correctness of this
1490: approximation of intersection follows from {\bf Lemma
1491: \ref{DeleteTerms}}.
1492:
1493: %The lengths of such chains are bounded by $\vert{\bf G}_0\vert$\/ where ${\bf
1494: %G}_0$ are the first acyclic grammars in the chains under consideration.
1495:
1496: Detecting such rules is no simpler than the
1497: ``mi\-ni\-mum-\-feed\-back-arc/ver\-tex-set'' problem ({\bf MFAS}
1498: or {\bf MFVS}) if we consider the grammars as directed graphs.
1499: These sets are the smallest sets of arcs or vertices,
1500: respectively, whose removal makes a graph acyclic. We suppose that
1501: the ``feedback vertices'' choice is more natural for our purposes.
1502: In the general case this problem is ${\EuScript NP}$--hard, but
1503: there are approximate algorithms that solve this problem in
1504: polynomial \cite{Speckenmeyer/89} or even linear \cite{Rosen/82}
1505: time. Consideration of weighted digraphs makes it possible to
1506: distinguish grammar rules with respect to their worth for accuracy
1507: of the analysis algorithm. However, perspectives of this approach
1508: are not clear now for complexity/precision reasons of such
1509: algorithms. For example, \cite{ENSS/95} proposes an algorithm for
1510: weighted {\bf FVS}-problem requiring $O(n^2\mu(n)\log^2n)$\/ time
1511: where $\mu(n)$\/ is complexity of matrix multiplication.
1512:
1513: \begin{figure}[h]
1514: \centering
1515: \unitlength=0.825mm
1516: \linethickness{0.4pt}
1517: %\vskip-1cm
1518: \begin{picture}(160.00,80.00)
1519: %%%%%%%%%%%%%%%%%%%%%%% PIC1.PIC %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1520: \put(12.50,45.00){\oval(25.00,10.00)[]}
1521: \put(5.00,42.00){\framebox(6.00,6.00)[cc]{$g$}}
1522: \put(15.00,42.00){\framebox(6.00,6.00)[cc]{$r$}}
1523: \put(44.00,42.00){\framebox(6.00,6.00)[cc]{$h$}}
1524: \put(54.00,42.00){\framebox(6.00,6.00)[cc]{$y$}}
1525: \put(64.00,42.00){\framebox(6.00,6.00)[cc]{$f$}}
1526: \put(57.00,45.00){\oval(34.00,10.00)[]}
1527: \put(41.80,38.33){\oval(47.60,6.67)[b]}
1528: \put(65.60,42.00){\line(0,-1){4.67}}
1529: \put(66.67,37.33){\line(0,1){0.00}}
1530: \put(18.00,39.75){\vector(0,1){0.20}}
1531: \put(18.00,37.33){\line(0,1){2.00}}
1532: \put(39.50,45.00){\vector(1,0){0.20}}
1533: \put(21.00,45.00){\line(1,0){18.33}}
1534: \put(29.50,15.00){\oval(25.00,10.00)[]}
1535: \put(22.00,12.00){\framebox(6.00,6.00)[cc]{$h$}}
1536: \put(32.00,12.00){\framebox(6.00,6.00)[cc]{$z$}}
1537: \put(28.50,20.33){\vector(3,-4){0.20}}
1538: \multiput(10.00,42.00)(0.12,-0.14){150}{\line(0,-1){0.14}}
1539: \put(21.00,20.33){\vector(2,-3){0.20}}
1540: \multiput(6.00,42.00)(0.12,-0.17){126}{\line(0,-1){0.17}}
1541: \put(35.33,20.33){\vector(-1,-2){0.20}}
1542: \multiput(47.00,42.00)(-0.12,-0.22){98}{\line(0,-1){0.22}}
1543: \put(14.49,11.80){\oval(21.00,13.00)[b]}
1544: \put(4.00,39.75){\vector(0,1){0.20}}
1545: \put(4.00,11.90){\line(0,1){27.67}}
1546: \put(24.00,72.00){\framebox(6.00,6.00)[cc]{$x$}}
1547: \put(34.00,72.00){\framebox(6.00,6.00)[cc]{$f$}}
1548: \put(44.00,72.00){\framebox(6.00,6.00)[cc]{$a$}}
1549: \put(37.00,75.00){\oval(34.00,10.00)[]}
1550: \put(75.00,38.33){\oval(12.00,6.67)[b]}
1551: \put(69.00,42.00){\line(0,-1){4.00}}
1552: \put(78.00,72.67){\oval(6.00,5.33)[rt]}
1553: \put(81.00,37.33){\line(0,1){35.67}}
1554: \put(78.00,75.33){\vector(-1,0){23.67}}
1555: \put(35.00,72.00){\vector(-1,-1){22.00}}
1556: \put(38.33,72.00){\vector(3,-4){16.25}}
1557: \put(40.00,0.00){\makebox(0,0)[cc]{{\small$L({\bf G})$}}}
1558: %%%%%%%%%%%%%%%%%%%%%%% PIC2.PIC %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1559: \put(117.50,45.00){\oval(25.00,10.00)[]}
1560: \put(110.00,42.00){\framebox(6.00,6.00)[cc]{$g$}}
1561: \put(120.00,42.00){\framebox(6.00,6.00)[cc]{$r$}}
1562: \put(149.00,42.00){\framebox(6.00,6.00)[cc]{$y$}}
1563: \put(159.00,42.00){\framebox(6.00,6.00)[cc]{$h$}}
1564: \put(171.67,37.33){\line(0,1){0.00}}
1565: \put(144.50,45.00){\vector(1,0){0.20}}
1566: \put(126.00,45.00){\line(1,0){18.33}}
1567: \put(136.50,15.00){\oval(15.00,10.00)[]}
1568: \put(133.67,12.00){\framebox(6.00,6.00)[cc]{$z$}}
1569: \put(123.67,72.00){\framebox(6.00,6.00)[cc]{$x$}}
1570: \put(133.67,72.00){\framebox(6.00,6.00)[cc]{$f$}}
1571: \put(143.67,72.00){\framebox(6.00,6.00)[cc]{$a$}}
1572: \put(136.67,75.00){\oval(34.00,10.00)[]}
1573: \put(157.50,45.00){\oval(25.00,10.00)[]}
1574: \put(135.00,72.00){\vector(-3,-4){16.25}}
1575: \put(138.00,72.00){\vector(3,-4){16.25}}
1576: \put(111.67,42.00){\vector(3,-4){18.00}}
1577: \put(161.00,42.00){\vector(-3,-4){18.00}}
1578: \put(114.67,42.00){\vector(1,-1){22.00}}
1579: \put(136.67,0.00){\makebox(0,0)[cc]{{\small$L({\bf G})\setminus_{\bf fvs}$}}}
1580: \end{picture}
1581: %
1582: %%%%%%%%%%%%%%%%%%%%%%% mfvs.eps %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1583: %
1584: %\vskip 2cm
1585: \epsfig{file=mfvs.eps,width=390pt}
1586: %\vskip 2cm
1587: \caption{{\bf FVS}--transformation.}\label{MFVS}
1588: \end{figure}
1589:
1590: Our widening operator for the analysis of equality relationships
1591: is defined in the following way. A vertex set of the cyclomatic
1592: graph\footnote{A graph represents cyclic derivations in the
1593: grammar. The author did not find another appropriate name for this
1594: object in the Computer Science and Discrete Mathematics
1595: literature.} of the grammar {\bf G} is a set of functional symbols
1596: existing in {\bf G}. An arc $(f,g)$\/ belongs to its arc set if
1597: {\bf G} contains rules $A\rightarrow f(\ldots,B,\ldots)$\/ and
1598: $B\rightarrow g(\ldots)$. A transformation of a cyclomatic graph
1599: which involves detecting some {\bf FVS} (it can be an upper
1600: approximation of a minimal feedback set) and removing all vertices
1601: from the {\bf FVS} is said to be an {\bf FVS}--transformation (an
1602: example is shown in {\bf Figure \ref{MFVS}}). Let \MFVS{L}\/ be a
1603: language obtained from $L$\/ by {\bf FVS}--transformation applied
1604: to the grammar generating $L$. We define
1605: \[
1606: L(\mbox{\bf G}_1)\DWID L(\mbox{\bf G}_2) =
1607: \left\{\begin{array}{ll}
1608: \MFVS{L(\mbox{\bf G}_1)}~\ACAP~L(\mbox{\bf G}_2) &
1609: \mbox{~if~~}\vert\mbox{\bf G}_2\vert\geq\vert\mbox{\bf G}_1\vert>d,\\
1610: L(\mbox{\bf G}_1)~\ACAP~\MFVS{L(\mbox{\bf G}_2)} &
1611: \mbox{~if~~}\vert\mbox{\bf G}_1\vert>\vert\mbox{\bf G}_2\vert>d,\\
1612: L(\mbox{\bf G}_1)~\ACAP~L(\mbox{\bf G}_2) & \mbox{~otherwise,}
1613: \end{array}\right.
1614: \]
1615: where $d$\/ is a user-defined parameter. It is reasonable to
1616: choose this parameter, depending on number of variables of
1617: analyzed programs, as a linear function with a small factor of
1618: proportionality. Notice that in this case the lengths of appearing
1619: chains linearly depend on number of variables living
1620: simultaneously.
1621:
1622:
1623: \subsection{Divergence of the analysis}
1624:
1625: Is the widening operator, being rather complex, really needed for
1626: the analysis of equality relationships? Are there programs which,
1627: being analyzed, generate infinite chains of semantic properties?
1628: It should be mentioned that constructing such program examples has
1629: been a problem for a long time. In \cite{Em/96} we stated our
1630: belief that their existence seems hardly probable. These attempts
1631: failed, because they concentrated on constructing an example with
1632: completely non-interpretable functional symbols, i.e. in the frame
1633: of the ``pure'' theory of program schemata.
1634:
1635: As already noticed, we can widely vary the interpretability of the
1636: analysis algorithm. In order to construct an example, it will
1637: suffice to use the following rule of completion:
1638: \[
1639: \mbox{if } (t_1=t_2)=TRU\!E\in L \mbox{ then {\bf Co}}(L)=\SOP{L}{t_1\equiv t_2}.
1640: \]
1641:
1642: We consider the following example:
1643:
1644: \vskip\baselineskip
1645: \begin{center}
1646: \hspace*{-7.5mm}\begin{tabular}{ccc}
1647: \begin{tabular}{l}
1648: %\setlength{\baselineskip}{1pt}
1649: ...\\\vspace*{-2pt}
1650: x:=f(y);\\\vspace*{-2pt}
1651: \KW{if}~ f(x)=f(y) ~\KW{then}\\\vspace*{-2pt}
1652: ~~ \KW{while}~ y=f(g(y)) ~\KW{do}\\\vspace*{-2pt} %
1653: ~~~~ y:=g(y)\\\vspace*{-2pt} ~~%
1654: \KW{end}\\\vspace*{-2pt}
1655: \KW{end}\\\vspace*{-2pt}
1656: ...\\
1657: \hspace*{6mm}{\bf program scheme}
1658: \end{tabular}
1659: &\hspace*{7.5mm}&
1660: \begin{tabular}{l}
1661:
1662: ...\\\vspace*{-2pt}
1663: x:=sign(y);\\\vspace*{-2pt}
1664: \KW{if}~ sign(x)=sign(y) ~\KW{then}\\\vspace*{-2pt}
1665: ~~ \KW{while}~ y=sign(abs(y)) ~\KW{do}\\\vspace*{-2pt} %
1666: ~~~~ y:=abs(y)\\\vspace*{-2pt} ~~%
1667: \KW{end}\\\vspace*{-2pt}
1668: \KW{end}\\\vspace*{-2pt}
1669: ...\\
1670: \hspace*{5mm}{\bf ``real-world'' program}
1671: \end{tabular}\\
1672: \end{tabular}
1673: \end{center}
1674: \vskip\baselineskip
1675:
1676:
1677: The properties computed at the body entry belong to an infinite
1678: decreasing chain. On {\bf Figure \ref{RealChain}} a state $L_e$\/
1679: describes properties valid before cycle execution; states $L_1$\/
1680: and $L_2$\/ describe properties at the entry of the cycle body for
1681: first and second iteration, respectively. It is easy to see that
1682: $L_1\ACAP L_2$\/ coincides with $L_2$\/ except in the equality
1683: relationships containing terms generated by $g^*$. Therefore every
1684: time we obtain the next state, the functional element $g^*$\/ is
1685: absent and the sub-net placed in the dashed box will repeat more
1686: and more times.
1687:
1688:
1689: \begin{figure}[t]
1690: \centering
1691: {%\small
1692: \unitlength=0.85mm
1693: %\special{em:linewidth 0.4pt}
1694: \linethickness{0.4pt}
1695: \begin{picture}(140,100.00)
1696: \put(4.00,81.00){\framebox(4.00,4.00)[cc]{$x$}}
1697: \put(10.00,81.00){\framebox(4.00,4.00)[cc]{$f$}}
1698: \put(12.00,81.00){\vector(0,-1){13.00}}
1699: \put(16.00,81.00){\framebox(4.00,4.00)[cc]{$f$}}
1700: \put(12.00,83.00){\oval(22.00,10.00)[]}
1701: \put(22.50,76.00){\oval(9.00,10.00)[b]}
1702: \put(22.50,90.00){\oval(9.00,9.00)[t]}
1703: \put(18.00,90.00){\vector(0,-1){2.00}}
1704: \put(27.00,90.00){\line(0,-1){14.00}}
1705: \put(18.00,81.00){\line(0,-1){5.00}}
1706: \put(10.00,61.00){\framebox(4.00,4.00)[cc]{$y$}}
1707: \put(12.00,63.00){\circle{10.00}}
1708: \put(12.00,50.00){\makebox(0,0)[cc]{$L_e$}}
1709:
1710: \put(54.00,81.00){\framebox(4.00,4.00)[cc]{$x$}}
1711: \put(60.00,81.00){\framebox(4.00,4.00)[cc]{$f$}}
1712: \put(62.00,81.00){\vector(0,-1){13.00}}
1713: \put(66.00,81.00){\framebox(4.00,4.00)[cc]{$f$}}
1714: \put(62.00,83.00){\oval(22.00,10.00)[]}
1715: \put(72.50,76.00){\oval(9.00,10.00)[b]}
1716: \put(72.50,90.00){\oval(9.00,9.00)[t]}
1717: \put(68.00,90.00){\vector(0,-1){2.00}}
1718: \put(77.00,90.00){\line(0,-1){14.00}}
1719: \put(68.00,81.00){\line(0,-1){5.00}}
1720: \put(62.00,63.00){\oval(18.00,10.00)[]}
1721: \put(56.00,61.00){\framebox(4.00,4.00)[cc]{$y$}}
1722: \put(63.00,61.00){\framebox(4.00,4.00)[cc]{$f$}}
1723: \put(65.00,61.00){\vector(0,-1){13.00}}
1724: \put(65.00,43.00){\circle{10.00}}
1725: \put(63.00,41.00){\framebox(4.00,4.00)[cc]{$g$}}
1726: \put(65.00,41.00){\line(0,-1){5.00}}
1727: \put(69.50,36.00){\oval(9.00,9.00)[b]}
1728: \put(74.00,36.00){\line(0,1){24.00}}
1729: \put(71.00,60.00){\oval(6.00,6.00)[rt]}
1730: \put(71.50,63.00){\vector(-1,0){0.75}}
1731: \put(62.00,29.00){\makebox(0,0)[cc]{$L_1$}}
1732:
1733: \put(114.00,83.00){\oval(22.00,10.00)[]}
1734: \put(106.00,81.00){\framebox(4.00,4.00)[cc]{$x$}}
1735: \put(112.00,81.00){\framebox(4.00,4.00)[cc]{$f$}}
1736: \put(114.00,81.00){\vector(0,-1){13.00}}
1737: \put(118.00,81.00){\framebox(4.00,4.00)[cc]{$f$}}
1738: \put(124.50,76.00){\oval(9.00,10.00)[b]}
1739: \put(124.50,90.00){\oval(9.00,9.00)[t]}
1740: \put(120.00,90.00){\vector(0,-1){2.00}}
1741: \put(129.00,90.00){\line(0,-1){14.00}}
1742: \put(120.00,81.00){\line(0,-1){5.00}}
1743: \put(103.00,56.00){\dashbox(22.00,14.00){}}
1744: \put(112.00,61.00){\framebox(4.00,4.00)[cc]{$f$}}
1745: \put(114.00,63.00){\circle{10.00}}
1746: \put(114.00,61.00){\vector(0,-1){13.00}}
1747: \put(114.00,43.00){\oval(22.00,10.00)[]}
1748: \put(106.00,41,00){\framebox(4.00,4.00)[cc]{{\footnotesize$\!g^*$}}}
1749: \put(108.00,41.00){\line(0,-1){5.00}}
1750: \put(103.50,37.00){\oval(9.00,9.00)[b]}
1751: \put(99.00,37.00){\line(0,1){24.00}}
1752: \put(102.00,61.00){\oval(6.00,6.00)[lt]}
1753: \put(102.00,64.00){\vector(1,0){7.0}}
1754: \put(112.00,41.00){\framebox(4.00,4.00)[cc]{$f$}}
1755: \put(118.00,41.00){\framebox(4.00,4.00)[cc]{$y$}}
1756: \put(114.00,41.00){\vector(0,-1){13.00}}
1757: \put(114.00,23.00){\circle{10.00}}
1758: \put(112.00,21.00){\framebox(4.00,4.00)[cc]{$g$}}
1759: \put(114.00,21.00){\line(0,-1){5.00}}
1760: \put(121.25,16.00){\oval(14.50,9.00)[b]}
1761: \put(128.50,16.00){\line(0,1){24.00}}
1762: \put(125.50,40.00){\oval(6.00,6.00)[rt]}
1763: \put(125.50,43.00){\vector(-1,0){0.75}}
1764: \put(114.00,3.00){\makebox(0,0)[cc]{$L_2$}}
1765: \end{picture}
1766: \caption{The case of divergence of the analysis.}\label{RealChain} }
1767: \end{figure}
1768:
1769: To obtain a ``real-world'' program from this program scheme, we
1770: interpret the functional symbols in the following manner: $f\equiv
1771: sign$\/ and $g\equiv abs$. We would like to point out the
1772: following interesting property. Execution of this piece of code
1773: (i.e., its behavior determined with the standard semantics)
1774: diverges only for two values of \OPR{y}: 0 and 1. At the same time
1775: the analysis algorithm (i.e. execution of the piece of code under
1776: our nonstandard semantics) is always divergent on condition that a
1777: widening operator is not used and the assumption on interpretation
1778: mentioned above holds.
1779:
1780: Is this program actually real-world? The reader should decide that
1781: by himself but we would like to underline the
1782: following\footnote{The penetrating reader may notice that this
1783: program to be considered as human written is really stupid (it can
1784: be slightly intellectualized if we interpret $g$\/ as ``{\em add
1785: 1}''). But for automatic generators of programs such a code does
1786: not seem improbable.}. On the one hand, the interpretability of
1787: the analysis algorithm can be varied in wide ranges and, on the
1788: other, we are not able to prove formally the impossibility of such
1789: behavior of the analyzer under the considered interpretation.
1790: So, we can choose either a lean analysis using acyclic grammars
1791: only or another one using arbitrary grammars and some widening
1792: operator.
1793:
1794:
1795: \subsection{Complexity of the analysis}\label{ComplexityERA}
1796:
1797: In \cite{Em/96} we pointed out the following upper bound on time
1798: for the algorithm of \ERA: $O(nmG^2_{max})$, where $n$\/ is
1799: program size, $m$\/ is maximum of number of program variables
1800: existing at a time, and $G_{max}$\/ is maximum of sizes of
1801: grammars appearing in course of the analysis. Due to our
1802: construction of widening operator (namely, choice of the parameter
1803: $d$\/ linearly depended on $m$) we can assume that $G_{max}=O(m)$.
1804:
1805:
1806: This bound can be deduced with help of {\bf Theorem 6} in
1807: \cite{Bou/93a}. The theorem states that for the recursive strategy
1808: of chaotic iteration maximum complexity is
1809: \[
1810: h\cdot\sum_{c\in C}\delta(c) \hspace*{10mm}
1811: \left(\strut\leq h\cdot\vert C\vert\cdot\vert W\vert\right),
1812: \lefteqn{\hspace*{2.5cm}{\bf(**)}}
1813: \]
1814: where $h$\/ is maximum length of increasing chains built by a
1815: widening operator, $C$\/ is set of control program points,
1816: $\delta(c)$\/ is depth of the control point $c$\/ in hierarchy of
1817: nested strongly connected components of the control flow graph
1818: containing $c$, and $W$\/ is set of vertices where a widening
1819: operator is applied during the analysis.
1820:
1821: For well-structured programs we can assume that maximum depth of
1822: nested loops does not depend on program size and is bounded by
1823: some constant. By ${\bf(**)}$\/ we conclude that number of
1824: algorithm steps does not exceed $O(nm)$. Since time complexity of
1825: all operations used in the analysis are estimated by $G^2_{max}$\/
1826: we obtain $O(nmG^2_{max})$\/ (or $O(nm^3)$) upper bound. Notice
1827: that to improve the results of the analysis it is possible to use
1828: rich semantic completion and more precise {\bf FVS}--algorithms
1829: that have more than quadratic time complexity.
1830:
1831: However, experimental results show that an approximation of a
1832: fixed point for the heads of cycle bodies is usually attained
1833: after at most two iterations and time complexity of the analysis
1834: is proportional to $nG_{max}$. Also, the user can turn off
1835: checking a threshold after which widening is started. In this
1836: case, he (consciously) admits some chance that the analysis
1837: diverges but we believe that this chance is not too big.
1838:
1839: It is easy to see that the space complexity of the equality relationship
1840: analysis is $O(nG_{max})$\/ and it is essentially depended
1841: on the number of variables.
1842: %Unfortunately, this bound does not provide useful information about the actual
1843: %space requirements for the analysis algorithm.
1844: We estimate the actual space requirements as 1.5--2.0 Mb
1845: per 1000 program lines for middle-size programs.
1846: %and emphasize that it
1847: %comes into particular importance for virtual memory systems. Random management
1848: %of swapping makes impracticable analyzing large programs in small RAM.
1849:
1850:
1851:
1852: \section{Processing of invariants and experimental results}\label{Experiments}
1853:
1854:
1855: \subsection{Usage of \ERA--invariants}
1856:
1857: \ERA\/ produces some set of invariants involving program terms
1858: which can be useful at different steps of program development and
1859: processing: debugging, verification (for that the invariants are
1860: interesting themselves), specialization, and optimization. The
1861: automatic prover mentioned above can be used at step of
1862: post-processing results of the analysis.
1863:
1864: We notice that the analyzer can tell the user useful information both at the stage of
1865: analyzing (this means that it is possible that there exist execution traces where
1866: such computational states appear; we mark with $+$\/ properties which can be detected
1867: at this stage) and at the stage of processing of results of the analysis
1868: (these properties hold for each execution trace leading to this program point).
1869: We shall briefly list some program properties that can be extracted from computation
1870: states $L_{in}$\/ and $L_{out}$\/ being for a statement {\bf S} the input and output
1871: states respectively.
1872:
1873:
1874: \begin{description}
1875: \item[$+$] {\em Variable $x$\/ has an indefinite value}\/
1876: if $L_{in}$\/ contains $x=\omega$.
1877:
1878: %If $x$\/ is a variable corresponding
1879: %to a function then existence of such an identity in entry states for
1880: %return statements of the function or the output state for the function body
1881: %says that a result of evaluation of the function may be undefined.
1882:
1883: \item[$+$] {\em Error in evaluation of an expression}: division by zero,
1884: out of type ranges, nil--pointer dereferencing, etc.
1885:
1886: \item[$-$] {\em {\bf S} is inaccessible} if $L_{in}=\ATOP$. This information
1887: can correspond to different properties of program execution:
1888: potentially infinite cycles and recursive calls, dead branches of
1889: conditional statements, useless procedural definitions, etc.
1890:
1891: \item[$-$] {\em Assignment statement {\bf v:=exp} is redundant} if
1892: $L_{out}$\/ contains $v=v^\prime$\/ where $v^\prime$\/ is the
1893: variable associated with $v$.
1894:
1895: \item[$-$] {\em Unused definitions} (constants, variables, types).
1896:
1897: \item[$-$] {\em Constant propagation}. Notice that \ERA\/ can detect that an expression
1898: is constant not only when constants for all variables in this expression
1899: are known.
1900:
1901: \item[$-$] More general: for some expression {\em there exists an expression that is
1902: equal to the original one and calculated more efficiently}\/
1903: (with respect to an given criterion
1904: time/space and target computer architecture).
1905: \end{description}
1906: Obviously, this list is not complete and there are many other properties
1907: which can be extracted from the invariants. For example, we can consider
1908: systems of equations/inequalities contained in the gathered invariants
1909: and try to solve them to derive more precise ranges for values of expressions
1910: or the inconsistency of this computational state.
1911:
1912: Apart from the automatic mode when invariants are processed
1913: automatically, we provide an interactive mode to visualize results
1914: of our analysis in a hypertext system. {\em HyperCode} presented
1915: in \cite{BuKo/98,BBEF/2001} is an open tunable system for
1916: visualization of properties of syntactic structures. There are two
1917: cases: visualization of all properties detected in the automatic
1918: mode and the user-driven processing and visualization of
1919: properties.
1920:
1921: The experiments show that not all program properties of interest
1922: can be automatically extracted out of the computed invariants. It
1923: is not judicious to consider many particular cases and to hardly
1924: embed them into the system. Instead, the system facilitates the
1925: specification of the user request with some friendly interface. He
1926: chooses a program point and an expression and obtains those and
1927: only those equality relationships, valid at this point, where this
1928: expression occurs as a super- or sub-term.
1929: %The following strategies of selection of the subsets of properties are provided:
1930: %{\bf a)} all terms occurring as sub-terms of the term under consideration; {\bf b)}
1931: %all terms using the term under consideration as a sub-term; {\bf c)} the union of sets
1932: %determined by the previous cases {\bf a)} and {\bf b)}. These problems are
1933: %obviously reduced to computing a transitive closure of the corresponding
1934: %digraph with respect to some vertex and orientation of arcs.
1935:
1936:
1937:
1938: \subsection{Program examples}
1939:
1940:
1941: An example of a program is presented below. The properties detected by the
1942: analyzer are indicated in comments.
1943:
1944: \vskip\baselineskip
1945: \begin{tabular}{ll}
1946: \KW{var} x,y,z: \KW{integer}; &\\\vspace*{-2pt}
1947: \KW{proce}\lefteqn{\mbox{\rm\KW{dure} P(a,b:
1948: \KW{integer}):\KW{integer};}}&\\\vspace*{-2pt} \KW{begin}
1949: &(*parameters are always equal*)\\\vspace*{-2pt} ~~ \KW{return}
1950: a+b &(*expression can be simplified:
1951: {\bf 2*a}*)\\\vspace*{-2pt} \KW{end} P; &\\\vspace*{-2pt}
1952: \KW{begin} &\\\vspace*{-2pt} ~~ Read(x); &\\\vspace*{-2pt} ~~
1953: \KW{while}~ x$\leq$0 ~\KW{do} &\\\vspace*{-2pt} ~~~~ Read(x);
1954: &\\\vspace*{-2pt} ~~~~ x := x+1; &\\\vspace*{-2pt} ~~~~ z :=
1955: x+z; &(*variable {\bf z} might be uninitialized*)\\\vspace*{-2pt}
1956: ~~~~ y := x+1; &\\\vspace*{-2pt} ~~~~
1957: \KW{if}~ x=0 ~\KW{then} &\\\vspace*{-2pt}
1958: ~~~~~~ z := y; &(*r-value can be
1959: simplified: {\bf z:=1}*)\\\vspace*{-2pt} ~~~~ \KW{else}
1960: &\\\vspace*{-2pt} ~~~~~~ z := x+1; &(*r-value can be simplified:
1961: {\bf z:=y}*)\\\vspace*{-2pt} ~~~~~~ x := y; &\\\vspace*{-2pt} ~~~~
1962: \KW{end}; &\\\vspace*{-2pt} ~~~~ Write( P(y,z) ) &(*call can be
1963: transformed: {\bf Write(2*y)}*)\\\vspace*{-2pt} ~~ \KW{end};
1964: &\\\vspace*{-2pt} ~~ x := z \KW{div} (y - z); &(*arithmetical
1965: error*)\\\vspace*{-2pt} ~~ Write(x) &(*inaccessible
1966: point*)\\\vspace*{-2pt}
1967: \KW{end.}\\
1968: \end{tabular}
1969: \vskip\baselineskip
1970:
1971: \noindent On basis of the analysis, this program can be
1972: transformed into the following:
1973:
1974: \vskip\baselineskip
1975: \begin{center}
1976: \begin{tabular}{l}
1977: \KW{var} x:\KW{integer};\\\vspace*{-2pt}
1978: \KW{begin}\\\vspace*{-2pt} ~~ Read(x);\\\vspace*{-2pt} ~~
1979: \KW{while}~ x$\leq$0 ~\KW{do}\\\vspace*{-2pt} ~~~~ Read(x);~~
1980: x := x+2;~~ Write(2*x)\\\vspace*{-2pt} ~~
1981: \KW{end}\\\vspace*{-2pt} ~~ \KW{ERROR\_EXCEPTION};\\\vspace*{-2pt}
1982: \KW{end.}\\
1983: \end{tabular}
1984: \end{center}
1985: \vskip\baselineskip
1986:
1987: \begin{table}
1988: \begin{center}
1989: \begin{tabular}{|| l || c | c | c || c | c | c ||}
1990: \hline\hline
1991: \raisebox{-2ex}{program}
1992: & \multicolumn{3}{c||}{length (lines)} & \multicolumn{3}{c||}{size (bytes)}\\
1993: \cline{2-4} \cline{5-7}
1994: & {\em M2Mix} & \ERA & improv. & {\em M2Mix} & \ERA & improv.\\
1995: \hline
1996: \bf KMP & 167 & 133 & 20.35\% & 2996 & 2205& 26.4\%\\
1997: \bf Lambert & 361 & 326 & 9.7\% & 6036 & 2564& 57.5\%\\
1998: \bf Automaton & 37 & 35 & 5.4\% & 969 & 926 & 4.5\%\\
1999: \bf Int$_{Fib}$& 87 & 77 & 11.5\% & 1647 & 1432& 13.05\%\\
2000: \bf Ackerman & 64 & 62 & 3.1\% & 1384 & 1322& 4.5\%\\
2001: \hline
2002: & \multicolumn{2}{r|}{average}&10.01\%&\multicolumn{2}{r|}{average}&21.19\%\\
2003: \hline\hline
2004: \end{tabular}
2005: \caption{Comparison \ERA\/ and {\em
2006: M2Mix}.}\label{M2Mix-analysis}\label{TableCMP}
2007: \end{center}
2008: \end{table}
2009:
2010: In {\bf Table \ref{M2Mix-analysis}} we present some results of
2011: optimization based on our analysis of residual programs generated
2012: by {\em M2Mix} specializer \cite{BuKo/96,Ko/95:phd}. To compile
2013: these examples, we used XDS Modula/Obe\-ron compiler v.2.30
2014: \cite{XDS/2000}. The following programs have been investigated:
2015:
2016: \begin{itemize}
2017: \item {\bf KMP} --- the ``na{\"\i}ve'' matching algorithm
2018: specialized with respect to some pattern;
2019: the residual program is comparable to
2020: Knuth, Morris, and Pratt's algorithm in efficiency.
2021: (see also {\bf Appendix}).
2022:
2023: \item {\bf Lambert} --- a program drawing Lambert's figure
2024: and specialized with respect to number of points.
2025:
2026: \item {\bf Automaton} --- an interpreter of a deterministic finite-state
2027: automaton specialized with respect to some language.
2028:
2029: \item {\bf Int$_{Fib}$} --- an interpreter of MixLan \cite{Ko/95:phd}
2030: specialized with respect to a program computing
2031: Fibonacci numbers.
2032:
2033: \item {\bf Ackerman} --- a program computing some values of Akcerman's
2034: function and specialized with respect to its
2035: first argument.
2036: \end{itemize}
2037:
2038: Let us comment briefly on the obtained results. Reducing length of
2039: a program can be considered as reducing number of operators and
2040: declarations. In these examples the optimizing effect was
2041: typically attained by the removal of redundant assignments, dead
2042: operators, unused variables and the reduction of operator
2043: strength. The only exception is {\bf KMP} program characterized by
2044: high degree of polyvariance (roughly speaking it means presence of
2045: deep--nested conditional statements) and an active usage of array
2046: references. Here some IF--statements with constant conditions and
2047: redundant range checks were eliminated. Notice that the last
2048: optimizing transformation is very important for Modula--like
2049: languages where such checks are defined by the language standard.
2050: Such notable optimizing effect for the {\bf Lambert} program is
2051: explained by deep reduction of power of floating-point operations
2052: which cannot be achieved by optimizing techniques now used in
2053: compilers. Since {\bf Automaton} and {\bf Ackerman} programs are
2054: quite small, their optimization gives conservative results.
2055: However, they would be better for the {\bf Ackerman} program if
2056: the implementation of \ERA\/ were context-sensitive. Substantial
2057: speed-up of these optimized programs was not obtained (it was less
2058: than 2\%) and this is not surprising since the great bulk of
2059: specializers take it as a criterion of optimality.
2060:
2061: These experiments show that an average reduction of size of
2062: residual programs is 20--25\%. Because the case of {\bf KMP}
2063: program seems to be the most realistic\footnote{Unfortunately our
2064: experiments were not exhaustive enough since the partial
2065: evaluation is not involved yet in real technological process of
2066: the software development and hence finding large resudial programs
2067: is a hard problem.}, we suppose that such improvement can be
2068: achieved in practice for real--world programs and it will be
2069: increased for large residual programs with a
2070: high degree of polyvariance %(for example, deep hierarchy of nested conditional statements)
2071: and active usage of arrays and float-point arithmetics. It is the
2072: author opinion that the analysis of automatically generated (from
2073: high--level specifications as well) programs is the most promising
2074: direction of its application, especially in the context-sensitive
2075: implementation of \ERA.
2076:
2077:
2078:
2079: \section*{Acknowledgement} The author wishes to thank M.A.
2080: Bulyonkov, P. Cousot, R. Cousot, and V.K. Sabelfeld for support,
2081: useful discussions and remarks.
2082: %\end{acknowledgement}
2083:
2084:
2085:
2086:
2087: %\bibliography{era}
2088: %\bibliographystyle{plain}
2089:
2090:
2091:
2092: \begin{thebibliography}{10}
2093:
2094: \bibitem{AU/72}
2095: A.~Aho and J.~Ullman.
2096: \newblock {\em The theory of parsing, translation and compilation}, volume~1.
2097: \newblock Prentice-Hall Inc., 1972.
2098:
2099: \bibitem{AWZ/88}
2100: B.~Alpern, M.N. Wegman, and F.K. Zadeck.
2101: \newblock Detecting equality of variables in programs.
2102: \newblock In {\em Proc. of the 15$^{th}$ Annual ACM Symposium Principles of
2103: Programming Languages}, pages 1--11. ACM Press, 1988.
2104:
2105: \bibitem{BBEF/2001}
2106: D.~Baburin, M.~Bulyonkov, P.~Emelianov, and N.~Filatkina.
2107: \newblock Visualization facilities in program reengineering.
2108: \newblock {\em Programmirovanie}, 27(2):21--33, 2001.
2109:
2110: \bibitem{BerMar/76}
2111: L.~Berman and G.~Markowsky.
2112: \newblock Linear and non-linear approximate invariants.
2113: \newblock Research Report RC7241, T.J. Watson Research Center of IBM, Yorktown,
2114: NY, 1976.
2115:
2116: \bibitem{Bou/93a}
2117: F.~Bourdoncle.
2118: \newblock Efficient chaotic iteration strategies with widenings.
2119: \newblock In {\em Proc. of the International Conference on Formal Methods in
2120: Programming and Their Applications}, volume 735 of {\em Lecture Notes in
2121: Computer Science}, pages 129--141. Springer-Verlag, 1993.
2122:
2123: \bibitem{BuKo/96}
2124: M.~Bulyonkov and D.~Kochetov.
2125: \newblock Practical aspects of specialization of {A}lgol-like programs.
2126: \newblock In {\em Proc. of the International Seminar on Partial Evaluation},
2127: volume 1110 of {\em Lecture Notes in Computer Science}, pages 17--32.
2128: Springer-Verlag, 1996.
2129:
2130: \bibitem{BuKo/98}
2131: M.~Bulyonkov and D.~Kochetov.
2132: \newblock Visualization of program properties.
2133: \newblock Research Report~51, Institute of Informatics Systems, Novosibirsk,
2134: Russia, 1998.
2135:
2136: \bibitem{ClarisoCortadella-SAS04}
2137: R.~Claris\'o and J.~Cortadella.
2138: \newblock The octahedron abstract domain.
2139: \newblock In {\em Proc. of the 11$^{th}$ International Static Analysis
2140: Symposium}, volume 3148 of {\em Lecture Notes in Computer Science}, pages
2141: 312--327. Springer--Verlag, 2004.
2142:
2143: \bibitem{CC/77}
2144: P.~Cousot and R.~Cousot.
2145: \newblock Abstract interpretation: {A} unified lattice model for static
2146: analysis of programs by construction or approximation of fixpoints.
2147: \newblock In {\em Rec. of the 4$^{th}$ ACM Symposium on Principles of
2148: Programming Languages}, pages 238--252. ACM Press, 1977.
2149:
2150: \bibitem{CC/92b}
2151: P.~Cousot and R.~Cousot.
2152: \newblock Abstract interpretation and application to logic program.
2153: \newblock {\em Journal of Logic Programming}, 13(2--3):103--180, 1992.
2154:
2155: \bibitem{CC/92a}
2156: P.~Cousot and R.~Cousot.
2157: \newblock Abstract interpretation frameworks.
2158: \newblock {\em Journal of Logic and Computation}, 2(4):511--547, 1992.
2159:
2160: \bibitem{CC/95}
2161: P.~Cousot and R.~Cousot.
2162: \newblock Formal languages, grammar and set--constraint--based program analysis
2163: by abstract interpretation.
2164: \newblock In {\em Rec. of the Conference on Functional Programming Languages
2165: and Computer Architecture}, pages 170--181. ACM Press, 1995.
2166:
2167: \bibitem{CousotCousot-CAV02}
2168: P.~Cousot and R.~Cousot.
2169: \newblock Software analysis and model checking.
2170: \newblock In {\em Proceedings of the 14$^{th}$ International Conference on
2171: Computer Aided Verification}, volume 2404 of {\em Lecture Notes in Computer
2172: Science}, pages 37--56. Springer-Verlag, 2002.
2173:
2174: \bibitem{CouHal/78}
2175: P.~Cousot and N.~Halbwachs.
2176: \newblock Automatic discovery of linear restraints among variables of a
2177: program.
2178: \newblock In {\em Records of the 5$^{th}$ annual {ACM} Symposium on Principles
2179: of Programming Languages}, pages 84--96. ACM, ACM Press, January 1978.
2180:
2181: \bibitem{Deu/94}
2182: A.~Deutsch.
2183: \newblock Interprocedural may--alias analysis for pointers: beyond
2184: $k$-limiting.
2185: \newblock {\em SIGPLAN Notices}, 29(6):230--241, 1994.
2186: \newblock Proc. of the ACM SIGPLAN'94 Conference on Program Language Design and
2187: Implementation.
2188:
2189: \bibitem{SAS-4/97}
2190: P.G. Emelianov and D.E. Baburin.
2191: \newblock Semantic analyzer of {M}odula-programs.
2192: \newblock In {\em Proc. of the 4$^{th}$ International Static Analysis
2193: Symposium}, volume 1302 of {\em Lecture Notes in Computer Science}, pages
2194: 361--363. Springer--Verlag, 1997.
2195:
2196: \bibitem{EmSa/94}
2197: P.G. Emelianov and V.K. Sabelfeld.
2198: \newblock Analyzer of semantic properties of {M}odula-programms.
2199: \newblock In {\em Software intellectualization and quality}, pages 100--107.
2200: Institute of Informatics Systems, Novosibirsk, Russia, 1994.
2201:
2202: \bibitem{Em/96}
2203: P.G. Emelianov.
2204: \newblock Analysis of the equality relation for the program terms.
2205: \newblock In {\em Proc. of the Third International Static Analysis Symposium},
2206: volume 1145 of {\em Lecture Notes in Computer Science}, pages 174--188.
2207: Springer--Verlag, 1996.
2208:
2209: \bibitem{ENSS/95}
2210: G.~Even, J.~Naor, B.~Schieber, and M.~Sudan.
2211: \newblock Approximating minimum feedback sets and multi-cuts.
2212: \newblock In {\em Proc. of the 4$^{th}$ International Conference on Integer
2213: Programming and Combinatorial Optimization}, volume 920 of {\em Lecture Notes
2214: in Computer Science}, pages 14--28. Springer--Verlag, 1995.
2215:
2216: \bibitem{XDS/2000}
2217: {\rm Excelsior LLC}.
2218: \newblock Native xds-x86 modula-2/oberon-2 development toolset.
2219:
2220: \bibitem{Gargi-PLDI02}
2221: K.~Gargi.
2222: \newblock A sparse algorithm for predicated global value numbering.
2223: \newblock In {\em Proceedings of the ACM SIGPLAN 2002 Conference on Programming
2224: Language Design and Implementation}, pages 45--56, 2002.
2225:
2226: \bibitem{HOOTS/98}
2227: A.D. Gordon and A.M. Pitts, editors.
2228: \newblock {\em High Order Operational Techniques in Semantics}.
2229: \newblock Publications of Newton Institute. Cambridge University Press, 1998.
2230:
2231: \bibitem{Gra/89}
2232: Ph. Granger.
2233: \newblock Static analysis of arithmetical congruences.
2234: \newblock {\em International Journal of Computer Mathematics}, 30:165--199,
2235: 1989.
2236:
2237: \bibitem{Gra/91b}
2238: Ph. Granger.
2239: \newblock Static analysis of linear congruence equalities among variables of a
2240: program.
2241: \newblock In {\em Proceedings of the International Joint Conference on Theory
2242: and Practice of Software Developement}, volume 493 of {\em Lecture Notes in
2243: Computer Science}, pages 169--192. Springer--Verlag, 1991.
2244:
2245: \bibitem{GulwaniNecula-POPL03}
2246: S.~Gulwani and G.C. Necula.
2247: \newblock Discovering affine equalities using random interpretation.
2248: \newblock In {\em Proc. of the ACM SIGPLAN-SIGACT 2003 Principles of
2249: Programming Languages}, pages 74--84, 2003.
2250:
2251: \bibitem{GulwaniNecula-SAS04}
2252: S.~Gulwani and G.C. Necula.
2253: \newblock A polynomial-time algorithm for global value numbering.
2254: \newblock In {\em Proc. of the 11$^{th}$ International Static Analysis
2255: Symposium}, volume 3148 of {\em Lecture Notes in Computer Science}, pages
2256: 212--227. Springer--Verlag, 2004.
2257:
2258: \bibitem{HalbwachsProyRoumanoff/97}
2259: N.~Halbwachs, Y.-E. Proy, and P.~Roumanoff.
2260: \newblock Verification of real-time systems using linear relation analysis.
2261: \newblock {\em Formal Methods in System Design}, 11(2):157--185, 1997.
2262:
2263: \bibitem{HeintzeJafVoi00}
2264: Nevin Heintze, Joxan Jaffar, and R\u{a}zvan Voicu.
2265: \newblock A framework for combining analysis and verification.
2266: \newblock In {\em Records of the 27$^{th}$\/ Annual {ACM} Symposium on
2267: Principles of Programming Languages}, pages 26--39, 2000.
2268:
2269: \bibitem{HeiJaf/94}
2270: N.~Heintze and J.~Jaffar.
2271: \newblock Set constraint and set--based program analysis.
2272: \newblock In A.~Borning, editor, {\em Principles and Practice of Constraint
2273: Programming}, volume 874 of {\em Lecture Notes in Computer Science}, pages
2274: 281--298. Springer--Verlag, 1994.
2275:
2276: \bibitem{PFA/81}
2277: N.D. Jones and S.S. Muchnick, editors.
2278: \newblock {\em Program Flow Analysis: Theory and Applications}.
2279: \newblock Prentice-Hall, 1981.
2280:
2281: \bibitem{Jo/87}
2282: N.D. Jones.
2283: \newblock Flow analysis of lazy higher--order functional programs.
2284: \newblock In S.~Abramsky and C.~Hankin, editors, {\em Abstract Interpretation
2285: of Declarative Languages}, pages 103--122. Ellis Horwood, 1987.
2286:
2287: \bibitem{Karr/76}
2288: M.~Karr.
2289: \newblock Affine relationships among variables of a program.
2290: \newblock {\em Acta Informatica}, 6:133--151, 1976.
2291:
2292: \bibitem{Ko/95:phd}
2293: D.V. Kochetov.
2294: \newblock {\em Effective specialization of {A}lgol--like programs}.
2295: \newblock {Ph.D. Thesis}, Institute of Informatics Systems, Novosibirsk,
2296: Russia, 1995.
2297:
2298: \bibitem{Mas/92}
2299: F.~Masdupuis.
2300: \newblock Array operations abstraction using semantic analysis of trapezoid
2301: congruences.
2302: \newblock In {\em Proceedings of the International Conference on
2303: Supercomputing}, pages 226--235. ACM Press, 1992.
2304:
2305: \bibitem{Mas/93}
2306: F.~Masdupuis.
2307: \newblock Semantic analysis of interval congruences.
2308: \newblock In {\em Proceedings of the International Conference Formal Methods in
2309: Programming and Their Applications}, volume 735 of {\em Lecture Notes in
2310: Computer Science}, pages 142--155. Springer--Verlag, 1993.
2311:
2312: \bibitem{Mine/2001:pado}
2313: A.~Min{\'e}.
2314: \newblock A new numerical abstract domain based on difference-bound matrices.
2315: \newblock In {\em Proceedings of the Second Symposium on Programs as Data
2316: Objects}, volume 2053, pages 155--169, 2001.
2317:
2318: \bibitem{Rosen/82}
2319: B.K. Rosen.
2320: \newblock Robust linear algorithms for cutsets.
2321: \newblock {\em Journal Algorithms}, 3:205--217, 1982.
2322:
2323: \bibitem{RuthingKnoopSteffen-SAS99}
2324: O.~R{\"u}thing, J.~Knoop, and B.~Steffen.
2325: \newblock Detecting equalities of variables: Combining efficiency with
2326: precision.
2327: \newblock In {\em Proc. of the 6$^{th}$ International Static Analysis
2328: Symposium}, volume 1694 of {\em Lecture Notes in Computer Science}, pages
2329: 232--247. Springer--Verlag, 1999.
2330:
2331: \bibitem{Sa/79}
2332: V.~Sabelfeld.
2333: \newblock Polynomial upper bound for the complexity of the logic-termal
2334: equivalence decision.
2335: \newblock {\em Doklady Akademii Nauk, Matematika}, 249(4):793--796, 1979.
2336:
2337: \bibitem{Sa/80}
2338: V.~Sabelfeld.
2339: \newblock The logic--termal equivalence is polynomial--time decidable.
2340: \newblock {\em Information Processing Letters}, 10(2):102--112, 1980.
2341:
2342: \bibitem{Speckenmeyer/89}
2343: E.~Speckenmeyer.
2344: \newblock On feedback problems in digraphs.
2345: \newblock In {\em Proceedings of the 15$^{th}$ International Workshop on
2346: Graph--Theoretic Concepts in Computer Science}, volume 411 of {\em Lecture
2347: Notes in Computer Science}, pages 218--231. Springer--Verlag, 1990.
2348:
2349: \bibitem{Venet/99}
2350: A.~Venet.
2351: \newblock Automatic analysis of pointer aliasing for untyped programs.
2352: \newblock {\em Science of Computer Programming}, 35(2-3):223--248, 1999.
2353:
2354: \bibitem{WZ/91}
2355: M.~Wegman and F.K. Zadeck.
2356: \newblock Constant propagation with conditional branches.
2357: \newblock {\em ACM Transactions on Programming Language and Systems},
2358: 13(2):181--210, 1991.
2359:
2360: \end{thebibliography}
2361:
2362:
2363:
2364:
2365:
2366: %\end{document}
2367:
2368: \newpage
2369:
2370:
2371: \section*{Appendix. Analysis of KMP}
2372:
2373: The appendix presents results of application of \ERA\/ to {\bf
2374: KMP} program generated by the specializer {\em M2Mix}. This
2375: program is a specialization of a program implementing the
2376: na{\"\i}ve pattern matching {\bf Match(p,str)} with respect to the
2377: pattern {\bf p="ababb"}. The invariants are written as comments at
2378: program points where they hold.
2379: %
2380: %Let us consider the results of our analysis of the program \linebreak
2381: %{\bf Match("ababb",str)} written as comments at program points where
2382: %they hold. Only the most interesting of them are given.
2383: We can conclude that:
2384:
2385:
2386: \begin{itemize}
2387: \item The target string necessarily ends with "\#" and the variable
2388: {\bf ls} is equal to the string length (line 10).
2389:
2390: \item Every time when some element of {\bf str}
2391: (lines 20, 31, 42, 53, 58, 67, 72, 78, 86, 108, 113, 129, 140, 151, 162)
2392: is used in second {\bf LOOP}, the value of its index
2393: expression does not exceed the value of the variable {\bf ls}.
2394: The same is true for the value of a variable before the
2395: increment statements {\bf INC(s)}
2396: (lines 23, 34, 45, 62, 81, 89, 94, 98, 103, 116, 120, 132, 143, 154, 165).
2397: Therefore, it suffices to check that a value of {\bf ls}
2398: is not beyond the ranges determined by the type {\bf \_TYPE354a04}
2399: during input of the target string (line 8). So, in the second cycle
2400: all range checks can be eliminated.
2401:
2402: \item The assignment {\bf \_cfg\_counter:=0} is redundant (line 24).
2403:
2404: \item Conditions {\bf str[s+2]='a'} and {\bf str[s]='a'} are always false
2405: (lines 78 and 86, respectively) because two different constants are equal.
2406: So, the code of {\bf THEN}--branches is dead.
2407:
2408: \item The conditions at the lines 63, 68, 74, 82, 104, 109 are false, too.
2409: However, automatic detection of these properties are not as easily as the previous.
2410:
2411:
2412: \end{itemize}
2413:
2414:
2415: Using this semantic information, it is possible to build a new
2416: program functionally equivalent to {\bf Match("ababb",str)}. In
2417: text of the program given below the underlined code can be
2418: eliminated.
2419:
2420: \vskip\baselineskip
2421:
2422: \fontsize{8}{10}\selectfont
2423: {\setcounter{numline}{0}
2424: \begin{tabular}{ll} \vspace*{-1pt}
2425: MODULE Match; &\\
2426: \vspace*{-1pt} \Z FROM FIO IMPORT
2427: File,Open,ReadChar,\lefteqn{\mbox{WriteInt,stdout;}}&\\
2428: \vspace*{-1pt}
2429:
2430: \Z VAR \_cfg\_counter : CARDINAL; str\_file : File; &\\
2431: \vspace*{-1pt}
2432:
2433: \Z TYPE \_TYPE354a04 = [0..20]; &\\
2434: \vspace*{-1pt}
2435:
2436: \Z TYPE \_TYPE355004 = ARRAY \_TYPE354a04 \lefteqn{\mbox{OF
2437: CHAR;}}&\\ \vspace*{-1pt}
2438:
2439: \Z VAR str : \_TYPE355004; ls,s : \_TYPE354a04;
2440: &\\ \vspace*{-1pt}
2441: %
2442: BEGIN &\\\NM
2443: \Z str\_file := Open("target.dat"); &\\\NM
2444: \Z ls := 0; &\\\NM
2445: \Z LOOP &\\\NM
2446: \Z\Z str[ls] := ReadChar(str\_file); &\\\NM
2447: \Z\Z IF (str[ls]='\#') THEN &$(*str[ls]='\!\!\#'*)$\\\NM
2448: \Z\Z\Z EXIT &\\\NM
2449: \Z\Z ELSE &$(*str[ls]\neq'\!\!\#'*)$\\\NM
2450: \Z\Z\Z INC(ls) &\\\NM
2451: \Z\Z END &\\\NM
2452: \Z END; &$(*str[ls]='\!\!\#'*)$\\\NM
2453: \Z s := 0; &\\\NM
2454: \Z \_cfg\_counter := 0; &$(*s=\_cfg\_counter=0*)$\\
2455: \end{tabular}
2456:
2457: %\hspace*{-1cm}
2458: \begin{tabular}{ll} \NM
2459: \Z LOOP &\\\NM
2460: \Z\Z CASE \_cfg\_counter OF &\\\NM
2461: \Z\Z $|$ 0 : &$(*\_cfg\_counter=0*)$\\\NM
2462: \Z\Z\Z IF ((s+0)$\geq$ls) THEN &$(*s\geq ls*)$\\\NM
2463: \Z\Z\Z\Z WriteInt(stdout,(-1),0); &\\\NM
2464: \Z\Z\Z\Z EXIT &\\\NM
2465: \Z\Z\Z END; &$(*0<ls,s<ls*)$\\\NM
2466: \Z\Z\Z IF (str[(s+0)]='a') THEN &$(*str[s]='\!\!a',0<ls,s<ls*)$\\\NM
2467: \Z\Z\Z\Z \_cfg\_counter := 1 &\\\NM
2468: \Z\Z\Z ELSE &$(*0<ls,s<ls,str[0]\neq'\!\!a'*)$\\\NM
2469: \Z\Z\Z\Z INC(s); &$(*\_cfg\_counter=0*)$\\\NM
2470: \Z\Z\Z\Z \underline{\_cfg\_counter := 0} &\\\NM
2471: \Z\Z\Z END &$(*0<ls,s<ls*)$\\\NM
2472: \Z\Z $|$ 1 : &$(*\_cfg\_counter=1*)$\\\NM
2473: \Z\Z\Z IF ((s+1)$\geq$ls) THEN &$(*s+1\geq ls*)$\\\NM
2474: \Z\Z\Z\Z WriteInt(stdout,(-1),0); &\\\NM
2475: \Z\Z\Z\Z EXIT &\\\NM
2476: \Z\Z\Z END; &$(*s+1<ls*)$\\\NM
2477: \Z\Z\Z IF (str[(s+1)]='b') THEN &$(*str[s+1]='\!\!b'*)$\\\NM
2478: \Z\Z\Z\Z \_cfg\_counter := 2 &\\\NM
2479: \Z\Z\Z ELSE &$(*str[s+1]\neq'\!\!b'*)$\\\NM
2480: \Z\Z\Z\Z INC(s); &$(*str[s]\neq'\!\!b',s<ls*)$\\\NM
2481: \Z\Z\Z\Z \_cfg\_counter := 0 &\\\NM
2482: \Z\Z\Z END &\\\NM
2483: \Z\Z $|$ 2 : &$(*\_cfg\_counter=2*)$\\\NM
2484: \Z\Z\Z IF ((s+2)$\geq$ls) THEN &$(*s+2\geq ls*)$\\\NM
2485: \Z\Z\Z\Z WriteInt(stdout,(-1),0); &\\\NM
2486: \Z\Z\Z\Z EXIT &\\\NM
2487: \Z\Z\Z END; &$(*s+2<ls*)$\\\NM
2488: \Z\Z\Z IF (str[(s+2)]='a') THEN &$(*str[s+2]='\!\!a'*)$\\\NM
2489: \Z\Z\Z\Z \_cfg\_counter := 3 &\\\NM
2490: \Z\Z\Z ELSE &$(*str[s+2]\neq'\!\!a'*)$\\\NM
2491: \Z\Z\Z\Z INC(s); &$(*str[s+1]\neq'\!\!a',s+1<ls*)$\\\NM
2492: \Z\Z\Z\Z \_cfg\_counter := 4 &\\\NM
2493: \Z\Z\Z END &\\\NM
2494: \Z\Z $|$ 3 : &$(*\_cfg\_counter=3*)$\\\NM
2495: \Z\Z\Z IF ((s+3)$\geq$ls) THEN &$(*s+3\geq ls*)$\\\NM
2496: \Z\Z\Z\Z WriteInt(stdout,(-1),0); &\\\NM
2497: \Z\Z\Z\Z EXIT &\\\NM
2498: \Z\Z\Z END; &$(*s+3<ls*)$\\\NM
2499: \Z\Z\Z IF (str[(s+3)]='b') THEN &$(*str[s+3]='\!\!b'*)$\\\NM
2500: \Z\Z\Z\Z IF ((s+4)$\geq$ls) THEN &$(*s+4\geq ls,s+3<ls*)$\\\NM
2501: \Z\Z\Z\Z\Z WriteInt(stdout,(-1),0); &\\\NM
2502: \Z\Z\Z\Z\Z EXIT &\\\NM
2503: \Z\Z\Z\Z END; &$(*s+4<ls*)$\\\NM
2504: \Z\Z\Z\Z IF (str[(s+4)]='b') THEN &$(*str[s+3]=str[s+4]='\!\!b'*)$\\\NM
2505: \Z\Z\Z\Z\Z WriteInt(stdout,s,0); &\\\NM
2506: \Z\Z\Z\Z\Z EXIT &\\\NM
2507: \Z\Z\Z\Z ELSE &$(*str[s+3]='\!\!b',str[s+4]\neq'\!\!b',s+4<ls*)$\\\NM
2508: \Z\Z\Z\Z\Z INC(s); &$(*str[s+2]='\!\!b',str[s+3]\neq'\!\!b',s+3<ls*)$\\\NM
2509: \Z\Z\Z\Z\Z \underline{IF ((s+0)$\geq$ls) THEN} &$(*s\geq ls,s+3<ls*)$\\\NM
2510: \Z\Z\Z\Z\Z\Z \underline{WriteInt(stdout,(-1),0);} &\\\NM
2511: \Z\Z\Z\Z\Z\Z \underline{EXIT} &\\\NM
2512: \Z\Z\Z\Z\Z \underline{END;} &$(*str[s+2]='\!\!b',str[s+3]\neq'\!\!b',s+3<ls*)$\\\NM
2513: \Z\Z\Z\Z\Z IF (str[(s+0)]='a') THEN &$(*str[s]='\!\!a',str[s+2]='\!\!b',str[s+3]\neq'\!\!b',$\\
2514: &$\opn s+3<ls*)$\\\NM
2515: \Z\Z\Z\Z\Z\Z \underline{IF ((s+1)$\geq$ls) THEN} &$(*s+1\geq ls,s+3<ls*)$\\\NM
2516: \Z\Z\Z\Z\Z\Z\Z \underline{WriteInt(stdout,(-1),0);}&\\\NM
2517: \Z\Z\Z\Z\Z\Z\Z \underline{EXIT} &\\\NM
2518: \Z\Z\Z\Z\Z\Z \underline{END;} &$(*\mbox{the same as at line 67}*)$\\
2519: \end{tabular}
2520:
2521: %\hspace*{-1cm}
2522: \begin{tabular}{ll} \NM
2523: \Z\Z\Z\Z\Z\Z IF (str[(s+1)]='b') THEN &$(*str[s]='\!\!a',str[s+1]='\!\!b',str[s+2]='\!\!b',$\\\NM
2524: &$\opn str[s+3]\neq'\!\!b',s+3<ls*)$\\\NM
2525: \Z\Z\Z\Z\Z\Z\Z \underline{IF ((s+2)$\geq$ls) THEN} &$(*s+2\geq ls,s+3<ls*)$\\\NM
2526: \Z\Z\Z\Z\Z\Z\Z\Z \underline{WriteInt(stdout,(-1),0);}&\\\NM
2527: \Z\Z\Z\Z\Z\Z\Z\Z \underline{EXIT} &\\\NM
2528: \Z\Z\Z\Z\Z\Z\Z \underline{END;} &$(*str[s]='\!\!a',str[s+1]='\!\!b',str[s+2]='\!\!b',$\\
2529: &$\opn str[s+3]\neq'\!\!b',s+3<ls*)$\\\NM
2530: \Z\Z\Z\Z\Z\Z\Z \underline{IF (str[(s+2)]='a') THEN}&$(*\mbox{inaccessible point}*)$\\\NM
2531: \Z\Z\Z\Z\Z\Z\Z\Z \underline{\_cfg\_counter := 3} &\\\NM
2532: \Z\Z\Z\Z\Z\Z\Z \underline{ELSE} &$(*str[s]='\!\!a',str[s+1]='\!\!b',str[s+2]='\!\!b',$\\
2533: &$\opn str[s+3]\neq'\!\!b',s+3<ls*)$\\\NM
2534: \Z\Z\Z\Z\Z\Z\Z\Z INC(s); &$(*str[s-1]='\!\!a',str[s]='\!\!b',str[s+1]='\!\!b',$\\
2535: &$\opn str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM
2536: \Z\Z\Z\Z\Z\Z\Z\Z \underline{IF ((s+0)$\geq$ls) THEN}&$(*s\geq ls,s+2<ls*)$\\\NM
2537: \Z\Z\Z\Z\Z\Z\Z\Z\Z \underline{WriteInt(stdout,(-1),0);}&\\\NM
2538: \Z\Z\Z\Z\Z\Z\Z\Z\Z \underline{EXIT} &\\\NM
2539: \Z\Z\Z\Z\Z\Z\Z\Z \underline{END;} &$(*str[s-1]='\!\!a',str[s]='\!\!b',str[s+1]='\!\!b',$\\
2540: &$\opn str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM
2541: \Z\Z\Z\Z\Z\Z\Z\Z \underline{IF (str[(s+0)]='a') THEN}&$(*\mbox{inaccessible point}*)$\\\NM
2542: \Z\Z\Z\Z\Z\Z\Z\Z\Z \underline{\_cfg\_counter:=14} &\\\NM
2543: \Z\Z\Z\Z\Z\Z\Z\Z \underline{ELSE} &$(*str[s-1]='\!\!a',str[s]='\!\!b',str[s+1]='\!\!b',$\\
2544: &$\opn str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM
2545: \Z\Z\Z\Z\Z\Z\Z\Z\Z INC(s); &$(*str[s-2]='\!\!a',str[s-1]='\!\!b',str[s]='\!\!b',$\\
2546: &$\opn str[s+1]\neq'\!\!b',s+1<ls*)$\\\NM
2547: \Z\Z\Z\Z\Z\Z\Z\Z\Z \_cfg\_counter:=4 &\\\NM
2548: \Z\Z\Z\Z\Z\Z\Z\Z \underline{END} &\\\NM
2549: \Z\Z\Z\Z\Z\Z\Z \underline{END} &$(*str[s-2]='\!\!a',str[s-1]='\!\!b',str[s]='\!\!b',$\\
2550: &$\opn str[s+1]\neq'\!\!b',s+1<ls*)$\\
2551: \NM
2552: \Z\Z\Z\Z\Z\Z ELSE &$(*str[s]='\!\!a',str[s+1]\neq'\!\!b',str[s+2]='\!\!b',$\\
2553: &$\opn str[s+3]\neq'\!\!b',s+3<ls*)$\\\NM
2554: \Z\Z\Z\Z\Z\Z\Z INC(s); &$(*str[s-1]='\!\!a',str[s]\neq'\!\!b',str[s+1]='\!\!b',$\\
2555: &$\opn str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM
2556: \Z\Z\Z\Z\Z\Z\Z \_cfg\_counter := 12 &\\\NM
2557: \Z\Z\Z\Z\Z\Z END &\\\NM
2558: \Z\Z\Z\Z\Z ELSE &$(*str[s]='\!\!a',str[s+2]='\!\!b',str[s+3]\neq'\!\!b',$\\
2559: &$\opn s+3<ls*)$\\\NM
2560: \Z\Z\Z\Z\Z\Z INC(s); &$(*str[s-1]='\!\!a',str[s+1]='\!\!b',str[s+2]\neq'\!\!b',$\\
2561: &$\opn s+2<ls*)$\\\NM
2562: \Z\Z\Z\Z\Z\Z \_cfg\_counter := 12 &\\\NM
2563: \Z\Z\Z\Z\Z END &\\\NM
2564: \Z\Z\Z\Z END &\\\NM
2565: \Z\Z\Z ELSE &$(*str[s+3]\neq'\!\!b',s+3<ls*)$\\\NM
2566: \Z\Z\Z\Z INC(s); &$(*str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM
2567: \Z\Z\Z\Z \underline{IF ((s+0)$\geq$ls) THEN} &$(*s\geq ls,s+2<ls*)$\\\NM
2568: \Z\Z\Z\Z\Z \underline{WriteInt(stdout,(-1),0);} &\\\NM
2569: \Z\Z\Z\Z\Z \underline{EXIT} &\\\NM
2570: \Z\Z\Z\Z \underline{END;} &$(*str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM
2571: \Z\Z\Z\Z IF (str[(s+0)]='a') THEN &$(*str[s]='\!\!a',str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM
2572: \Z\Z\Z\Z\Z \underline{IF ((s+1)$\geq$ls) THEN} &$(*s+1\geq ls,s+2<ls*)$\\\NM
2573: \Z\Z\Z\Z\Z\Z \underline{WriteInt(stdout,(-1),0);}&\\\NM
2574: \Z\Z\Z\Z\Z\Z \underline{EXIT} &\\\NM
2575: \Z\Z\Z\Z\Z \underline{END;} &$(*str[s]='\!\!a',str[s+2]\neq'\!\!b',s+2<ls*)$\\
2576: \end{tabular}
2577:
2578: %\hspace*{-1cm}
2579: \begin{tabular}{ll} \NM
2580: \Z\Z\Z\Z\Z IF (str[(s+1)]='b') THEN &$(*str[s]='\!\!a',str[s+1]='\!\!b',str[s+2]\neq'\!\!b',$\\
2581: &$\opn s+2<ls*)$\\\NM
2582: \Z\Z\Z\Z\Z\Z \_cfg\_counter := 2 &\\\NM
2583: \Z\Z\Z\Z\Z ELSE &$(*str[s]='\!\!a',str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM
2584: \Z\Z\Z\Z\Z\Z INC(s); &$(*str[s-1]='\!\!a',str[s+1]\neq'\!\!b',s+1<ls*)$\\\NM
2585: \Z\Z\Z\Z\Z\Z \_cfg\_counter := 10 &\\\NM
2586: \Z\Z\Z\Z\Z END &\\\NM
2587: \Z\Z\Z\Z ELSE &$(*str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM
2588: \Z\Z\Z\Z\Z INC(s); &$(*str[s+1]\neq'\!\!b',s+1<ls*)$\\\NM
2589: \Z\Z\Z\Z\Z \_cfg\_counter := 10 &\\\NM
2590: \Z\Z\Z\Z END &\\\NM
2591: \Z\Z\Z END &\\\NM
2592: \Z\Z $|$ 4 : &$(*\_cfg\_counter=4*)$\\\NM
2593: \Z\Z\Z IF ((s+0)$\geq$ls) THEN &$(*s\geq ls*)$\\\NM
2594: \Z\Z\Z\Z WriteInt(stdout,(-1),0); &\\\NM
2595: \Z\Z\Z\Z EXIT &\\\NM
2596: \Z\Z\Z END; &$(*s<ls*)$\\\NM
2597: \Z\Z\Z IF (str[(s+0)]='a') THEN &$(*str[s]='\!\!a',s<ls*)$\\\NM
2598: \Z\Z\Z\Z \_cfg\_counter := 1 &\\\NM
2599: \Z\Z\Z ELSE &$(*str[s]\neq'\!\!a',s<ls*)$\\\NM
2600: \Z\Z\Z\Z INC(s); &$(*str[s-1]\neq'\!\!a',s\leq ls*)$\\\NM
2601: \Z\Z\Z\Z \_cfg\_counter := 0 &\\\NM
2602: \Z\Z\Z END &\\\NM
2603: \Z\Z $|$ 10: &$(*\_cfg\_counter=10*)$\\\NM
2604: \Z\Z\Z IF ((s+0)$\geq$ls) THEN &$(*s\geq ls*)$\\\NM
2605: \Z\Z\Z\Z WriteInt(stdout,(-1),0); &\\\NM
2606: \Z\Z\Z\Z EXIT &\\\NM
2607: \Z\Z\Z END; &$(*s<ls*)$\\\NM
2608: \Z\Z\Z IF (str[(s+0)]='a') THEN &$(*str[s]='\!\!a',s<ls*)$\\\NM
2609: \Z\Z\Z\Z \_cfg\_counter := 1 &\\\NM
2610: \Z\Z\Z ELSE &$(*str[s]\neq'\!\!a',s<ls*)$\\\NM
2611: \Z\Z\Z\Z INC(s); &$(*str[s-1]\neq'\!\!a',s\leq ls*)$\\\NM
2612: \Z\Z\Z\Z \_cfg\_counter := 0 &\\\NM
2613: \Z\Z\Z END &\\
2614: \NM
2615: \Z\Z $|$ 12: &$(*\_cfg\_counter=12*)$\\\NM
2616: \Z\Z\Z IF ((s+0)$\geq$ls) THEN &$(*s\geq ls*)$\\\NM
2617: \Z\Z\Z\Z WriteInt(stdout,(-1),0); &\\\NM
2618: \Z\Z\Z\Z EXIT &\\\NM
2619: \Z\Z\Z END; &$(*s<ls*)$\\\NM
2620: \Z\Z\Z IF (str[(s+0)]='a') THEN &$(*str[s]='\!\!a',s<ls*)$\\\NM
2621: \Z\Z\Z\Z \_cfg\_counter := 14 &\\\NM
2622: \Z\Z\Z ELSE &$(*str[s]\neq'\!\!a',s<ls*)$\\\NM
2623: \Z\Z\Z\Z INC(s); &$(*str[s-1]\neq'\!\!a',s\leq ls*)$\\\NM
2624: \Z\Z\Z\Z \_cfg\_counter := 4 &\\\NM
2625: \Z\Z\Z END &\\\NM
2626: \Z\Z $|$ 14: &$(*\_cfg\_counter=14*)$\\\NM
2627: \Z\Z\Z IF ((s+1)$\geq$ls) THEN &$(*s+1\geq ls*)$\\\NM
2628: \Z\Z\Z\Z WriteInt(stdout,(-1),0); &\\\NM
2629: \Z\Z\Z\Z EXIT &\\\NM
2630: \Z\Z\Z END; &$(*s+1<ls*)$\\\NM
2631: \Z\Z\Z IF (str[(s+1)]='b') THEN &$(*str[s+1]='\!\!b',s+1<ls*)$\\\NM
2632: \Z\Z\Z\Z \_cfg\_counter := 2 &\\\NM
2633: \Z\Z\Z ELSE &$(*str[s+1]\neq'\!\!b',s+1<ls*)$\\\NM
2634: \Z\Z\Z\Z INC(s); &$(*str[s]\neq'\!\!b',s<ls*)$\\\NM
2635: \Z\Z\Z\Z \_cfg\_counter := 4 &\\\NM
2636: \Z\Z\Z END &\\\NM
2637: \Z\Z END &\\
2638: \Z END &\\
2639: END Match. &\\
2640: \end{tabular}
2641:
2642:
2643:
2644:
2645:
2646:
2647: \end{document}
2648: