0609:cs0609092/era.tex

1: \documentclass{article}

2:

3: \usepackage{euscript}

4: \usepackage{amssymb}

5: \usepackage{epsfig}

6:

7: \textwidth=1.1\textwidth

8: %\textheight=1.1\textheight

9: %\def\baselinestretch{1.2}

10:

11:

12: %\newtheorem{Theorem}{Theorem}

13: \newtheorem{lem}{Lemma}

14: \newenvironment{pf}{{\bf Proof}}{~~{Q.E.D.}}

15:

16:

17:

18:

19: \newcommand{\ERA}{{\bf ERA}}

20:

21: \newlength{\opencomm}

22: \settowidth{\opencomm}{$(*$}

23: \newcommand{\opn}{\hspace*{\opencomm}}

24: \newlength{\gammalen}

25: \settowidth{\gammalen}{$\hspace*{1mm}\gamma(\tilde{\pi}\setminus S^\prime)=$}

26: \newcommand{\gammaspc}{\hspace*{\gammalen}}

27:

28:

29: \newcommand{\No}{\mbox{${N}^{\circ}$}}

30:

31:

32: \newcommand{\ET}[1]{\underline{#1}}

33: \newcommand{\C}[1]{\mbox{$\EuScript #1$}}

34: \newcommand{\GC}[2]{\mbox{#1\hspace*{-0.5em}\makebox[2em][c]{%

35:                     \raisebox{-1pt}{$\rightharpoondown$}\hspace*{-1em}%

36:                     \raisebox{1pt}{$\leftharpoonup$}\hspace*{-0.75em}%

37:                     \raisebox{-2pt}{{\tiny$\alpha$}}\hspace*{-0.4em}%

38:                     \raisebox{5pt}{{\tiny$\gamma$}}%

39:                    }#2}}

40:

41: \newcommand{\notsim}{\mbox{$\not\sim$}}

42: \newcommand{\BIGSQCAP}{\mbox{{\LARGE$\sqcap$}}}

43: \newcommand{\DERIV}[2]{\mbox{$\stackrel{\hspace*{-1mm}\mbox{{\tiny#1}}}{\Rightarrow}

44:                        \hspace*{-1.1em}

45:                        \raisebox{-1.25mm}{{\tiny #2}}

46:                        \hspace*{0.4em}$}}

47: \newcommand{\IMP}[1]{\mbox{${\EuScript ALS}(#1)$}}

48: \newcommand{\SEM}[2]{\mbox{$#1[\![#2]\!]$}}

49: \newcommand{\SOP}[2]{\mbox{$#1\lefteqn{\{}\hspace*{2.7pt}\vert#2\lefteqn{\vert}\}$}}

50: \newcommand{\CSEM}[2]{\mbox{$Sem[\![#2]\!]#1$}}

51:

52: \newcommand{\TrS}{\mbox{$\langle{\EuScript S},\tau,\iota,\varsigma\rangle$}}

53: \newcommand{\TAU}{\mbox{$\wp({\EuScript S}\times{\EuScript S})$}}

54: \newcommand{\PSS}{\mbox{$\wp({\EuScript S})$}}

55: \newcommand{\CBOT}{\mbox{$\bot$}}

56: \newcommand{\CTOP}{\mbox{$\top$}}

57: \newcommand{\CINCL}{\mbox{$\subseteq$}}

58: \newcommand{\CCUP}{\mbox{$\cup$}}

59: \newcommand{\CSD}{\mbox{$\langle\PSS;\CINCL,\CBOT,\CCUP\rangle$}}

60: \newcommand{\CSF}[1]{\mbox{{\it sp}$_{\iota}^{\tau^{#1}}$}}

61:

62: \newcommand{\APSS}{\mbox{$\overline{\PSS}$}}

63: \newcommand{\ABOT}{\mbox{$\bot^\prime$}}

64: \newcommand{\ATOP}{\mbox{$\top^\prime$}}

65: \newcommand{\AINCL}{\mbox{$\supseteq$}}

66: \newcommand{\ACAP}{\mbox{$\cap$}}

67: \newcommand{\ACUP}{\mbox{$\cup$}}

68: \newcommand{\CV}{\mbox{${\EuScript C\EuScript V}$}}

69: \newcommand{\FP}{\mbox{${\EuScript F\EuScript P}$}}

70: \newcommand{\TERM}{\mbox{${\EuScript T\EuScript R\EuScript S}$}}

71: \newcommand{\TYPES}{\mbox{${\EuScript T\EuScript Y\EuScript P\EuScript E\EuScript S}$}}

72: \newcommand{\EQS}{\mbox{${\EuScript E\!\EuScript Q\EuScript S}$}}

73: \newcommand{\PSEQS}{\mbox{$\wp(\EQS)$}}

74: \newcommand{\PPSEQS}{\mbox{$\wp(\wp(\EQS))$}}

75: \newcommand{\PDEQS}{\mbox{$\wp^2(\EQS)$}}

76: \newcommand{\CSDE}{\mbox{$\PDEQS(\subseteq,\emptyset,\cup)$}}

77: \newcommand{\ASDE}{\mbox{$\PSEQS(\AINCL,\EQS,\ACAP)$}}

78: \newcommand{\INF}{\mbox{$\lefteqn{\bigcirc}\infty$}}

79: \newcommand{\ACC}[1]{\mbox{${\EuScript A}(#1)$}}

80:

81: \newcommand{\Red}[1]{\mbox{\bf Red(#1)}}

82: \newcommand{\Co}[1]{\mbox{\bf Co(#1)}}

83: \newcommand{\Know}[2]{\mbox{$#1\DERIV{+}{G}#2$}}

84: \newcommand{\DWID}{\mbox{$\lefteqn{\mbox{\raisebox{1.1ex}[0pt][0pt]{\hspace*{0.5pt}$\sim$}}}\bigtriangledown$}}

85: \newcommand{\MFVS}[1]{\mbox{$#1\backslash_{\mbox{\tiny\bf FVS}}$}}

86: \newcommand{\mfvs}{\mbox{{\small\bf FVS}}}

87: \newcommand{\OPR}[1]{\mbox{\tt\bf #1}}

88: \newcommand{\DEREF}{\mbox{$^{\wedge}$}}

89:

90: \newcounter{numline}

91: \newcommand{\NM}{\vspace*{-1pt}\stepcounter{numline}\lefteqn{\mbox{\tiny\bf\thenumline:}}}

92: \newcommand{\PHC}{\lefteqn{\phantom{\mbox{$\displaystyle\bigcap_i$}}}}

93:

94: \newcommand{\Z}{\hspace*{5mm}}

95:

96: \newcommand{\ADT}{\mbox{\,\raisebox{3pt}[0pt][0pt]{\fontsize{5}{5}\selectfont$\swarrow\!\searrow$}\,}}

97:

98: \newcommand{\KW}[1]{{\bf #1}}

99:

100:

101: \begin{document}

102:

103: \title{Analysis of Equality Relationships \\ for Imperative Programs}

104:

105: \author{Pavel Emelianov%

106: \footnote{This work was partly done when the author was in {\em

107: Laboratoire d'informatique, Ecole polytechnique}\/ (Palaiseau,

108: France) and  {\em Ecole normale sup\'erieure d'ingenieur}

109: (Bourges, France).} \\

110: Institute of Informatics Systems, \\

111: 6 avenue Lavrentiev \\

112: 630090 Novosibirsk, Russia \\

113: emelianov@iis.nsk.su}

114:

115: %\author{Pavel Emelyanov}

116: %\institute{Institute of Informatics Systems\thanks{This work was

117: %done when the author was in Laboratoire d'informatique, Ecole

118: %polytechnique (Palaiseau, France) and  Ecole normale sup\'erieure

119: %d'ingenieur (Bourges, France) in 2000-2002.}, 6 avenue Lavrentiev,

120: %630090 Novosibirsk, Russia \\ \email{emelianov@iis.nsk.su}}

121:

122:

123: %\date{19 December 2005}

124:

125:

126: \maketitle

127:

128: \begin{abstract}

129: In this article, we discuss a flow--sensitive analysis of equality

130: relationships for imperative programs. We describe its semantic

131: domains, general purpose operations over abstract computational

132: states (term evaluation and identification, semantic completion,

133: widening operator, etc.) and semantic transformers corresponding

134: to program constructs. We summarize our experiences from the last

135: few years concerning this analysis and give attention to

136: applications of analysis of automatically generated code. Among

137: other illustrating examples, we consider a program for which the

138: analysis diverges without a widening operator and results of

139: analyzing residual programs produced by some automatic partial

140: evaluator. An example of analysis of a program generated by this

141: evaluator is given.

142:

143: \vskip\baselineskip

144:

145: \noindent{\bf Keywords}: abstract interpretation, value numbering,

146: equality relationships for program terms, formal grammars,

147: semantic transformers, widening operator, automatically generated

148: programs.

149:

150: \end{abstract}

151:

152:

153:

154:

155:

156:

157: \section*{Introduction}

158:

159: {\em Semantic analysis}\/ is a powerful technique for building

160: effective and reliable programming systems. In

161: \cite{EmSa/94,Em/96,SAS-4/97} we presented a new kind of a

162: semantic flow--sensitive analysis designed in the framework of

163: {\em abstract interpretation}\/ \cite{CC/77,CC/92b,CC/92a}. This

164: analysis which determines an approximation of sets of invariant

165: term equalities $t_1=t_2$\/ was called {\em the analysis of

166: equality relationships for program terms} (hereinafter referred to

167: as \ERA).

168:

169: Most traditional static analyses of imperative programs are

170: interested in finding the (in)equalities of a specific kind

171: (so--called {\em value analyses}; only they are discussed here)

172: describing regular approximations (i.e. they have simple

173: mathematical descriptions and machine representations) of sets of

174: values: convex polyhedrons/oc\-ta\-hed\-rons/oc\-ta\-gons

175: \cite{CouHal/78,HalbwachsProyRoumanoff/97,Mine/2001:pado,ClarisoCortadella-SAS04},

176: affine \cite{Karr/76,GulwaniNecula-POPL03} and congruent

177: \cite{Gra/91b,Mas/92} hyper-planes, their non-relational

178: counterparts \cite{CC/77,WZ/91,Gra/89,Mas/93} as well, etc. They

179: are carefully designed to be reasonable (i.e. they express

180: non-trivial semantic properties) and effectively computed (i.e.

181: there are polynomial

182: algorithms to handle them\footnote{%

183: An example of this sort is an approximation of value sets by conic

184: shapes that has only one ``computational disadvantage'': semantic

185: transformers involve algorithms known to be ${\mathcal NP}$-hard.

186: Proposed in \cite{BerMar/76} many years ago it did not gain

187: ground.}) but ``regular'' nature does not allow them to treat well

188: programs with irregular control/data-flows. Hence of special

189: interest is investigations of approximations based on sets of

190: terms which can have potentially arbitrary nature, i.e. they could

191: be powerful (due to their irregularity) but effectively computed.

192: One well known example is the set--based analysis

193: \cite{HeiJaf/94,CC/95}.

194:

195: In our case, terms represent all expressions computed in programs.

196: This enables the analysis to take into account different aspects

197: of program behavior in a unified way. A such unified treatment of

198: all semantic information allows the analysis to improve its

199: accuracy. This does not mean that \ERA\/ is a generalization of

200: all other value analyses (except the {\em constant propagation}

201: one), because they use different approaches (semantic domains and

202: transformers) to extract effectively and precisely the limited

203: classes of semantic properties. In general, the results of the

204: analyses are not comparable.

205:

206: \ERA\/ provides interesting possibilities for gathering and

207: propagating different invariant information about programs in a

208: unified way. This information can be used both for verification

209: and optimization purposes. The second is especially interesting

210: for automatically generated programs: residual, i.e. obtained in

211: the process of the partial evaluation, and synthesized from

212: high--level specifications. Due to nature of automatic generation

213: processes, such programs have specific control flows (for example,

214: hierarchy of nested conditional statements with specific

215: conditions; in the case of residual programs this hierarchy is

216: more deep as ``degree'' of the partial evaluation increases) that

217: can be successfully optimized on base of gathered invariant

218: information.

219:

220: Besides the peculiarity of \ERA\/ mentioned above, let us discuss

221: some common properties of the semantic analyses. Such taxonomic

222: properties of the analysis algorithms as the attribute

223: (in)dependence, context (in)sensitivity, flow (in)sensitivity,

224: scalability and some other properties are well known. However, it

225: is the author's opinion that a notion of ``{\em interpretability

226: of a semantic analysis}''\/ has not been considered adequately

227: yet. Here the interpretability of analysis means how extensively

228: the properties of primitive operations of the language

229: (arithmetical, logical, etc.) and type information are allowed for

230: analyzing and can be handled when the analysis works.

231:

232: One extreme point of view on the interpretability is an approach

233: accepted in the ``pure'' program scheme theory where no

234: interpretations of functional symbols or type information are allowed\footnote{%

235: More precisely, there exist some works in the program scheme

236: theory where some semantic interpretations of functional symbols

237: (like commutativity of superposition $f\circ g\equiv g\circ f$,

238: etc.) are considered.}. Unfortunately, the results obtained under

239: this approach are not reasonably strong. Nevertheless, it must be

240: underscored that \ERA\/ dates back to V. Sabelfeld's works in the

241: program scheme theory \cite{Sa/79,Sa/80}. Another extreme leads to

242: the complete description of the program behavior that is also not

243: workable. Obviously, it is closely allied to its flow sensitivity

244: (ignoring some part of semantic information does not allow us to

245: treat precisely some control flow constructs of analyzed programs)

246: and its scalability (attempts to take into account large quantity

247: of semantic properties, for example, using some theorem prover

248: which is invoked while an semantic analyzer works and deduces new

249: properties, can lead to combinatorial explosion for abstract

250: computational states).

251:

252:

253: It is possible that the interpretability has not been highlighted

254: enough, because most of analysis algorithms take into account the

255: limited classes of primitive operations and type information and

256: they cannot be enriched in some natural way. For example, an

257: interval analysis is not able to incorporate congruence properties

258: in some natural way, etc\footnote{Of course, it is possible to use

259: sophisticated approaches for combinations of analyses but it

260: introduces complicated problems under implementations and it is

261: not an enrichment of original ones.}. Essentially another case is

262: \ERA\/ where we have a choice to handle expressiveness of the

263: analysis. We intend to illustrate the notion of ``interpretability

264: of analysis'', its importance and usefulness on this example of

265: analysis.

266:

267: Among the analyses closely related to ours we would like to point

268: out the following. A semantic analysis for detection of equalities

269: between program variables (and simple relationships among them)

270: was described in \cite{AWZ/88}. It makes a list of sets of

271: variables discovered to be equal by using the Hopcroft's

272: partitioning algorithm for finite--state automata. This algorithm

273: being quite efficient is not however precise enough. Further

274: value-numbering techniques were developed in

275: \cite{RuthingKnoopSteffen-SAS99,Gargi-PLDI02,GulwaniNecula-SAS04}.

276: These algorithms demonstrate that adequacy of value numbering is

277: resource-consuming. For example, in the last case time complexity

278: of the algorithm is $O(k^3jN)$\/ where $k$\/ is a number of

279: program variables, $j$\/ is a number join-points, and $N$\/ is

280: program size.

281:

282: Another important example is the set-based analysis

283: \cite{HeiJaf/94} mentioned above. Here approximating sets of terms

284: are found with resolving some system of set-theoretical equations.

285: Formal grammars were used for an analysis of recursive data

286: structures of functional languages (see, for example,

287: \cite{Jo/87}). Formal languages were applied to coding of memory

288: access paths in \cite{Deu/94,Venet/99} and values of program

289: variables in the set-based analysis. \cite{CC/95} established

290: common foundations connecting and generalizing different

291: approaches using formal languages to represent semantic

292: properties. Of course, we should mention techniques from the

293: automatic proof theory and the term rewriting theory which can be

294: widely applied both at the analysis stage to improve its accuracy

295: and the post-processing stage to present its results to the user.

296:

297: This article is organized as follows: In {\bf Sections

298: \ref{PConcrete}} and {\bf\ref{PAbstract}} we describe the semantic

299: properties, concrete and abstract, respectively, which are

300: considered in \ERA. In {\bf Section \ref{OpBasic}} we discuss some

301: basic operations over the semantic properties used to define the

302: semantic transformers which are next presented in {\bf Section

303: \ref{STransformers}}. In {\bf Section \ref{WideningOp}} we

304: consider a widening operator and  the complexity of \ERA\/ is

305: discussed. Finally, {\bf Section \ref{Experiments}} describes

306: processing of \ERA\/ invariants and presents some results of our

307: experiments with \ERA. In {\bf Appendix} an example of analysis of

308: some residual program is considered.

309:

310:

311:

312: \section{Properties of interest}

313:

314: \subsection{Concrete properties}\label{PConcrete}

315:

316: A usual choice for the description of the operational semantics is

317: a specification of some transition relationship on the pairs

318: $<${\em control point, state of program memory}$>$ (see, for

319: example, \cite{PFA/81,HOOTS/98}) where the states of program

320: memory are described by mapping the cells of memory into a

321: universe of values. Here variables (groups of cells) and their

322: values (constants) are in asymmetric roles. Another example of

323: ``asymmetry'': manipulations over the structured objects of

324: programs (arrays, records etc.) are not so transparent as over the

325: primary ones.  To describe the operational semantics for \ERA, we

326: used another approach. All objects of a program are considered to

327: be ``identical'' in the following meaning.

328:

329: Let \CV\/ be a set of $0$--ary symbols representing variables and

330: constants. The last ones may be of the following kinds: scalars,

331: compositions over scalars (i.e., constant arrays, records, etc.),

332: names of record fields, and {\em indefiniteness}. Let \FP\/ be a

333: set of n--ary ({\em functional}) symbols which represent primitive

334: operations of programming languages: arithmetic, logic, type

335: casting, and all the kinds of memory addressing, as well. Let

336: \TERM\/ be a set of well-formed terms over \CV\/ and \FP,

337: hereinafter referred to as {\em program terms}. They represent

338: expressions computed during execution of a program. So, as a state

339: of program memory we take a reflexive, symmetrical, and transitive

340: relationship (i.e., {\em equivalence relationship}) over \TERM.

341: The relationship defines some set of term equalities which we use

342: to describe the operational semantics and call a {\em computation

343: state}.

344: %

345: %This approach should be compared to "Algebaric data types"

346: %

347: %Notice that this set is always infinite because there exists infinitely consistent

348: %equalities of terms in contrast to the finite mapping {\em cells$\rightarrow$values}

349: %where it is always possible for convergent programs.

350:

351:

352:

353:

354: \begin{table}%[ht]

355: \begin{center}

356: \fbox{\fontsize{8}{10}\selectfont

357: \begin{tabular}{lcc}

358: &&\\

359:        & {\sc then}-branch   &     {\sc else}-branch\\

360: &&\\

361: \hline

362: &&\\

363: \sc entry

364:        & $\!\!\!\!\left\{\rule[10mm]{0pt}{0pt}\!\!\left\{\!\!\mbox{\begin{tabular}{l}

365:                        a[1]=1,a[2]=j=2,a[3]=i=3,\\

366:                        a=\fbox{1}\fbox{2}\fbox{3},ODD(x)=TRUE\\

367:           \end{tabular}}\!\!\right\}\!\!\right\}$

368:        & $\left\{\rule[10mm]{0pt}{0pt}\!\!\left\{\!\!\mbox{\begin{tabular}{l}

369:                        a[1]=1,a[2]=j=2,a[3]=i=3,\\

370:                        a=\fbox{1}\fbox{2}\fbox{3},ODD(x)=FALSE\\

371:           \end{tabular}}\!\!\right\}\!\!\right\}$

372:        \\

373: &&\\

374: \sc exit

375:        & $\!\!\!\!\left\{\rule[10mm]{0pt}{0pt}\!\!\left\{\!\!\mbox{\begin{tabular}{l}

376:                        a[1]=i=j=1,a[2]=2,a[3]=3,\\

377:                        a=\fbox{1}\fbox{2}\fbox{3},ODD(x)=TRUE\\

378:           \end{tabular}}\!\!\right\}\!\!\right\}$

379:        & $\left\{\rule[10mm]{0pt}{0pt}\!\!\left\{\!\!\mbox{\begin{tabular}{l}

380:                        a[1]=i=j=3,a[2]=2,a[3]=1,\\

381:                        a=\fbox{3}\fbox{2}\fbox{1},ODD(x)=FALSE\\

382:           \end{tabular}}\!\!\right\}\!\!\right\}$

383:        \\

384: &&\\

385: \hline

386: &&\\

387: & \hfill \sc exit ~of~ $\lefteqn{\mbox{\footnotesize\sc if--statement}}$ & \\

388: &&\\

389: {\Large$\pi=$}

390:        & $\!\!\!\!\left\{~\rule[10mm]{0pt}{0pt}\!\!\left\{\!\!\mbox{\begin{tabular}{l}

391:                        a[1]=i=j=1,a[2]=2,a[3]=3,\\

392:                        a=\fbox{1}\fbox{2}\fbox{3},ODD(x)=TRUE

393:                        \smash{\lefteqn{\mbox{\hspace*{3mm}\huge,}}}\\

394:           \end{tabular}}\!\!\right\}\!\!\right.$

395:        & \hspace*{-2mm}$\!\!\left.\!\!\left\{\mbox{\begin{tabular}{l}

396:                        a[1]=i=j=3,a[2]=2,a[3]=1,\\

397:                        a=\fbox{3}\fbox{2}\fbox{1},ODD(x)=FALSE\\

398:           \end{tabular}}\!\!\right\}\rule[10mm]{0pt}{0pt}~\!\!\right\}$

399:        \\

400: &&\\

401: \end{tabular}

402: }

403: \end{center}

404: \caption{Description of collecting semantics for {\bf Example 1}

405:          (here the constant \fbox{$c_1$}\fbox{$c_2$}\fbox{$c_3$}

406:          represents constant arrays).}\label{Collecting}

407: \end{table}

408:

409:

410:

411:

412:

413: Suppose that the following code

414:

415: \begin{tabbing}

416: \quad \=\quad \=\quad           \kill

417:    \KW{var} x,i,j : \KW{integer}; a : \KW{array} [1..3] \KW{of integer}=\{1,2,3\};\\

418:    ...\\

419:    i := 3;\\

420:    j := i-1;\\

421:    \KW{if} \KW{odd}(x) \KW{then}\\

422: \>   i := i \KW{mod} j;\\

423: \>   j := 1;\\

424:    \KW{else}\\

425: \>   j := a[i];\\

426: \>   a[i] := a[1];\\

427: \>   a[1] := j\\

428:    \KW{end}\\

429:    ...

430: \end{tabbing}

431: \begin{center}{\bf Example 1}\end{center}

432:

433: \noindent is executed at least twice for the different parities of

434: the variable {\sc x}. In {\bf Table \ref{Collecting}} static

435: semantics for five control points is given. We present a minimum%

436: \footnote{A set of equalities can be completed with any number of

437: consistent equalities.} subset of term equalities concerning

438: dynamic behavior of the piece. We shall use the property

439: {\large$\pi$}\/ (see {\bf Table \ref{Collecting}}) to illustrate

440: our further reasoning.

441:

442: Formally it is described as follows. Let \EQS\/ be a set of all

443: equalities of the terms from \TERM, i.e.,

444: $\EQS=\{t_1=t_2~\vert~t_1, t_2\in\TERM\}$. A set $S\in\PSEQS$\/ is

445: a computation state interpreted in the following way. For each

446: equality $t_i=t_j\in S$\/ values of the expressions represented by

447: $t_i$\/ and $t_j$\/ must be equal at this point for some execution

448: trace. We take the set \PPSEQS\/ as a set for a concrete semantic

449: domain describing {\em the collecting semantics}\/ of \ERA. So, an

450: element of the concrete semantic domain is a set of computation

451: states. For a particular point in a particular program it is a set

452: of computation states each of them corresponds to some execution

453: trace in the program that reaches this point.

454:

455:

456: Properties considered in \ERA\/ are presented by means of

457: context--free grammars  ${\bf G}=({\EuScript N},{\EuScript

458: T},{\EuScript P},S)$\/ where ${\EuScript N}$\/ is a finite set of

459: nonterminals denoted by capital letters, $S\in{\EuScript N}$\/ is

460: the initial symbol of the grammar ${\bf G}$, ${\EuScript

461: T}=\CV\cup\FP\cup\{~(,~),~=,$~\raisebox{1mm}{{\Huge ,}}$\}$\/ is a

462: finite set of terminal symbols, and ${\EuScript P}$\/ is a finite

463: set of grammar rules. We do not give their precise description

464: because we shall use quite simple machinery from formal languages

465: theory that is not an object of considerations itself and serves

466: for demonstrations. We could use functional nets language as well

467: but it is rather machine--oriented and is not widely used. We

468: expect that all these descriptive ways become apparent from the

469: examples on {\bf Figure \ref{SemPrps}} and further ones.

470:

471:

472: In that way, a state of computation is represented by a language

473: $L{\bf(G)}$\/ generated by some grammar {\bf G} of the described

474: form. If for $A\in{\EuScript N} ~:~ \linebreak A\DERIV{+}{G}t_1

475: ~\wedge~ A\DERIV{+}{G}t_2$, i.e. $t_{1}=t_{2}\in L$({\bf G}), we

476: say that the nonterminal $A$\/ and the language $L$({\bf G}) know

477: the terms $t_{1},~t_{2}$. Obviously, such a grammar representation

478: has some superfluous ``syntactic sugaring''. We can use

479: $S\rightarrow A$\/ rules only and say about $A$--nonterminals as

480: classes of equal values.

481:

482:

483: \begin{figure}%[ht]

484: \centering

485: \unitlength=0.75mm

486: \linethickness{0.4pt}

487: \begin{picture}(170,61.50)

488: \put(15.00,52.50){\makebox(0,0)[cc]{Equality}}

489: \put(15.00,47.00){\makebox(0,0)[cc]{relationships}}

490: \put(82.00,52.50){\makebox(0,0)[cc]{Grammar}}

491: \put(82.00,47.00){\makebox(0,0)[cc]{representation}}

492: \put(148.00,52.50){\makebox(0,0)[cc]{Functional net}}

493: \put(148.00,47.00){\makebox(0,0)[cc]{representation}}

494: \put(0,8){\shortstack[l]{$a=h(y)$\\$x=f(a,y)$\\

495:                           $x=f(h(y),y)$\\$f(a,y)=f(h(y),y)$}}

496: \put(52,8){\shortstack[l]{$S\rightarrow A_1=A_1|A_2=A_2|A_3=A_3$\\

497:                            $A_1\rightarrow x|f(A_2,A_3)$\\

498:                            $A_2\rightarrow a|h(A_3)$\\

499:                            $A_3\rightarrow y$}}

500: \put(148.50,36.50){\oval(27.00,10.00)[]}

501: \put(140.00,34.00){\framebox(5.00,5.00)[cc]{$x$}}

502: \put(151.00,34.00){\framebox(5.00,5.00)[cc]{$f$}}

503: \put(153.00,34.00){\vector(-3,-4){13.67}}

504: \put(139.50,10.50){\oval(27.00,10.00)[]}

505: \put(131.00,8.00){\framebox(5.00,5.00)[cc]{$a$}}

506: \put(142.00,8.00){\framebox(5.00,5.00)[cc]{$h$}}

507: \put(166.00,10.50){\circle{11.0}}

508: \put(163.50,8.00){\framebox(5.00,5.00)[cc]{$y$}}

509: \put(154.00,34.00){\vector(2,-3){12.0}}

510: \put(147.00,10.00){\vector(1,0){13.50}}

511: \end{picture}

512: \caption{Semantic properties and their representations. The set of equalities

513: does not contain trivial equalities like $x=x$\/ and equalities given by

514: the symmetry of the equality relationship.}\label{SemPrps}

515: \end{figure}

516:

517: Evidently we may suppose that the set of rules ${\EuScript P}$\/

518: does not contain rules having identical right parts. It is

519: convenient to consider the grammars which do not contain useless

520: and redundant nonterminals and rules. A rule is useless if it

521: produces only one term (the language knows only a trivial equality

522: like $t=t$) and this term is not an argument of other terms. A

523: nonterminal is useless if it does not participate in derivations

524: of sentential forms \cite{AU/72}. Such grammars can arise as

525: result of operations on grammars. If nonterminals and rules are

526: useless or redundant it is possible to remove them (see {\bf Lemma

527: \ref{DeleteTerms}}). This operation called {\bf state reduction}

528: consist in detecting and removing a set of useless/redundant

529: nonterminals/rules. They are revealed with the well--known

530: incremental markup algorithms (see, for example, \cite{AU/72}).

531:

532:

533: For illustrating purposes we shall use special functional nets for

534: these grammars. Here nonterminals are represented by ovals

535: containing $0$--ary and functional symbols from right parts of

536: rules. Arcs from functional symbols to ovals represent argument

537: dependencies ordered from left to right (see {\bf Figure

538: \ref{SemPrps}}).

539:

540:

541:

542:

543: \subsection{Abstract properties}\label{PAbstract}

544:

545: It is an interesting peculiarity of \ERA\/ that abstract (i.e.

546: approximate) properties have the same nature as the computation

547: states of the operational semantics. Formally this approximation

548: is defined as follows.

549:

550:

551: \subsubsection{Functions of abstraction and concretization}

552:

553: Given a concrete property $\pi\in\PPSEQS$\/ and an abstract

554: property $\tilde{\pi}\in\PSEQS$, the abstraction function

555: $\alpha:\PPSEQS\rightarrow\PSEQS$\/ and the concretization one

556: $\gamma:\PSEQS\rightarrow\PPSEQS$\/ are defined in the following

557: way

558: \[

559: \begin{array}{lcl}

560: \alpha(\pi)~=~\left\{

561: \begin{array}{lcl}

562: \EQS,              & ~ & \mbox{if } \pi = \emptyset,\\

563: \mbox{$\displaystyle\bigcap_{S\in\pi}$}S

564:                    &   & \mbox{otherwise}

565: \end{array}

566: \right.& \mbox{~~and~~} &

567: \gamma(\tilde{\pi}) = \CCUP\{~\pi~\vert~\alpha(\pi)~\AINCL~\tilde{\pi}~\}\\

568: \end{array}

569: \]

570: where \CCUP\/ is the set--theoretical union on \PPSEQS\/ and

571: \AINCL\/ and \ACAP\/ are the set--theoretical inclusion and

572: intersection of the languages (i.e. on \PSEQS) respectively. We

573: take the empty language as infimum \ABOT\/ (there are no computed

574: expressions) of the semi-lattice of abstract semantic properties.

575: The supremum \ATOP\/ (an inaccessible computation state) is the

576: language containing all possible equalities of program terms. Also

577: \CINCL\/ is the set--theoretical inclusion on \PPSEQS\/

578:

579:

580: \begin{lem}

581: The abstraction function $\alpha$\/ is monotonic.

582: \end{lem}

583:

584: \begin{pf}

585: The function $\alpha$\/ is monotonic iff $\forall

586: \pi_1,\pi_2\in\PPSEQS : \pi_1\CINCL\pi_2 \Rightarrow \linebreak

587: \alpha(\pi_1)\AINCL\alpha(\pi_2)$. Because $\pi_1\CINCL\pi_2$,

588: then $\pi_2=\pi_1\CCUP(\pi_2\setminus\pi_1)$. So, we have

589: $\alpha(\pi_1)\AINCL \linebreak

590: \alpha(\pi_1\CCUP(\pi_2\setminus\pi_1))=

591: \alpha(\pi_1)\cap\alpha(\pi_2\setminus\pi_1)=\alpha(\pi_1)\cap(\cap_{S\in\pi_2\setminus\pi_1}S)$.

592: %\qed

593: \end{pf}

594:

595: \noindent $\gamma(\tilde{\pi})$\/ is the most imprecise element of \PPSEQS\/

596: that can be soundly approximated by $\tilde{\pi}\in\PSEQS$.

597: % Because for each abstract property $\gamma$\/ gives a unique set of languages

598: % all of which contain a sublanguage corresponding to this abstract property as a common

599: % part and not more we {\bf\large can} conclude that \ERA\/ provides the Galois surjection.

600: For the example of {\bf Table \ref{Collecting}}, the best approximation

601: of the concrete property {\large$\pi$} is

602: \[

603: \alpha(\pi)=\left\{\rule{0pt}{5mm}a[1]=i=j,a[2]=2\right\}.~~~~~~~~~~~\mbox{{\bf$(*)$}}

604: \]

605:

606:

607: %

608: %{\Huge Why semi-lattice and not complete lattice?}

609: %

610:

611: \subsubsection{Intersection of \ERA--languages}

612:

613: Finding of the intersection of context--free languages is an

614: undecidable problem in the general case. In our case, for the

615: languages of term equalities, an algorithm exists (see an example

616: at {\bf Fig. \ref{Intersection}}). It is similar to constructing a

617: Cartesian product of automata.

618:

619: \vskip 0.75\baselineskip

620: \noindent{\bf Algorithm.} Intersection of two languages of term equalities.\\

621: Input:  grammars  ${\bf G_1}=({\EuScript N}_1,{\EuScript T},{\EuScript P}_1,S_1)$

622:         and ${\bf G_2}=({\EuScript N}_2,{\EuScript T},{\EuScript P}_2,S_2)$.\\

623: Output: grammar ${\bf G}=({\EuScript N},{\EuScript T},{\EuScript

624: P},S)$ such that

625:         $L({\bf G})=L({\bf G_1})\cap L({\bf G_2})$.\\

626: Description:

627: \begin{enumerate}

628: \item Let ${\EuScript N}=\{ \langle N_1,N_2\rangle \vert%

629:           N_1\in{\EuScript N}_1~\&~N_2\in{\EuScript N}_2 \}={\EuScript N}_1\times{\EuScript N}_2$.

630:

631: \item The set of rules ${\EuScript P}$ is defined as follows:

632:       \begin{itemize}

633:       \item The rule $\langle N_1,N_2\rangle\rightarrow t$ is introduced

634:             if and only if

635:             $t\in\CV~~\&~~N_1\rightarrow t\in{\EuScript P}_1~~\&~~

636:                           N_2\rightarrow t\in{\EuScript P}_2$.

637:       \item The rule $\langle N_1,N_2\rangle\rightarrow t(%

638:             \langle N_1^1,N_2^1\rangle,\ldots,\langle N_1^k,N_2^k\rangle)$

639:             is introduced if and only if

640:             $t\in\FP~~\&~~N_1\rightarrow t(N_1^1,\ldots,N_1^k)\in{\EuScript P}_1

641:                     ~~\&~~N_2\rightarrow t(N_2^1,\ldots,N_2^k)\in{\EuScript P}_2$.

642:       \end{itemize}

643:

644: \item Add rules $S\rightarrow\langle N_1,N_2\rangle=\langle N_1,N_2\rangle$

645:       for the initial nonterminal $S$ of {\bf G} and for all

646:       $N_1\in{\EuScript N}_1\setminus\{S_1\}, N_2\in{\EuScript N}_2\setminus\{S_2\}$.

647:

648: \item Apply {\em state reduction}.

649: \end{enumerate}

650:

651:

652: \begin{figure}[t]

653: {\small

654: \centering

655: \unitlength=0.70mm

656: %\special{em:linewidth 0.4pt}

657: \linethickness{0.4pt}

658: \begin{picture}(150.00,103.00)

659: \put(26.50,95.50){\oval(25.00,9.00)[]}

660: \put(19.00,93.00){\framebox(5.00,5.00)[cc]{$f$}}

661: \put(29.00,93.00){\framebox(5.00,5.00)[cc]{$g$}}

662: \put(11.50,70.50){\oval(25.00,9.00)[]}

663: \put(4.00,68.00){\framebox(5.00,5.00)[cc]{$z$}}

664: \put(14.00,68.00){\framebox(5.00,5.00)[cc]{$f$}}

665: \put(34.00,68.00){\framebox(5.00,5.00)[cc]{$x$}}

666: \put(44.00,68.00){\framebox(5.00,5.00)[cc]{$a$}}

667: \put(54.00,68.00){\framebox(5.00,5.00)[cc]{$y$}}

668: \put(46.50,70.50){\oval(35.00,9.00)[]}

669: \put(30.00,93.00){\vector(-2,-3){12.00}}

670: \put(21.00,93.00){\vector(-2,-3){12.00}}

671: \put(33.00,93.00){\vector(3,-4){13.67}}

672: \put(32.00,61.50){\oval(30.00,3.00)[b]}

673: \put(17.00,68.00){\line(0,-1){7.00}}

674: \put(47.00,61.00){\vector(0,1){5.00}}

675: \put(103.50,95.50){\oval(25.00,9.00)[]}

676: \put(96.00,93.00){\framebox(5.00,5.00)[cc]{$f$}}

677: \put(106.00,93.00){\framebox(5.00,5.00)[cc]{$g$}}

678: \put(88.50,70.50){\oval(25.00,9.00)[]}

679: \put(81.00,68.00){\framebox(5.00,5.00)[cc]{$x$}}

680: \put(91.00,68.00){\framebox(5.00,5.00)[cc]{$f$}}

681: \put(111.00,68.00){\framebox(5.00,5.00)[cc]{$f$}}

682: \put(121.00,68.00){\framebox(5.00,5.00)[cc]{$a$}}

683: \put(131.00,68.00){\framebox(5.00,5.00)[cc]{$y$}}

684: \put(107.00,93.00){\vector(-2,-3){12.00}}

685: \put(98.00,93.00){\vector(-2,-3){12.00}}

686: \put(110.00,93.00){\vector(3,-4){13.50}}

687: \put(141.00,68.00){\framebox(5.00,5.00)[cc]{$z$}}

688: \put(128.50,70.50){\oval(45.00,9.00)[]}

689: \put(111.00,70.00){\vector(-1,0){10.00}}

690: \put(111.50,61.50){\oval(35.00,3.00)[b]}

691: \put(94.00,68.00){\line(0,-1){7.00}}

692: \put(129.00,61.00){\vector(0,1){5.00}}

693: \put(129.00,86.00){\makebox(0,0)[cc]{$L_{2}$}}

694: \put(51.00,86.00){\makebox(0,0)[cc]{$L_{1}$}}

695: \put(44.50,47.50){\oval(25.00,9.00)[]}

696: \put(37.00,45.00){\framebox(5.00,5.00)[cc]{$f$}}

697: \put(47.00,45.00){\framebox(5.00,5.00)[cc]{$g$}}

698: \put(48.00,45.00){\vector(-2,-3){12.00}}

699: \put(39.00,45.00){\vector(-2,-3){12.00}}

700: \put(51.00,45.00){\vector(3,-4){13.67}}

701: \put(29.00,20.00){\framebox(5.00,5.00)[cc]{$f$}}

702: \put(31.50,22.50){\oval(19.00,9.00)[]}

703: \put(64.50,22.50){\oval(25.00,9.00)[]}

704: \put(57.00,20.00){\framebox(5.00,5.00)[cc]{$a$}}

705: \put(67.00,20.00){\framebox(5.00,5.00)[cc]{$y$}}

706: \put(48.50,13.50){\oval(33.00,3.00)[b]}

707: \put(65.00,13.00){\vector(0,1){5.00}}

708: \put(32.00,20.00){\line(0,-1){7.00}}

709: \put(98.50,47.50){\oval(25.00,9.00)[]}

710: \put(91.00,45.00){\framebox(5.00,5.00)[cc]{$f$}}

711: \put(101.00,45.00){\framebox(5.00,5.00)[cc]{$z$}}

712: \put(96.00,20.00){\framebox(5.00,5.00)[cc]{$x$}}

713: \put(98.50,22.50){\oval(19.00,9.00)[]}

714: \put(94.00,45.00){\vector(1,-4){4.50}}

715: \put(133.00,36.00){\makebox(0,0)[cc]{$L_{1}\ACAP L_{2}$}}

716: \end{picture}

717: \caption{Intersection of computation states.}\label{Intersection}

718: }

719: \end{figure}

720:

721:

722:

723: The described algorithm of intersection is very ``na\"{\i}ve'' and

724: impractical. To improve it we should choose a more efficient

725: strategy for generating functional symbols. To do this we first do

726: a topological sorting of the functional symbols appearing in the

727: right parts of rules; intersect the $0$--ary symbol sets; and then

728: generate the next functional symbol in conformity with its

729: topological order if and only if all arguments of this symbol

730: already exist in the new grammar. For practical cases this

731: intersection can be done in an linear average time with respect to

732: grammar size and linear space. It demands quadratic time in the

733: worst case.

734:

735:

736:

737:

738:

739:

740:

741: \subsection{Operations over semantic properties}\label{OpBasic}

742:

743: Now we shall discuss some basic operations over semantic properties \linebreak

744: $\SOP{}{\centerdot}:\PSEQS\rightarrow\PSEQS$\/ used to define the semantic

745: transformers of \ERA. For all \SOP{}{\centerdot}~ the notation

746: \SOP{\SOP{L}{\centerdot}}{\centerdot} means

747: \SOP{(\SOP{L}{\centerdot})}{\centerdot}.

748:

749:

750: \subsubsection{Removing terms}

751:

752: Operations over abstract computation states use certain common

753: transformation of the sets of term equalities which consists in {\bf removing}

754: some subset $L^\prime$. The following statement holds.

755:

756: \begin{lem}\label{DeleteTerms}

757: Removing any subset of term equalities preserves  correctness of an

758: approximation.

759: \end{lem}

760:

761: \begin{pf}

762: It easy to see that

763: \[

764: \begin{array}{lcl}

765: \CCUP\{~\pi~\vert~\alpha(\pi)~\AINCL~\tilde{\pi}~\}=\gamma(\tilde{\pi})

766: &~\CINCL~&\gamma(\tilde{\pi}\setminus L^\prime)=\CCUP\{~\pi~\vert~\alpha(\pi)~\AINCL~(\tilde{\pi}\setminus L)~\}=\\

767: &&\gammaspc\CCUP\{~\pi~\vert~\mbox{$\displaystyle\bigcap_{L\in\pi}$}(L\ACUP

768:                              L^\prime)~\AINCL~\tilde{\pi}~\},

769: \end{array}

770: \]

771: which states that removing term equalities makes the approximation more rough

772: but it does preserve its correctness.

773: %~\qed

774: \end{pf}

775:

776: \noindent For (${\fontsize{17}{17}\selectfont*}$), for example,

777: $\gamma(\{\rule{0pt}{5mm}a[1]=i=j,a[2]=2\})\CINCL \gamma(\{a[1]=i\})$.

778:

779: We shall write \SOP{L}{\downarrow t} and \SOP{L}{\downarrow T} for

780: single term and term set removing followed by the state reduction

781: operation defined above.

782:

783:

784:

785:

786: \subsubsection{Term evaluation}\label{Term-evaluation}

787:

788: We define the abstract semantics for an {\bf evaluation of

789: a term} in an abstract computation state. The result of the

790: term evaluation is a state knowing the evaluated term.

791:

792: \vskip\baselineskip

793: \noindent{\bf Term evaluation} \SOP{L}{t}.

794:

795: \begin{enumerate}

796: \item If $t=t\in L$, then $\SOP{L}{t}=L$.

797: \item Otherwise, if $t\in\CV$, then add the new rules

798:       $S\rightarrow A=A$ and $A\rightarrow t$\/ to the

799:       grammar ${\bf G}$, where $A$\/ is a nonterminal

800:       which does not exist in ${\bf G}$.

801: \item Otherwise, if $t=f(t_{1},\ldots,t_{n})$\/ where

802:       $f\in\FP$\/ is a functional $n$--ary symbol and

803:       the sub-terms $t_{1},\ldots,t_{n}$\/ have been calculated

804:       (i.e. there exist derivations

805:       $A_1\DERIV{+}{G}t_1,\ldots,A_n\DERIV{+}{G}t_n$),

806:       then add the new rules $S\rightarrow A=A$\/ and

807:       $A\rightarrow f(A_1,\ldots,A_n)$\/ to the grammar

808:       ${\bf G}$, where $A$\/ is a nonterminal which

809:       still does not exist in ${\bf G}$.

810: \end{enumerate}

811: To improve an accuracy of analysis we can take into account, for example, the

812: commutativity of primitive operations.

813: %\footnote{This observation is not expanding to the real computations}.

814: If $L$\/ knows $f(t_1,t_2)$\/ and $f$\/ is commutative then $\SOP{L}{f(t_2,t_1)}=L$.

815:

816: \begin{figure}[t]

817: \centering

818: {\small

819: \unitlength=0.85mm

820: \linethickness{0.4pt}

821: \begin{picture}(118.00,70.00)

822: \put(12.50,15.00){\oval(25.00,10.00)[]}

823: \put(4.00,12.00){\framebox(6.00,6.00)[cc]{$y$}}

824: \put(15.00,12.00){\framebox(6.00,6.00)[cc]{$a$}}

825: \put(12.50,40.00){\oval(25.00,10.00)[]}

826: \put(4.00,37.00){\framebox(6.00,6.00)[cc]{$x$}}

827: \put(15.00,37.00){\framebox(6.00,6.00)[cc]{$h$}}

828: \put(18.00,37.00){\vector(-1,-3){5.67}}

829: \put(82.50,15.00){\oval(25.00,10.00)[]}

830: \put(74.00,12.00){\framebox(6.00,6.00)[cc]{$y$}}

831: \put(85.00,12.00){\framebox(6.00,6.00)[cc]{$a$}}

832: \put(82.50,40.00){\oval(25.00,10.00)[]}

833: \put(74.00,37.00){\framebox(6.00,6.00)[cc]{$x$}}

834: \put(85.00,37.00){\framebox(6.00,6.00)[cc]{$h$}}

835: \put(88.00,37.00){\vector(-1,-3){5.67}}

836: \put(79.33,62.00){\framebox(6.00,6.00)[cc]{$f$}}

837: \put(52.00,37.00){\framebox(6.00,6.00)[cc]{$g$}}

838: \put(107.00,37.00){\framebox(6.00,6.00)[cc]{$f$}}

839: \put(107.00,12.00){\framebox(6.00,6.00)[cc]{$b$}}

840: \put(81.00,62.00){\vector(-3,-2){25.00}}

841: \put(83.67,62.00){\vector(3,-2){25.67}}

842: \put(99.50,33.33){\oval(19.00,6.67)[b]}

843: \put(109.00,37.00){\line(0,-1){4.00}}

844: \put(90.00,33.00){\vector(0,1){2.00}}

845: \put(111.00,37.00){\vector(0,-1){17.00}}

846: \put(65.83,33.33){\oval(19.00,6.67)[b]}

847: \put(56.33,37.00){\line(0,-1){4.00}}

848: \put(75.33,33.00){\vector(0,1){2.00}}

849: \put(57.17,18.33){\oval(7.67,6.67)[lb]}

850: \put(57.00,15.00){\vector(1,0){13.00}}

851: \put(53.33,37.00){\line(0,-1){19.00}}

852: \put(12.33,0.00){\makebox(0,0)[cc]{$L$}}

853: \put(82.33,0.00){\makebox(0,0)[cc]{$\SOP{L}{f(g(a,x),f(h(y),b))}$}}

854: \put(110.00,15.00){\oval(16.00,10.00)[]}

855: \put(110.00,40.00){\oval(16.00,10.00)[]}

856: \put(82.33,65.00){\oval(16.00,10.00)[]}

857: \put(55.00,40.00){\oval(16.00,10.00)[]}

858: \end{picture}

859: \caption{Term evaluation.}\label{TermEval}

860: }

861: \end{figure}

862:

863:

864: \subsubsection{Identification of terms}

865:

866: %{\bf Identification of terms}\/ corresponds to a situation when, during execution

867: %of a program, it turns out that the values of computed expressions represented

868: %by the terms become equal.

869:

870: When the standard semantics defines that during program execution

871: values of computed expressions are equal, we can incorporate this

872: information in the computation state (but it can also be left

873: out). {\bf Identification of terms}\/ transforms the state into a

874: new one incorporating this information. For example,  we know that

875: a value of a term representing the conditional expression of an

876: {\bf IF}-statement coincides with 0-ary terms representing the

877: constants {\bf TRUE} or {\bf FALSE} when, respectively, {\bf

878: THEN}-branch or {\bf ELSE}-branch is being executed. So,

879: identification of terms along with {\em semantic completion}

880: considered below provides powerful facilities to take into account

881: real control flow in programs.

882:

883: \vskip\baselineskip

884: \noindent{\bf Identification of terms} \SOP{L}{t_1\equiv t_2}.

885:

886: \begin{enumerate}

887: \item If $t_1=t_2\in L$ then $\SOP{L}{t_1\equiv t_2}=L$.

888:

889: \item Let $A_{1}\DERIV{+}{G}t_1$\/ and $A_2\DERIV{+}{G}t_2$.

890:       We replace the nonterminal $A_2$\/ by the nonterminal

891:       $A_1$\/ in all rules of ${\EuScript P}$.

892:       If rules with an identical right side

893:       $B_1\rightarrow w,\ldots,B_k\rightarrow w$ have appeared, then

894:       a certain nonterminal from the left sides of the rules (for example $B_1$)

895:       must be taken and all nonterminals $B_2,\ldots,B_k$\/

896:       in the grammar must be replaced by it.

897:

898: \item Repeat step 2 until stabilization.

899:       If after that we have a state $L^\prime$\/ containing inconsistent

900:       term equalities\footnote{There exists a wide spectrum of

901:       inconsistency conditions. The simplest of them is an

902:       equality of two different constants (see {\bf Section

903:       \ref{Semantic-completion}}

904:       for the further discussion).} then the result is \ATOP\/

905:       else it is a reduction of~$L^\prime$.

906: \end{enumerate}

907:

908: \noindent An example of identification is given in {\bf Figure \ref{Identification}}.

909:

910: \begin{figure}%[ht]

911: \centering

912: {%\small

913: \unitlength=0.85mm

914: %\special{em:linewidth 0.4pt}

915: \linethickness{0.4pt}

916: \begin{picture}(140.00,70.00)

917: \put(19.00,59.00){\framebox(6.0,6.0)[cc]{$g$}}

918: \put(29.00,59.00){\framebox(6.0,6.0)[cc]{$a$}}

919: \put(27.00,62.00){\oval(24.00,12.00)[]}

920: \put(9.00,40.00){\framebox(6.0,6.0)[cc]{$f$}}

921: \put(12.00,43.00){\circle{12.00}}

922: \put(27.00,40.00){\framebox(6.0,6.0)[cc]{$g$}}

923: \put(37.00,40.00){\framebox(6.0,6.0)[cc]{$f$}}

924: \put(35.00,43.00){\oval(24.00,12.00)[]}

925: \put(21.00,59.00){\vector(-3,-4){7.33}}

926: \put(23.00,59.00){\vector(1,-1){10.00}}

927: \put(27.00,43.00){\vector(-1,0){8.50}}

928: \put(9.00,21.00){\framebox(6.0,6.0)[cc]{$x$}}

929: \put(12.00,24.00){\circle{12.00}}

930: \put(32.00,21.00){\framebox(6.0,6.0)[cc]{$b$}}

931: \put(35.00,24.00){\circle{12.00}}

932: \put(12.00,40.00){\vector(0,-1){10.00}}

933: \put(30.00,40.00){\vector(1,-3){3.33}}

934: \put(40.00,40.00){\vector(-1,-3){3.33}}

935: \put(89.00,59.00){\framebox(6.0,6.0)[cc]{$g$}}

936: \put(99.00,59.00){\framebox(6.0,6.0)[cc]{$a$}}

937: \put(97.00,62.00){\oval(24.00,12.00)[]}

938: \put(89.00,40.00){\framebox(6.0,6.0)[cc]{$g$}}

939: \put(99.00,40.00){\framebox(6.0,6.0)[cc]{$f$}}

940: \put(97.00,43.00){\oval(24.00,12.00)[]}

941: \put(91.00,59.00){\vector(0,-1){10.00}}

942: \put(93.00,59.00){\vector(3,-4){7.33}}

943: \put(89.00,21.00){\framebox(6.0,6.0)[cc]{$x$}}

944: \put(99.00,21.00){\framebox(6.0,6.0)[cc]{$b$}}

945: \put(97.00,24.00){\oval(24.00,12.00)[]}

946: \put(93.00,40.00){\vector(0,-1){10.00}}

947: \put(102.00,40.00){\vector(0,-1){10.00}}

948: \put(23.00,11.00){\makebox(0,0)[cc]{$L$}}

949: \put(97.00,11.00){\makebox(0,0)[cc]{\SOP{L}{x\equiv b}}}

950: \put(84.00,37.00){\oval(14.00,8.00)[b]}

951: \put(79.50,39.50){\oval(5.00,7.00)[lt]}

952: \put(77.00,40.00){\line(0,-1){4.00}}

953: \put(91.00,40.00){\line(0,-1){4.00}}

954: \put(79.00,43.00){\vector(1,0){6.00}}

955: \end{picture}

956: \caption{Identification of values of terms.}\label{Identification}

957: }

958: \end{figure}

959:

960:

961: \begin{lem}

962: Identification of values of terms is a correct transformation and

963: the resulting state is unique.

964: %

965: %More formal definition involving \alpha

966: %

967: \end{lem}

968:

969: \begin{pf}

970:

971: Let $\pi=\{~L_i~\vert~L_i\in\PSEQS~\}$\/ be a concrete semantic

972: property which holds before identification of terms $t_1$\/ and

973: $t_2$. If the values of $t_1$\/ and $t_2$\/ are equal in the

974: concrete semantics $\forall L_i ~:~ t_1=t_2\in L_i$, then they are

975: equal in the abstract semantics $t_1=t_2\in\alpha(\pi)$, too. If

976: their values are not equal, then identification gives us an

977: inconsistent computation state which obviously includes

978: \SOP{L}{t_1\equiv t_2}\/ for all $t_1$\/ and $t_2$. So, this

979: transformation is correct.

980:

981: Identification is done in finite steps because the size of

982: grammar decreases at each step. Uniqueness of the resulting state

983: is explained by the following observation. If we have two pairs

984: of terms which are candidates for identification, then

985: identification of one of them does not close a possibility of it

986: for another, because we remove a duplication of the functional

987: symbols only. In fact, after identification of a pair of terms we

988: obtain a new state, including the source one, and thus other

989: existing identification possibilities remain. So, the order of

990: ``merging'' of term pairs is not important for the resulting

991: state.

992: %~\qed

993: \end{pf}

994:

995:

996:

997: \subsubsection{Semantic completion}\label{Semantic-completion}

998:

999: We have not yet considered any interpretation of constants and

1000: functional symbols. We could continue developing \ERA\/ in the

1001: same way. As a result we shall obtain a noninterpretational

1002: version of the analysis likewise analysis algorithms in the

1003: program scheme theory. However, it is natural to use semantics of

1004: primitive operations of the programming language of interest in

1005: order to achieve better accuracy.

1006:

1007: \ERA\/ provides us with possibilities of taking into account

1008: properties of language constructs, and, what is especially

1009: important, we can easily handle complexity of these manipulations.

1010: In fact, inclusion of these properties corresponds to carrying out

1011: some finite part of completion of the computation states by

1012: consistent equalities. This manipulation is called {\bf semantic

1013: completion} (about ``conjunctive/disjunctive completion'' see

1014: \cite{CC/92a}).

1015:

1016:

1017:

1018: \begin{figure}%[ht]

1019: \centering

1020: {%\small

1021: \unitlength=0.85mm

1022: \linethickness{0.4pt}

1023: \begin{picture}(131.33,60.00)

1024: \put(12.50,45.00){\oval(25.00,10.00)[]}

1025: \put(4.00,42.00){\framebox(6.00,6.00)[cc]{$x$}}

1026: \put(15.00,42.00){\framebox(6.00,6.00)[cc]{$+$}}

1027: \put(52.50,45.00){\oval(25.00,10.00)[]}

1028: \put(44.00,42.00){\framebox(6.00,6.00)[cc]{$y$}}

1029: \put(55.00,42.00){\framebox(6.00,6.00)[cc]{{\large$\ast$}}}

1030: \put(33.33,37.67){\oval(27.33,5.33)[b]}

1031: \put(47.00,37.67){\vector(0,1){2.50}}

1032: \put(19.67,42.00){\line(0,-1){4.67}}

1033: \put(13.67,15.00){\framebox(6.00,6.00)[cc]{$z$}}

1034: \put(16.33,42.00){\vector(0,-1){18.50}}

1035: \put(16.67,18.17){\oval(17.33,10.33)[]}

1036: \put(57.00,42.00){\vector(-4,-3){31.50}}

1037: \put(53.33,15.00){\framebox(6.00,6.00)[cc]{$0$}}

1038: \put(56.33,18.17){\oval(17.33,10.33)[]}

1039: \put(58.67,42.00){\vector(0,-1){18.50}}

1040: \put(34.33,5.00){\makebox(0,0)[cc]{$L$}}

1041: %%

1042: \put(119.67,45.00){\oval(34.00,10.00)[]}

1043: \put(106.67,42.00){\framebox(6.00,6.00)[cc]{$+$}}

1044: \put(126.67,42.00){\framebox(6.00,6.00)[cc]{$z$}}

1045: \put(116.67,42.00){\framebox(6.00,6.00)[cc]{$x$}}

1046: \put(119.78,18.00){\oval(34.00,10.00)[]}

1047: \put(106.78,15.00){\framebox(6.00,6.00)[cc]{$y$}}

1048: \put(126.78,15.00){\framebox(6.00,6.00)[cc]{{\large$\ast$}}}

1049: \put(116.78,15.00){\framebox(6.00,6.00)[cc]{$0$}}

1050: \put(103.83,53.00){\oval(9.67,8.33)[t]}

1051: \put(103.83,37.50){\oval(9.67,8.33)[b]}

1052: \put(99.00,37.50){\line(0,1){15.67}}

1053: \put(108.65,42.00){\line(0,-1){5.00}}

1054: \put(108.67,54.0){\vector(0,-1){4.00}}

1055: \put(111.00,42.00){\vector(1,-3){6.3}}

1056: \put(136.00,11.50){\oval(10.67,9.00)[b]}

1057: \put(138.50,42.00){\oval(5.67,6.67)[rt]}

1058: \put(141.33,42.00){\line(0,-1){31.67}}

1059: \put(138.5,45.40){\vector(-1,0){1.67}}

1060: \put(130.67,15.00){\line(0,-1){4.33}}

1061: \put(124.17,10.33){\oval(9.67,6.67)[b]}

1062: \put(129.00,15.00){\line(0,-1){5.33}}

1063: \put(119.33,9.67){\vector(0,1){3.40}}

1064: \put(119.33,0.00){\makebox(0,0)[cc]{{\bf Co}($L$)}}

1065: \end{picture}

1066: \caption{Semantic completion.}\label{C-Cosure}

1067: }

1068: \end{figure}

1069:

1070:

1071: As a basic version of {\bf semantic completion} {\bf Co} we take

1072: computations over constant and equal arguments.

1073: %

1074: %and computations involving ``{\em additive}~{\bf 0}/{\em multiplicative}~{\bf 1}''

1075: %properties of primitive operations of the language.

1076: %

1077: When we detect that some term $t$\/ has some specific value $v$\/

1078: then {\bf Co}($L$)$=$\SOP{L}{t\equiv v}. Also, it is possible to

1079: apply identification involving dependencies among result of an

1080: operation and its arguments: if $(t_1$ AND $t_2)=TRU\!E\in L$\/

1081: then \linebreak {\bf Co}($L$)$=$\SOP{\SOP{L}{t_1\equiv

1082: TRU\!E}}{t_2\equiv TRU\!E} etc. This identification process is

1083: iterative because new possibilities for identification can appear

1084: at the next steps. It is conceivable that in doing so we shall

1085: detect inconsistency of a computation state. In this case result

1086: of semantic completion is \ATOP.

1087:

1088: This version can be extended by some intelligent theorem prover

1089: inferring new reasonable equalities and checking inconsistency of

1090: computation states. Such combining of analysis with proofs offers

1091: powerful facilities to the analyzer (see

1092: \cite{HeintzeJafVoi00,CousotCousot-CAV02}) and, as it was

1093: mentioned in the previous works, this prover is reusable for

1094: consequent (semi-)automatic processing of results of the analysis.

1095: ``Size'' of used completion can be tuned by options of

1096: interpretability of the analyzer.

1097:

1098: Some arithmetical errors (such as division by zero, out of type

1099: range etc.) will appear during the semantic completion. In this

1100: case the analyzer tells us about the error and sets the current

1101: computation state to \ATOP. Notice that for the languages where

1102: incomplete Boolean evaluation is admissible semantic completion

1103: over Boolean expressions should be carefully designed especially

1104: in presence of pointers.

1105:

1106: %The following example demonstrates a potential problem:

1107: %\begin{center}

1108: %\OPR{(p$\neq$NIL) AND (p\DEREF.f=a)}.

1109: %\end{center}

1110:

1111: An example of semantic completion is presented in {\bf Figure

1112: \ref{C-Cosure}}. Turning back to the identification example at

1113: {\bf Figure \ref{Identification}}, we consider the following

1114: interpretation of constants and functional symbols: $g$\/ is the

1115: exclusive disjunction, $f$\/ is the negation, $a$\/ is the

1116: constant {\bf TRUE} and $b$\/ is the constant {\bf FALSE}. It is

1117: easy to see that in this case application of semantic completion

1118: gives us $\SOP{L}{x\equiv b}=\ATOP$.

1119:

1120: In our analyzer we implemented an interpretational version of

1121: \ERA\/ which uses semantic completion \Co{$L$}. Under this

1122: approach, the definitions of the basic transformations mentioned

1123: above are chan\-ged to the following:

1124: \[

1125: \begin{array}{l}

1126: \SOP{L}{t}^\prime=\Co{\SOP{L}{t}},\\

1127: \SOP{L}{t_1\equiv t_2}^\prime=\Co{\SOP{L}{t_1\equiv t_2}}.

1128: \end{array}

1129: \]

1130: In short, we shall omit this ``interpretability'' prime.

1131:

1132:

1133:

1134: \section{Semantic transformers}\label{STransformers}

1135:

1136: In this section we describe semantic transformers

1137: $\SEM{}{\centerdot}:\PSEQS\rightarrow\PSEQS$\/ corresponding to

1138: common statements existing in imperative programming languages. If

1139: for a statement {\bf S} an input computation state is $L$\/ then

1140: \SEM{L}{S} is its output computation state. For all

1141: \SEM{}{\centerdot}~ the notation

1142: \SEM{\SEM{L}{\centerdot}}{\centerdot} means

1143: \SEM{(\SEM{L}{\centerdot})}{\centerdot}.

1144:

1145:

1146: \subsection{Assignment statement}

1147:

1148: Among all program terms considered in \ERA\/ we can pick out {\em

1149: access program terms} including array {\em elm}, record {\em fld},

1150: and pointer {\em val} referencing and playing an important role in

1151: determination of effect of an assignment statement. As in

1152: \cite{Deu/94,Venet/99} our abstraction of program memory

1153: manipulations is storeless and based on notion of memory access

1154: paths represented by access program terms. For example, for an

1155: address expression {\bf bar[i][j]\^{}.foo} the access term is

1156: $\mbox{\em fld}(\mbox{\em val}(\mbox{\em elm}(\mbox{\em

1157: elm}(bar,i),j)),foo)$.

1158:

1159: We shall assume that no operations other than memory addressing

1160: (for example comparisons) are allowed for structured variables

1161: such as arrays and records. So, for the previous example neither

1162: $a$\/ and $\mbox{\em elm}(a,i)$ ({\bf a[i]}) nor \linebreak

1163: $\mbox{\em val}(\mbox{\em elm}(\mbox{\em elm}(a,i),j))$ ({\bf

1164: a[i][j]\^{}}) can appear as arguments for operations other than

1165: {\em elm} and {\em fld} respectively. This limitation allows us to

1166: simplify definition of our assignment statement

1167: abstraction%

1168: \footnote{Otherwise we have either to accept that each assignment

1169: to some structured variables destroys all equalities involving

1170: other components of it or to implement some strategy (for example

1171: copying) preserving  useful and safe access terms.}.

1172:

1173: To preserve safety of the analysis we have to take into account

1174: memory aliasing appearing in programs. Two access terms are alias

1175: if they address the same memory location. In the general case

1176: \ERA\/ is inadequate itself to handle precisely all kinds of

1177: aliasing  and we should use other analyses. Next it is assumed

1178: that for each access term $t_a$\/ we know a set \ACC{t_a} of

1179: access terms covering a set of aliases for $t_a$\/ (may--alias

1180: information about $t_a$). Let

1181: \[\overline{\ACC{t_a,L({\bf G})}}=

1182: (\ACC{t_a}\cap\CV)\cup\{f\in\FP~\vert~ A\DERIV{+}{G}f(\dots) ~\wedge~ f(\dots)\!\in\!\ACC{t_a}\}

1183: \]

1184: (``roots'' of memory access terms in \ACC{t_a}). Notice that

1185: flow--insensitive approximations of alias information may cause

1186: conservative results of \ERA. Therefore \ERA\/ and alias analyses

1187: used in its implementation should have the same sensitivity to the

1188: control flow.

1189:

1190:

1191: \vskip\baselineskip

1192: \noindent{\bf Assignment statement} \SEM{L}{v:=exp}.

1193:

1194: \begin{enumerate}

1195: \item In the state $L$\/ evaluate $exp$ using evaluation transformer formally defined

1196:       earlier in {\bf Section \ref{Term-evaluation}}. Let $L^\prime$\/ be a result of the evaluation

1197:       and let $E$\/ be a nonterminal such that $E\DERIV{}{}~exp$.

1198: \item Perform \SOP{L^\prime}{\downarrow \overline{\ACC{v,L^\prime}}}. Do not remove $E$.

1199: \item Add the term $v$\/ so that $E\DERIV{}{}~v$.

1200: \end{enumerate}

1201:

1202:

1203: Unfortunately, in some cases this abstraction of the assignment

1204: statement fails as before. For example, this assignment

1205: transformer corresponding to {\bf x:=x+1} and being applied to the

1206: state $L=\{(x>0)=TRU\!E\}$\/ gives only the trivial identity

1207: $\SEM{L}{x:=x+1}=\{x=x\}$. To improve accuracy of the analysis in

1208: these cases we can consider ``artificial'' variables associated

1209: with scalar variables of the program which will store previous

1210: values of the original ones. Under this approach between first and

1211: second steps of the assignment statement effect definition we

1212: should insert the step

1213: \begin{description}

1214: \item[\dots~] Let $A\rightarrow v$\/ and $B\rightarrow v^\prime$\/ where $v^\prime$\/

1215: is associated with $v$. Remove the second rule and add $A\rightarrow v^\prime$\/

1216: if it is needed.

1217: \end{description}

1218: Under this approach we shall have $\SEM{L}{x:=x+1}=\{(x^\prime>0)=TRU\!E,x=x^\prime+1\}$\/

1219: from where we can deduce that $x>0$\/ also.

1220: %Such tracking of previous values of program variables is useful

1221:

1222:

1223: \subsection{Other transformers}

1224:

1225: \begin{itemize}

1226:

1227: \item {\it Program}\/.

1228:

1229: Given a program

1230:

1231: {\bf

1232: \begin{tabular}{ll}

1233: PROGRAM;    &                     \\

1234: ~VAR~~x : T;& $(*$ variables $*)$ \\

1235: BEGIN       &                     \\

1236: ~S          & $(*$ statements $*)$\\

1237: END.

1238: \end{tabular}

1239: }

1240:

1241: \noindent we can define the following transformer corresponding to

1242: it

1243:

1244: \[

1245: \SEM{\ABOT}{PROGRAM}=\SEM{\SEM{\ABOT}{x:=\omega}}{S}

1246: \]

1247: where $\omega$\/ represents the indefinite value. Notice that

1248: $\omega$\/ is not a constant.

1249:

1250:

1251:

1252: \item {\it Empty statement}\/.

1253: \[

1254: \SEM{L}{~} = L

1255: \]

1256:

1257:

1258: \item {\it Sequence of statements}\/.

1259: \[

1260: \SEM{L}{S_{1}; S_{2}} = \SEM{\SEM{L}{S_{1}}}{S_{2}}

1261: \]

1262:

1263: \item {\it Read statement}\/.

1264: \[

1265: \SEM{L}{ READ(x) } = \SOP{\SOP{L}{x}}{\downarrow\overline{\ACC{x}}}

1266: \]

1267: Notice that if for {\em read statement} as well as for other statements

1268: some set of user's pre-- or post--assertions represented in the form of

1269: equalities of program terms is supplied then the analyzer can take

1270: them into consideration to check consistency and to include in the current

1271: computation state.

1272:

1273:

1274: \item {\it Write statement}\/.

1275: \[

1276: \SEM{L}{ W\!RITE(x) } = \SOP{L}{x}

1277: \]

1278:

1279:

1280:

1281: \item {\it Conditional statement}\/.

1282: \[

1283: {\bf IF~~p~~THEN~~~S_{t}~~~ELSE~~~S_{f}~~~END}.

1284: \]

1285: If $L^\prime=\SOP{L}{p}$\/ then

1286: %\newpage

1287: \[

1288: \SEM{L}{IF} = \SEM{\SOP{L^\prime}{p\equiv T\!RU\!E}}{S_{t}}\ACAP

1289:               \SEM{\SOP{L^\prime}{p\equiv F\!ALSE}}{S_{f}}.

1290: \]

1291:

1292:

1293: \item {\it Cycle statement}\/.

1294:

1295:

1296: {\bf

1297: \begin{tabular}{lll}

1298: CYCLE  &     &                        \\

1299:        &  S  & $(*$ body of cycle $*)$\\

1300: END    &     &                        \\

1301: \end{tabular}

1302: }

1303:

1304: where {\bf S} is a composed statement that possibly contains occurrences of

1305: exit-of-cycle statements {\bf EXIT$_k$}. When the sequence

1306: \[

1307: L_{0}=L\mbox{,~~}L_{n} = \SEM{L_{n-1}}{S}\mbox{~~for~~} n > 0

1308: \]

1309: becomes stabilize

1310: \[

1311: \SEM{L}{CYCLE} = \ACAP_{k}E_{k}

1312: \]

1313: where $E_{k}$\/ is a stationary entry state for ${\bf EXIT_{k}}$.

1314: If this process does not become stabilize then some widening

1315: operator should be used (see Section \ref{WideningOp}).

1316:

1317:

1318:

1319: \item {\it Halt, exit and return statements}\/.

1320: \[

1321: \SEM{L}{EXIT}~=~\SEM{L}{RETURN}~=~\ATOP

1322: \]

1323:

1324:

1325:

1326:

1327: \item {\it Call of function}\/. We assume that return of results

1328: of function calls is implemented as an assignment to variables

1329: having the same names as invoked functions (connection with their

1330: call sites should be taken into account). The function bodies may

1331: contain {\bf RETURN} statements as well.

1332:

1333: {\bf

1334: \begin{tabular}{ll}

1335: FUNCTION F(x: T$_1$) : T;  &           \\

1336: ~~VAR~~y: T$_2$;           & $(*$ local variables $*)$ \\

1337: BEGIN                      &                     \\

1338: ~~S                        & $(*$ statements $*)$\\

1339: END.

1340: \end{tabular}

1341: }

1342: \[

1343: \SEM{L}{FUNCTION}=\SEM{\SEM{L}{x:=e;y:=\omega;F:=\omega}}{S}\ACAP

1344: R

1345: \] where $e$\/ is a factual parameter of the function (see the

1346: note for {\bf READ} statement) and $R=\ACAP R_k$\/ is intersection

1347: of stationary entry states for return statements in {\bf S} if

1348: they exist and $R=\ATOP$\/ otherwise. In the term being evaluated

1349: the result of a function call is represented by $F$.

1350:

1351: %Also, we

1352: %should take into account side effects of computations of the

1353: %function concerning modification of access terms that have been

1354: %evaluated before this function call in the same expression. In

1355: %context--sensitive versions of \ERA\/ different call sites of the

1356: %function $F$\/ can be distinguished by introducing supplementary

1357: %variables $F^{(i)}$\/ corresponding to these sites and

1358: %pseudo--assignments $F^{(i)}:=F:=e$\/ replacing the original ones.

1359:

1360:

1361:

1362: \begin{figure}[t]

1363: \centering {\small \unitlength=0.70mm

1364: %\special{em:linewidth 0.4pt}

1365: \linethickness{0.4pt}

1366: \begin{picture}(100.00,50.00)

1367: \put(10.00,30.00){\oval(22.00,12.00)[]}

1368: \put(3.0,27.90){\framebox(4.2,4.2)[cc]{$x$}}

1369: \put(12.80,27.90){\framebox(4.2,4.2)[cc]{$y$}}

1370: \put(10.00,20.00){\makebox(0,0)[cc]{$L$}}

1371: \put(75.00,40.00){\oval(30.00,12.00)[]}

1372: \put(65.0,37.90){\framebox(4.2,4.2)[cc]{$x$}}

1373: \put(72.90,37.90){\framebox(4.2,4.2)[cc]{\tiny $+$}}

1374: \put(81.20,37.90){\framebox(4.2,4.2)[cc]{\tiny $F$}}

1375: \put(74.00,37.90){\vector(-1,-1){12.00}}

1376: \put(76.00,37.90){\vector(1,-1){12.00}}

1377: \put(60.00,20.00){\circle{12.00}}

1378: \put(90.00,20.00){\circle{12.00}}

1379: \put(57.9,17.90){\framebox(4.2,4.2)[cc]{$y$}}

1380: \put(87.90,17.90){\framebox(4.2,4.2)[cc]{$1$}}

1381: \put(75.0,5.00){\makebox(0,0)[cc]{$\SEM{L}{x := F(x)}$}}

1382: \end{picture}

1383: \caption{Function call for $F(a)=a+1$.} }

1384: \end{figure}

1385:

1386:

1387:

1388:

1389: \end{itemize}

1390:

1391:

1392:

1393:

1394:

1395: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

1396:

1397: %Let us consider an example of a system of semantic equations.

1398: %$L_i$\/ in comments are used to refer computation

1399: %states corresponding to these program points. The program is

1400: %

1401: %\begin{tabular}{lll}

1402: %&PROGRAM;\\

1403: %&VAR~ x: INTEGER;\\

1404: %&BEGIN\\

1405: %&\Z     READ(x);\\

1406: %(*$L_0$*)&\Z     CYCLE&\\

1407: %(*$L_1$*)&\Z\Z     IF~ c$_1$ ~THEN&\\

1408: %(*$L_2$*)&\Z\Z\Z     EXIT&(*$L_3$*)\\

1409: %         &\Z\Z     ELSE&\\

1410: %(*$L_4$*)&\Z\Z\Z     IF  c$_2$  THEN x:=e$_1$ ELSE x:=e$_2$ END&(*$L_5$*)\\

1411: %         &\Z\Z     END&(*$L_6$*)\\

1412: %         &\Z     END&(*$L_7$*)\\

1413: %         &END.&

1414: %\end{tabular}

1415:

1416: %\vskip\baselineskip

1417: %\noindent and the system of semantic equations built for is

1418:

1419: %\[

1420: %\begin{array}{ll}

1421: %L_0=\SEM{\ABOT}{READ(x)}\\

1422: %L_1=L_0\ACAP L_6 \\

1423: %L_2=\SEM{L_1}{c_1\!\equiv\!TRU\!E} \\

1424: %L_3=\ATOP \\

1425: %L_4=\SEM{L_1}{c_1\!\equiv\!FALSE} \\

1426: %L_5=\SEM{L_4}{IF~c_2~THEN~x\!:=\!e\mbox{$_1$}~ELSE~x\!:=\!e\mbox{$_2$}~END} \\

1427: %L_6=L_3\ACAP L_5 \\

1428: %L_7=L_2.

1429: %\end{array}

1430: %\]

1431:

1432: %\noindent If not all states are interested for some reasons we can simplify

1433: %this system into the following one:

1434:

1435: %\[

1436: %\begin{array}{l}

1437: %L_1=\SOP{\ABOT}{READ(x)}\ACAP L_5\\

1438: %L_4=\SOP{L_1}{c_1\equiv FALSE}\\

1439: %L_5=\SEM{\SOP{L_4}{c_2\equiv TRU\!E}}{x:=e\mbox{$_1$}}\ACAP

1440: %    \SEM{\SOP{L_4}{c_2\equiv FALSE}}{x:=e\mbox{$_2$}}\\

1441: %L_7=\SOP{L_1}{c_1\equiv TRU\!E}.\\

1442: %\end{array}

1443: %\]

1444:

1445:

1446:

1447: \section{Widening operator and convergence of the analysis}\label{WideningOp}

1448:

1449: Our abstract semantic domain does not satisfy the {\em

1450: (descending) chain condition} and therefore it requires some {\em

1451: widening operator} \cite{CC/77,CC/92b,CC/92a}. To guarantee the

1452: convergence of the abstract interpretation we should use a dual

1453: widening operator:

1454: \begin{itemize}

1455: \item $\forall x,y\in\PSEQS~~\Rightarrow~~

1456:       x~\AINCL~x\tilde{\bigtriangledown}y~~\&~~

1457:       y~\AINCL~x\tilde{\bigtriangledown}y$,

1458:

1459: \item for all decreasing chains $x_0\AINCL x_1\AINCL\ldots$,

1460:       the decreasing chains defined by

1461:       $y_0=x_0,\ldots,y_{i+1}=y_i\tilde{\bigtriangledown}x_{i+1},\dots$\/

1462:       are not strictly decreasing.

1463: \end{itemize}

1464: The iteration sequence with widening is convergent and

1465: its limit is a sound approximation of the fixpoint.

1466:

1467:

1468:

1469: \subsection{Widening operator for \ERA--grammars}

1470:

1471:

1472:

1473:

1474: Infinite chains can appear because corresponding languages have

1475: common infinite subsets generated by cyclic derivations in

1476: grammars. The source of that in \ERA\/ is {\bf term

1477: identification}. We can avoid this problem by imposing the

1478: constraint that grammars must be acyclic. Within the semilattice

1479: \ASDE, the subsemilattice of finite languages\footnote{Notice that

1480: sets of term equalities of a special form corresponding to these

1481: languages were used by V. Sabelfeld to develop effective

1482: algorithms of recognizing equivalence for some classes of program

1483: schemata \cite{Sa/79,Sa/80}.} generated by such grammars satisfies

1484: the chain condition, but such languages are not expressive enough.

1485: Our solution is the following. Grammars are not originally

1486: restricted but if in course of abstract interpretation the grammar

1487: size becomes greater than some parameter, then ``harmful'' cycles

1488: must be destroyed. To this end we remove grammar rules

1489: participating in cyclic derivations. Correctness of this

1490: approximation of intersection follows from {\bf Lemma

1491: \ref{DeleteTerms}}.

1492:

1493: %The lengths of such chains are bounded by $\vert{\bf G}_0\vert$\/ where ${\bf

1494: %G}_0$ are the first acyclic grammars in the chains under consideration.

1495:

1496: Detecting such rules is no simpler than the

1497: ``mi\-ni\-mum-\-feed\-back-arc/ver\-tex-set'' problem ({\bf MFAS}

1498: or {\bf MFVS}) if we consider the grammars as directed graphs.

1499: These sets are the smallest sets of arcs or vertices,

1500: respectively, whose removal makes a graph acyclic. We suppose that

1501: the ``feedback vertices'' choice is more natural for our purposes.

1502: In the general case this problem is ${\EuScript NP}$--hard, but

1503: there are approximate algorithms that solve this problem in

1504: polynomial \cite{Speckenmeyer/89} or even linear \cite{Rosen/82}

1505: time. Consideration of weighted digraphs makes it possible to

1506: distinguish grammar rules with respect to their worth for accuracy

1507: of the analysis algorithm. However, perspectives of this approach

1508: are not clear now for complexity/precision reasons of such

1509: algorithms. For example, \cite{ENSS/95} proposes an algorithm for

1510: weighted {\bf FVS}-problem requiring $O(n^2\mu(n)\log^2n)$\/ time

1511: where $\mu(n)$\/ is complexity of matrix multiplication.

1512:

1513: \begin{figure}[h]

1514: \centering

1515: \unitlength=0.825mm

1516: \linethickness{0.4pt}

1517: %\vskip-1cm

1518: \begin{picture}(160.00,80.00)

1519: %%%%%%%%%%%%%%%%%%%%%%%  PIC1.PIC  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

1520: \put(12.50,45.00){\oval(25.00,10.00)[]}

1521: \put(5.00,42.00){\framebox(6.00,6.00)[cc]{$g$}}

1522: \put(15.00,42.00){\framebox(6.00,6.00)[cc]{$r$}}

1523: \put(44.00,42.00){\framebox(6.00,6.00)[cc]{$h$}}

1524: \put(54.00,42.00){\framebox(6.00,6.00)[cc]{$y$}}

1525: \put(64.00,42.00){\framebox(6.00,6.00)[cc]{$f$}}

1526: \put(57.00,45.00){\oval(34.00,10.00)[]}

1527: \put(41.80,38.33){\oval(47.60,6.67)[b]}

1528: \put(65.60,42.00){\line(0,-1){4.67}}

1529: \put(66.67,37.33){\line(0,1){0.00}}

1530: \put(18.00,39.75){\vector(0,1){0.20}}

1531: \put(18.00,37.33){\line(0,1){2.00}}

1532: \put(39.50,45.00){\vector(1,0){0.20}}

1533: \put(21.00,45.00){\line(1,0){18.33}}

1534: \put(29.50,15.00){\oval(25.00,10.00)[]}

1535: \put(22.00,12.00){\framebox(6.00,6.00)[cc]{$h$}}

1536: \put(32.00,12.00){\framebox(6.00,6.00)[cc]{$z$}}

1537: \put(28.50,20.33){\vector(3,-4){0.20}}

1538: \multiput(10.00,42.00)(0.12,-0.14){150}{\line(0,-1){0.14}}

1539: \put(21.00,20.33){\vector(2,-3){0.20}}

1540: \multiput(6.00,42.00)(0.12,-0.17){126}{\line(0,-1){0.17}}

1541: \put(35.33,20.33){\vector(-1,-2){0.20}}

1542: \multiput(47.00,42.00)(-0.12,-0.22){98}{\line(0,-1){0.22}}

1543: \put(14.49,11.80){\oval(21.00,13.00)[b]}

1544: \put(4.00,39.75){\vector(0,1){0.20}}

1545: \put(4.00,11.90){\line(0,1){27.67}}

1546: \put(24.00,72.00){\framebox(6.00,6.00)[cc]{$x$}}

1547: \put(34.00,72.00){\framebox(6.00,6.00)[cc]{$f$}}

1548: \put(44.00,72.00){\framebox(6.00,6.00)[cc]{$a$}}

1549: \put(37.00,75.00){\oval(34.00,10.00)[]}

1550: \put(75.00,38.33){\oval(12.00,6.67)[b]}

1551: \put(69.00,42.00){\line(0,-1){4.00}}

1552: \put(78.00,72.67){\oval(6.00,5.33)[rt]}

1553: \put(81.00,37.33){\line(0,1){35.67}}

1554: \put(78.00,75.33){\vector(-1,0){23.67}}

1555: \put(35.00,72.00){\vector(-1,-1){22.00}}

1556: \put(38.33,72.00){\vector(3,-4){16.25}}

1557: \put(40.00,0.00){\makebox(0,0)[cc]{{\small$L({\bf G})$}}}

1558: %%%%%%%%%%%%%%%%%%%%%%%  PIC2.PIC  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

1559: \put(117.50,45.00){\oval(25.00,10.00)[]}

1560: \put(110.00,42.00){\framebox(6.00,6.00)[cc]{$g$}}

1561: \put(120.00,42.00){\framebox(6.00,6.00)[cc]{$r$}}

1562: \put(149.00,42.00){\framebox(6.00,6.00)[cc]{$y$}}

1563: \put(159.00,42.00){\framebox(6.00,6.00)[cc]{$h$}}

1564: \put(171.67,37.33){\line(0,1){0.00}}

1565: \put(144.50,45.00){\vector(1,0){0.20}}

1566: \put(126.00,45.00){\line(1,0){18.33}}

1567: \put(136.50,15.00){\oval(15.00,10.00)[]}

1568: \put(133.67,12.00){\framebox(6.00,6.00)[cc]{$z$}}

1569: \put(123.67,72.00){\framebox(6.00,6.00)[cc]{$x$}}

1570: \put(133.67,72.00){\framebox(6.00,6.00)[cc]{$f$}}

1571: \put(143.67,72.00){\framebox(6.00,6.00)[cc]{$a$}}

1572: \put(136.67,75.00){\oval(34.00,10.00)[]}

1573: \put(157.50,45.00){\oval(25.00,10.00)[]}

1574: \put(135.00,72.00){\vector(-3,-4){16.25}}

1575: \put(138.00,72.00){\vector(3,-4){16.25}}

1576: \put(111.67,42.00){\vector(3,-4){18.00}}

1577: \put(161.00,42.00){\vector(-3,-4){18.00}}

1578: \put(114.67,42.00){\vector(1,-1){22.00}}

1579: \put(136.67,0.00){\makebox(0,0)[cc]{{\small$L({\bf G})\setminus_{\bf fvs}$}}}

1580: \end{picture}

1581: %

1582: %%%%%%%%%%%%%%%%%%%%%%%  mfvs.eps  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

1583: %

1584: %\vskip 2cm

1585: \epsfig{file=mfvs.eps,width=390pt}

1586: %\vskip 2cm

1587: \caption{{\bf FVS}--transformation.}\label{MFVS}

1588: \end{figure}

1589:

1590: Our widening operator for the analysis of equality relationships

1591: is defined in the following way. A vertex set of the cyclomatic

1592: graph\footnote{A graph represents cyclic derivations in the

1593: grammar. The author did not find another appropriate name for this

1594: object in the Computer Science and Discrete Mathematics

1595: literature.} of the grammar {\bf G} is a set of functional symbols

1596: existing in {\bf G}. An arc $(f,g)$\/ belongs to its arc set if

1597: {\bf G} contains rules $A\rightarrow f(\ldots,B,\ldots)$\/ and

1598: $B\rightarrow g(\ldots)$. A transformation of a cyclomatic graph

1599: which involves detecting some {\bf FVS} (it can be an upper

1600: approximation of a minimal feedback set) and removing all vertices

1601: from the {\bf FVS} is said to be an {\bf FVS}--transformation (an

1602: example is shown in {\bf Figure \ref{MFVS}}). Let \MFVS{L}\/ be a

1603: language obtained from $L$\/ by {\bf FVS}--transformation applied

1604: to the grammar generating $L$. We define

1605: \[

1606: L(\mbox{\bf G}_1)\DWID L(\mbox{\bf G}_2) =

1607: \left\{\begin{array}{ll}

1608:   \MFVS{L(\mbox{\bf G}_1)}~\ACAP~L(\mbox{\bf G}_2) &

1609:   \mbox{~if~~}\vert\mbox{\bf G}_2\vert\geq\vert\mbox{\bf G}_1\vert>d,\\

1610:   L(\mbox{\bf G}_1)~\ACAP~\MFVS{L(\mbox{\bf G}_2)} &

1611:   \mbox{~if~~}\vert\mbox{\bf G}_1\vert>\vert\mbox{\bf G}_2\vert>d,\\

1612:   L(\mbox{\bf G}_1)~\ACAP~L(\mbox{\bf G}_2)        & \mbox{~otherwise,}

1613: \end{array}\right.

1614: \]

1615: where $d$\/ is a user-defined parameter. It is reasonable to

1616: choose this parameter, depending on number of variables of

1617: analyzed programs, as a linear function with a small factor of

1618: proportionality. Notice that in this case the lengths of appearing

1619: chains linearly depend on number of variables living

1620: simultaneously.

1621:

1622:

1623: \subsection{Divergence of the analysis}

1624:

1625: Is the widening operator, being rather complex, really needed for

1626: the analysis of equality relationships? Are there programs which,

1627: being analyzed, generate infinite chains of semantic properties?

1628: It should be mentioned that constructing such program examples has

1629: been a problem for a long time. In \cite{Em/96} we stated our

1630: belief that their existence seems hardly probable. These attempts

1631: failed, because they concentrated on constructing an example with

1632: completely non-interpretable functional symbols, i.e. in the frame

1633: of the ``pure'' theory of program schemata.

1634:

1635: As already noticed, we can widely vary the interpretability of the

1636: analysis algorithm. In order to construct an example, it will

1637: suffice to use the following rule of completion:

1638: \[

1639: \mbox{if } (t_1=t_2)=TRU\!E\in L \mbox{ then {\bf Co}}(L)=\SOP{L}{t_1\equiv t_2}.

1640: \]

1641:

1642: We consider the following example:

1643:

1644: \vskip\baselineskip

1645: \begin{center}

1646: \hspace*{-7.5mm}\begin{tabular}{ccc}

1647: \begin{tabular}{l}

1648: %\setlength{\baselineskip}{1pt}

1649:      ...\\\vspace*{-2pt}

1650:      x:=f(y);\\\vspace*{-2pt}

1651:      \KW{if}~ f(x)=f(y) ~\KW{then}\\\vspace*{-2pt}

1652: ~~       \KW{while}~ y=f(g(y)) ~\KW{do}\\\vspace*{-2pt} %

1653: ~~~~     y:=g(y)\\\vspace*{-2pt} ~~%

1654:        \KW{end}\\\vspace*{-2pt}

1655:      \KW{end}\\\vspace*{-2pt}

1656:      ...\\

1657: \hspace*{6mm}{\bf program scheme}

1658: \end{tabular}

1659: &\hspace*{7.5mm}&

1660: \begin{tabular}{l}

1661:

1662:      ...\\\vspace*{-2pt}

1663:      x:=sign(y);\\\vspace*{-2pt}

1664:      \KW{if}~ sign(x)=sign(y) ~\KW{then}\\\vspace*{-2pt}

1665: ~~       \KW{while}~ y=sign(abs(y)) ~\KW{do}\\\vspace*{-2pt} %

1666: ~~~~     y:=abs(y)\\\vspace*{-2pt} ~~%

1667:          \KW{end}\\\vspace*{-2pt}

1668:      \KW{end}\\\vspace*{-2pt}

1669:     ...\\

1670: \hspace*{5mm}{\bf ``real-world'' program}

1671: \end{tabular}\\

1672: \end{tabular}

1673: \end{center}

1674: \vskip\baselineskip

1675:

1676:

1677: The properties computed at the body entry belong to an infinite

1678: decreasing chain. On {\bf Figure \ref{RealChain}} a state $L_e$\/

1679: describes properties valid before cycle execution; states $L_1$\/

1680: and $L_2$\/ describe properties at the entry of the cycle body for

1681: first and second iteration, respectively. It is easy to see that

1682: $L_1\ACAP L_2$\/ coincides with $L_2$\/ except in the equality

1683: relationships containing terms generated by $g^*$. Therefore every

1684: time we obtain the next state, the functional element $g^*$\/ is

1685: absent and the sub-net placed in the dashed box will repeat more

1686: and more times.

1687:

1688:

1689: \begin{figure}[t]

1690: \centering

1691: {%\small

1692: \unitlength=0.85mm

1693: %\special{em:linewidth 0.4pt}

1694: \linethickness{0.4pt}

1695: \begin{picture}(140,100.00)

1696: \put(4.00,81.00){\framebox(4.00,4.00)[cc]{$x$}}

1697: \put(10.00,81.00){\framebox(4.00,4.00)[cc]{$f$}}

1698: \put(12.00,81.00){\vector(0,-1){13.00}}

1699: \put(16.00,81.00){\framebox(4.00,4.00)[cc]{$f$}}

1700: \put(12.00,83.00){\oval(22.00,10.00)[]}

1701: \put(22.50,76.00){\oval(9.00,10.00)[b]}

1702: \put(22.50,90.00){\oval(9.00,9.00)[t]}

1703: \put(18.00,90.00){\vector(0,-1){2.00}}

1704: \put(27.00,90.00){\line(0,-1){14.00}}

1705: \put(18.00,81.00){\line(0,-1){5.00}}

1706: \put(10.00,61.00){\framebox(4.00,4.00)[cc]{$y$}}

1707: \put(12.00,63.00){\circle{10.00}}

1708: \put(12.00,50.00){\makebox(0,0)[cc]{$L_e$}}

1709:

1710: \put(54.00,81.00){\framebox(4.00,4.00)[cc]{$x$}}

1711: \put(60.00,81.00){\framebox(4.00,4.00)[cc]{$f$}}

1712: \put(62.00,81.00){\vector(0,-1){13.00}}

1713: \put(66.00,81.00){\framebox(4.00,4.00)[cc]{$f$}}

1714: \put(62.00,83.00){\oval(22.00,10.00)[]}

1715: \put(72.50,76.00){\oval(9.00,10.00)[b]}

1716: \put(72.50,90.00){\oval(9.00,9.00)[t]}

1717: \put(68.00,90.00){\vector(0,-1){2.00}}

1718: \put(77.00,90.00){\line(0,-1){14.00}}

1719: \put(68.00,81.00){\line(0,-1){5.00}}

1720: \put(62.00,63.00){\oval(18.00,10.00)[]}

1721: \put(56.00,61.00){\framebox(4.00,4.00)[cc]{$y$}}

1722: \put(63.00,61.00){\framebox(4.00,4.00)[cc]{$f$}}

1723: \put(65.00,61.00){\vector(0,-1){13.00}}

1724: \put(65.00,43.00){\circle{10.00}}

1725: \put(63.00,41.00){\framebox(4.00,4.00)[cc]{$g$}}

1726: \put(65.00,41.00){\line(0,-1){5.00}}

1727: \put(69.50,36.00){\oval(9.00,9.00)[b]}

1728: \put(74.00,36.00){\line(0,1){24.00}}

1729: \put(71.00,60.00){\oval(6.00,6.00)[rt]}

1730: \put(71.50,63.00){\vector(-1,0){0.75}}

1731: \put(62.00,29.00){\makebox(0,0)[cc]{$L_1$}}

1732:

1733: \put(114.00,83.00){\oval(22.00,10.00)[]}

1734: \put(106.00,81.00){\framebox(4.00,4.00)[cc]{$x$}}

1735: \put(112.00,81.00){\framebox(4.00,4.00)[cc]{$f$}}

1736: \put(114.00,81.00){\vector(0,-1){13.00}}

1737: \put(118.00,81.00){\framebox(4.00,4.00)[cc]{$f$}}

1738: \put(124.50,76.00){\oval(9.00,10.00)[b]}

1739: \put(124.50,90.00){\oval(9.00,9.00)[t]}

1740: \put(120.00,90.00){\vector(0,-1){2.00}}

1741: \put(129.00,90.00){\line(0,-1){14.00}}

1742: \put(120.00,81.00){\line(0,-1){5.00}}

1743: \put(103.00,56.00){\dashbox(22.00,14.00){}}

1744: \put(112.00,61.00){\framebox(4.00,4.00)[cc]{$f$}}

1745: \put(114.00,63.00){\circle{10.00}}

1746: \put(114.00,61.00){\vector(0,-1){13.00}}

1747: \put(114.00,43.00){\oval(22.00,10.00)[]}

1748: \put(106.00,41,00){\framebox(4.00,4.00)[cc]{{\footnotesize$\!g^*$}}}

1749: \put(108.00,41.00){\line(0,-1){5.00}}

1750: \put(103.50,37.00){\oval(9.00,9.00)[b]}

1751: \put(99.00,37.00){\line(0,1){24.00}}

1752: \put(102.00,61.00){\oval(6.00,6.00)[lt]}

1753: \put(102.00,64.00){\vector(1,0){7.0}}

1754: \put(112.00,41.00){\framebox(4.00,4.00)[cc]{$f$}}

1755: \put(118.00,41.00){\framebox(4.00,4.00)[cc]{$y$}}

1756: \put(114.00,41.00){\vector(0,-1){13.00}}

1757: \put(114.00,23.00){\circle{10.00}}

1758: \put(112.00,21.00){\framebox(4.00,4.00)[cc]{$g$}}

1759: \put(114.00,21.00){\line(0,-1){5.00}}

1760: \put(121.25,16.00){\oval(14.50,9.00)[b]}

1761: \put(128.50,16.00){\line(0,1){24.00}}

1762: \put(125.50,40.00){\oval(6.00,6.00)[rt]}

1763: \put(125.50,43.00){\vector(-1,0){0.75}}

1764: \put(114.00,3.00){\makebox(0,0)[cc]{$L_2$}}

1765: \end{picture}

1766: \caption{The case of divergence of the analysis.}\label{RealChain} }

1767: \end{figure}

1768:

1769: To obtain a ``real-world'' program from this program scheme, we

1770: interpret the functional symbols in the following manner: $f\equiv

1771: sign$\/ and $g\equiv abs$. We would like to point out the

1772: following interesting property. Execution of this piece of code

1773: (i.e., its behavior determined with the standard semantics)

1774: diverges only for two values of \OPR{y}: 0 and 1. At the same time

1775: the analysis algorithm (i.e. execution of the piece of code under

1776: our nonstandard semantics) is always divergent on condition that a

1777: widening operator is not used and the assumption on interpretation

1778: mentioned above holds.

1779:

1780: Is this program actually real-world? The reader should decide that

1781: by himself but we would like to underline the

1782: following\footnote{The penetrating reader may notice that this

1783: program to be considered as human written is really stupid (it can

1784: be slightly intellectualized if we interpret $g$\/ as ``{\em add

1785: 1}''). But for automatic generators of programs such a code does

1786: not seem improbable.}. On the one hand, the interpretability of

1787: the analysis algorithm can be varied in wide ranges and, on the

1788: other, we are not able to prove formally the impossibility of such

1789:  behavior of the analyzer under the considered interpretation.

1790: So, we can choose either a lean analysis using acyclic grammars

1791: only or another one using arbitrary grammars and some widening

1792: operator.

1793:

1794:

1795: \subsection{Complexity of the analysis}\label{ComplexityERA}

1796:

1797: In \cite{Em/96} we pointed out the following upper bound on time

1798: for the algorithm of \ERA: $O(nmG^2_{max})$, where $n$\/ is

1799: program size, $m$\/ is maximum of number of program variables

1800: existing at a time, and $G_{max}$\/ is maximum of sizes of

1801: grammars appearing in course of the analysis. Due to our

1802: construction of widening operator (namely, choice of the parameter

1803: $d$\/ linearly depended on $m$) we can assume that $G_{max}=O(m)$.

1804:

1805:

1806: This bound can be deduced with help of {\bf Theorem 6} in

1807: \cite{Bou/93a}. The theorem states that for the recursive strategy

1808: of chaotic iteration maximum complexity is

1809: \[

1810: h\cdot\sum_{c\in C}\delta(c) \hspace*{10mm}

1811: \left(\strut\leq h\cdot\vert C\vert\cdot\vert W\vert\right),

1812: \lefteqn{\hspace*{2.5cm}{\bf(**)}}

1813: \]

1814: where $h$\/ is maximum length of increasing chains built by a

1815: widening operator, $C$\/ is set of control program points,

1816: $\delta(c)$\/ is depth of the control point $c$\/ in hierarchy of

1817: nested strongly connected components of the control flow graph

1818: containing $c$, and $W$\/ is set of vertices  where a widening

1819: operator is applied during the analysis.

1820:

1821: For well-structured programs we can assume that maximum depth of

1822: nested loops does not depend on program size and is bounded by

1823: some constant. By ${\bf(**)}$\/ we conclude that number of

1824: algorithm steps does not exceed $O(nm)$. Since time complexity of

1825: all operations used in the analysis are estimated by $G^2_{max}$\/

1826: we obtain $O(nmG^2_{max})$\/ (or $O(nm^3)$) upper bound. Notice

1827: that to improve the results of the analysis it is possible to use

1828: rich semantic completion and more precise {\bf FVS}--algorithms

1829: that have more than quadratic time complexity.

1830:

1831: However, experimental results show that an approximation of a

1832: fixed point for the heads of cycle bodies is usually attained

1833: after at most two iterations and time complexity of the analysis

1834: is proportional to $nG_{max}$. Also, the user can turn off

1835: checking a threshold after which widening is started. In this

1836: case, he (consciously) admits some chance that the analysis

1837: diverges but we believe that this chance is not too big.

1838:

1839: It is easy to see that the space complexity of the equality relationship

1840: analysis is $O(nG_{max})$\/ and it is  essentially depended

1841: on the number of variables.

1842: %Unfortunately, this bound does not provide useful information about the actual

1843: %space requirements for the analysis algorithm.

1844: We estimate the actual space requirements as 1.5--2.0 Mb

1845: per 1000 program lines for middle-size programs.

1846: %and emphasize that it

1847: %comes into particular importance for virtual memory systems. Random management

1848: %of swapping makes impracticable analyzing large programs in small RAM.

1849:

1850:

1851:

1852: \section{Processing of invariants and experimental results}\label{Experiments}

1853:

1854:

1855: \subsection{Usage of \ERA--invariants}

1856:

1857: \ERA\/ produces some set of invariants involving program terms

1858: which can be useful at different steps of program development and

1859: processing: debugging, verification (for that the invariants are

1860: interesting themselves), specialization, and optimization. The

1861: automatic prover mentioned above can be used at step of

1862: post-processing results of the analysis.

1863:

1864: We notice that the analyzer can tell the user useful information both at the stage of

1865: analyzing (this means that it is possible that there exist execution traces where

1866: such computational states appear; we mark with $+$\/ properties which can be detected

1867: at this stage) and at the stage of processing of results of the analysis

1868: (these properties hold for each execution trace leading to this program point).

1869: We shall briefly list some program properties that can be extracted from computation

1870: states $L_{in}$\/ and $L_{out}$\/ being for a statement {\bf S} the input and output

1871: states respectively.

1872:

1873:

1874: \begin{description}

1875: \item[$+$] {\em Variable $x$\/ has an indefinite value}\/

1876:            if $L_{in}$\/ contains $x=\omega$.

1877:

1878:            %If $x$\/ is a variable corresponding

1879:            %to a function then existence of such an identity in entry states for

1880:            %return statements of the function or the output state for the function body

1881:            %says that a result of evaluation of the function may be undefined.

1882:

1883: \item[$+$] {\em Error in evaluation of an expression}: division by zero,

1884:            out of type ranges, nil--pointer dereferencing, etc.

1885:

1886: \item[$-$] {\em {\bf S} is inaccessible} if $L_{in}=\ATOP$. This information

1887:            can correspond to different properties of program execution:

1888:            potentially infinite cycles and recursive calls, dead branches of

1889:            conditional statements, useless procedural definitions, etc.

1890:

1891: \item[$-$] {\em Assignment statement {\bf v:=exp} is redundant} if

1892:            $L_{out}$\/ contains $v=v^\prime$\/ where $v^\prime$\/ is the

1893:            variable associated with $v$.

1894:

1895: \item[$-$] {\em Unused definitions} (constants, variables, types).

1896:

1897: \item[$-$] {\em Constant propagation}. Notice that \ERA\/ can detect that an expression

1898:            is constant not only when constants for all variables in this expression

1899:            are known.

1900:

1901: \item[$-$] More general: for some expression {\em there exists an expression that is

1902:            equal to the original one and calculated more efficiently}\/

1903:            (with respect to an given criterion

1904:            time/space and target computer architecture).

1905: \end{description}

1906: Obviously, this list is not complete and there are many other properties

1907: which can be extracted from the invariants. For example, we can consider

1908: systems of equations/inequalities contained in the gathered invariants

1909: and try to solve them to derive more precise ranges for values of expressions

1910: or the inconsistency of this computational state.

1911:

1912: Apart from the automatic mode when invariants are processed

1913: automatically, we provide an interactive mode to visualize results

1914: of our analysis in a hypertext system. {\em HyperCode} presented

1915: in \cite{BuKo/98,BBEF/2001} is an open tunable system for

1916: visualization of properties of syntactic structures. There are two

1917: cases: visualization of all properties detected in the automatic

1918: mode and the user-driven processing and visualization of

1919: properties.

1920:

1921: The experiments show that not all program properties of interest

1922: can be automatically extracted out of the computed invariants. It

1923: is not judicious to consider many particular cases and to hardly

1924: embed them into the system. Instead, the system facilitates the

1925: specification of the user request with some friendly interface. He

1926: chooses a program point and an expression and obtains those and

1927: only those equality relationships, valid at this point, where this

1928: expression occurs as a super- or sub-term.

1929: %The following strategies of selection of the subsets of properties are provided:

1930: %{\bf a)} all terms occurring as sub-terms of the term under consideration; {\bf b)}

1931: %all terms using the term under consideration as a sub-term; {\bf c)} the union of sets

1932: %determined by the previous cases {\bf a)} and {\bf b)}. These problems are

1933: %obviously reduced to computing a transitive closure of the corresponding

1934: %digraph with respect to some vertex and orientation of arcs.

1935:

1936:

1937:

1938: \subsection{Program examples}

1939:

1940:

1941: An example of a program is presented below. The properties detected by the

1942: analyzer are indicated in comments.

1943:

1944: \vskip\baselineskip

1945: \begin{tabular}{ll}

1946: \KW{var}  x,y,z: \KW{integer}; &\\\vspace*{-2pt}

1947: \KW{proce}\lefteqn{\mbox{\rm\KW{dure} P(a,b:

1948: \KW{integer}):\KW{integer};}}&\\\vspace*{-2pt} \KW{begin}

1949: &(*parameters are always equal*)\\\vspace*{-2pt} ~~  \KW{return}

1950: a+b                           &(*expression can be simplified:

1951: {\bf 2*a}*)\\\vspace*{-2pt} \KW{end}  P; &\\\vspace*{-2pt}

1952: \KW{begin} &\\\vspace*{-2pt} ~~   Read(x); &\\\vspace*{-2pt} ~~

1953: \KW{while}~  x$\leq$0  ~\KW{do} &\\\vspace*{-2pt} ~~~~   Read(x);

1954: &\\\vspace*{-2pt} ~~~~   x := x+1; &\\\vspace*{-2pt} ~~~~   z :=

1955: x+z; &(*variable {\bf z} might be uninitialized*)\\\vspace*{-2pt}

1956: ~~~~ y := x+1;                         &\\\vspace*{-2pt} ~~~~

1957: \KW{if}~  x=0  ~\KW{then}                   &\\\vspace*{-2pt}

1958: ~~~~~~   z := y;                         &(*r-value can be

1959: simplified: {\bf z:=1}*)\\\vspace*{-2pt} ~~~~   \KW{else}

1960: &\\\vspace*{-2pt} ~~~~~~   z := x+1; &(*r-value can be simplified:

1961: {\bf z:=y}*)\\\vspace*{-2pt} ~~~~~~ x := y; &\\\vspace*{-2pt} ~~~~

1962: \KW{end}; &\\\vspace*{-2pt} ~~~~   Write( P(y,z) ) &(*call can be

1963: transformed: {\bf Write(2*y)}*)\\\vspace*{-2pt} ~~ \KW{end};

1964: &\\\vspace*{-2pt} ~~   x := z \KW{div} (y - z); &(*arithmetical

1965: error*)\\\vspace*{-2pt} ~~   Write(x) &(*inaccessible

1966: point*)\\\vspace*{-2pt}

1967: \KW{end.}\\

1968: \end{tabular}

1969: \vskip\baselineskip

1970:

1971: \noindent On basis of the analysis, this program can be

1972: transformed into the following:

1973:

1974: \vskip\baselineskip

1975: \begin{center}

1976: \begin{tabular}{l}

1977: \KW{var}  x:\KW{integer};\\\vspace*{-2pt}

1978: \KW{begin}\\\vspace*{-2pt} ~~   Read(x);\\\vspace*{-2pt} ~~

1979: \KW{while}~  x$\leq$0  ~\KW{do}\\\vspace*{-2pt} ~~~~   Read(x);~~

1980: x := x+2;~~   Write(2*x)\\\vspace*{-2pt} ~~

1981: \KW{end}\\\vspace*{-2pt} ~~ \KW{ERROR\_EXCEPTION};\\\vspace*{-2pt}

1982: \KW{end.}\\

1983: \end{tabular}

1984: \end{center}

1985: \vskip\baselineskip

1986:

1987: \begin{table}

1988: \begin{center}

1989: \begin{tabular}{|| l || c | c | c || c | c | c ||}

1990: \hline\hline

1991: \raisebox{-2ex}{program}

1992:           & \multicolumn{3}{c||}{length (lines)} & \multicolumn{3}{c||}{size (bytes)}\\

1993:             \cline{2-4}                           \cline{5-7}

1994:           & {\em M2Mix} & \ERA &  improv.        & {\em M2Mix} & \ERA & improv.\\

1995: \hline

1996: \bf KMP       &    167      & 133 &  20.35\%        &    2996     & 2205& 26.4\%\\

1997: \bf Lambert   &    361      & 326 &  9.7\%          &    6036     & 2564& 57.5\%\\

1998: \bf Automaton &    37       & 35  &  5.4\%          &    969      & 926 & 4.5\%\\

1999: \bf Int$_{Fib}$&   87       & 77  &  11.5\%         &    1647     & 1432& 13.05\%\\

2000: \bf Ackerman  &    64       & 62  &  3.1\%          &    1384     & 1322& 4.5\%\\

2001: \hline

2002:           & \multicolumn{2}{r|}{average}&10.01\%&\multicolumn{2}{r|}{average}&21.19\%\\

2003: \hline\hline

2004: \end{tabular}

2005: \caption{Comparison \ERA\/ and {\em

2006: M2Mix}.}\label{M2Mix-analysis}\label{TableCMP}

2007: \end{center}

2008: \end{table}

2009:

2010: In {\bf Table \ref{M2Mix-analysis}} we present some results of

2011: optimization based on our analysis of residual programs generated

2012: by {\em M2Mix} specializer \cite{BuKo/96,Ko/95:phd}. To compile

2013: these examples, we used  XDS Modula/Obe\-ron compiler v.2.30

2014: \cite{XDS/2000}. The following programs have been investigated:

2015:

2016: \begin{itemize}

2017: \item {\bf KMP}       --- the ``na{\"\i}ve'' matching algorithm

2018:                       specialized with respect to some pattern;

2019:                       the residual program is comparable to

2020:                       Knuth, Morris, and Pratt's algorithm in efficiency.

2021:                       (see also {\bf Appendix}).

2022:

2023: \item {\bf Lambert}   --- a program drawing Lambert's figure

2024:                       and specialized with respect to number of points.

2025:

2026: \item {\bf Automaton} --- an interpreter of a deterministic finite-state

2027:                       automaton specialized with respect to some language.

2028:

2029: \item {\bf Int$_{Fib}$} --- an interpreter of MixLan \cite{Ko/95:phd}

2030:                       specialized with respect to a program computing

2031:                       Fibonacci numbers.

2032:

2033: \item {\bf Ackerman}  --- a program computing some values of Akcerman's

2034:                       function and specialized with respect to its

2035:                       first argument.

2036: \end{itemize}

2037:

2038: Let us comment briefly on the obtained results. Reducing length of

2039: a program can be considered as reducing number of operators and

2040: declarations. In these examples the optimizing effect was

2041: typically attained by the removal of redundant assignments, dead

2042: operators, unused variables and the reduction of operator

2043: strength. The only exception is {\bf KMP} program characterized by

2044: high degree of polyvariance (roughly speaking it means presence of

2045: deep--nested conditional statements) and an active usage of array

2046: references. Here some IF--statements with constant conditions and

2047: redundant range checks were eliminated. Notice that the last

2048: optimizing transformation is very important for Modula--like

2049: languages where such checks are defined by the language standard.

2050: Such notable optimizing effect for the {\bf Lambert} program is

2051: explained by deep reduction of power of floating-point operations

2052: which cannot be achieved by optimizing techniques now used in

2053: compilers. Since {\bf Automaton} and {\bf Ackerman} programs are

2054: quite small, their optimization gives conservative results.

2055: However, they would be better for the {\bf Ackerman} program if

2056: the implementation of \ERA\/ were context-sensitive. Substantial

2057: speed-up of these optimized programs was not obtained (it was less

2058: than 2\%) and this is not surprising since the great bulk of

2059: specializers take it as a criterion of optimality.

2060:

2061: These experiments show that an average reduction of size of

2062: residual programs is 20--25\%. Because the case of {\bf KMP}

2063: program seems to be the most realistic\footnote{Unfortunately our

2064: experiments were not exhaustive enough since the partial

2065: evaluation is not involved yet in real technological process of

2066: the software development and hence finding large resudial programs

2067: is a hard problem.}, we suppose that such improvement can be

2068: achieved in practice for real--world programs and it will be

2069: increased for large residual programs with a

2070: high degree of polyvariance %(for example, deep hierarchy of nested conditional statements)

2071: and active usage of arrays and float-point arithmetics. It is the

2072: author opinion that the analysis of automatically generated (from

2073: high--level specifications as well) programs is the most promising

2074: direction of its application, especially in the context-sensitive

2075: implementation of \ERA.

2076:

2077:

2078:

2079: \section*{Acknowledgement} The author wishes to thank M.A.

2080: Bulyonkov, P. Cousot, R. Cousot, and V.K. Sabelfeld for support,

2081: useful discussions and remarks.

2082: %\end{acknowledgement}

2083:

2084:

2085:

2086:

2087: %\bibliography{era}

2088: %\bibliographystyle{plain}

2089:

2090:

2091:

2092: \begin{thebibliography}{10}

2093:

2094: \bibitem{AU/72}

2095: A.~Aho and J.~Ullman.

2096: \newblock {\em The theory of parsing, translation and compilation}, volume~1.

2097: \newblock Prentice-Hall Inc., 1972.

2098:

2099: \bibitem{AWZ/88}

2100: B.~Alpern, M.N. Wegman, and F.K. Zadeck.

2101: \newblock Detecting equality of variables in programs.

2102: \newblock In {\em Proc. of the 15$^{th}$ Annual ACM Symposium Principles of

2103:   Programming Languages}, pages 1--11. ACM Press, 1988.

2104:

2105: \bibitem{BBEF/2001}

2106: D.~Baburin, M.~Bulyonkov, P.~Emelianov, and N.~Filatkina.

2107: \newblock Visualization facilities in program reengineering.

2108: \newblock {\em Programmirovanie}, 27(2):21--33, 2001.

2109:

2110: \bibitem{BerMar/76}

2111: L.~Berman and G.~Markowsky.

2112: \newblock Linear and non-linear approximate invariants.

2113: \newblock Research Report RC7241, T.J. Watson Research Center of IBM, Yorktown,

2114:   NY, 1976.

2115:

2116: \bibitem{Bou/93a}

2117: F.~Bourdoncle.

2118: \newblock Efficient chaotic iteration strategies with widenings.

2119: \newblock In {\em Proc. of the International Conference on Formal Methods in

2120:   Programming and Their Applications}, volume 735 of {\em Lecture Notes in

2121:   Computer Science}, pages 129--141. Springer-Verlag, 1993.

2122:

2123: \bibitem{BuKo/96}

2124: M.~Bulyonkov and D.~Kochetov.

2125: \newblock Practical aspects of specialization of {A}lgol-like programs.

2126: \newblock In {\em Proc. of the International Seminar on Partial Evaluation},

2127:   volume 1110 of {\em Lecture Notes in Computer Science}, pages 17--32.

2128:   Springer-Verlag, 1996.

2129:

2130: \bibitem{BuKo/98}

2131: M.~Bulyonkov and D.~Kochetov.

2132: \newblock Visualization of program properties.

2133: \newblock Research Report~51, Institute of Informatics Systems, Novosibirsk,

2134:   Russia, 1998.

2135:

2136: \bibitem{ClarisoCortadella-SAS04}

2137: R.~Claris\'o and J.~Cortadella.

2138: \newblock The octahedron abstract domain.

2139: \newblock In {\em Proc. of the 11$^{th}$ International Static Analysis

2140:   Symposium}, volume 3148 of {\em Lecture Notes in Computer Science}, pages

2141:   312--327. Springer--Verlag, 2004.

2142:

2143: \bibitem{CC/77}

2144: P.~Cousot and R.~Cousot.

2145: \newblock Abstract interpretation: {A} unified lattice model for static

2146:   analysis of programs by construction or approximation of fixpoints.

2147: \newblock In {\em Rec. of the 4$^{th}$ ACM Symposium on Principles of

2148:   Programming Languages}, pages 238--252. ACM Press, 1977.

2149:

2150: \bibitem{CC/92b}

2151: P.~Cousot and R.~Cousot.

2152: \newblock Abstract interpretation and application to logic program.

2153: \newblock {\em Journal of Logic Programming}, 13(2--3):103--180, 1992.

2154:

2155: \bibitem{CC/92a}

2156: P.~Cousot and R.~Cousot.

2157: \newblock Abstract interpretation frameworks.

2158: \newblock {\em Journal of Logic and Computation}, 2(4):511--547, 1992.

2159:

2160: \bibitem{CC/95}

2161: P.~Cousot and R.~Cousot.

2162: \newblock Formal languages, grammar and set--constraint--based program analysis

2163:   by abstract interpretation.

2164: \newblock In {\em Rec. of the Conference on Functional Programming Languages

2165:   and Computer Architecture}, pages 170--181. ACM Press, 1995.

2166:

2167: \bibitem{CousotCousot-CAV02}

2168: P.~Cousot and R.~Cousot.

2169: \newblock Software analysis and model checking.

2170: \newblock In {\em Proceedings of the 14$^{th}$ International Conference on

2171:   Computer Aided Verification}, volume 2404 of {\em Lecture Notes in Computer

2172:   Science}, pages 37--56. Springer-Verlag, 2002.

2173:

2174: \bibitem{CouHal/78}

2175: P.~Cousot and N.~Halbwachs.

2176: \newblock Automatic discovery of linear restraints among variables of a

2177:   program.

2178: \newblock In {\em Records of the 5$^{th}$ annual {ACM} Symposium on Principles

2179:   of Programming Languages}, pages 84--96. ACM, ACM Press, January 1978.

2180:

2181: \bibitem{Deu/94}

2182: A.~Deutsch.

2183: \newblock Interprocedural may--alias analysis for pointers: beyond

2184:   $k$-limiting.

2185: \newblock {\em SIGPLAN Notices}, 29(6):230--241, 1994.

2186: \newblock Proc. of the ACM SIGPLAN'94 Conference on Program Language Design and

2187:   Implementation.

2188:

2189: \bibitem{SAS-4/97}

2190: P.G. Emelianov and D.E. Baburin.

2191: \newblock Semantic analyzer of {M}odula-programs.

2192: \newblock In {\em Proc. of the 4$^{th}$ International Static Analysis

2193:   Symposium}, volume 1302 of {\em Lecture Notes in Computer Science}, pages

2194:   361--363. Springer--Verlag, 1997.

2195:

2196: \bibitem{EmSa/94}

2197: P.G. Emelianov and V.K. Sabelfeld.

2198: \newblock Analyzer of semantic properties of {M}odula-programms.

2199: \newblock In {\em Software intellectualization and quality}, pages 100--107.

2200:   Institute of Informatics Systems, Novosibirsk, Russia, 1994.

2201:

2202: \bibitem{Em/96}

2203: P.G. Emelianov.

2204: \newblock Analysis of the equality relation for the program terms.

2205: \newblock In {\em Proc. of the Third International Static Analysis Symposium},

2206:   volume 1145 of {\em Lecture Notes in Computer Science}, pages 174--188.

2207:   Springer--Verlag, 1996.

2208:

2209: \bibitem{ENSS/95}

2210: G.~Even, J.~Naor, B.~Schieber, and M.~Sudan.

2211: \newblock Approximating minimum feedback sets and multi-cuts.

2212: \newblock In {\em Proc. of the 4$^{th}$ International Conference on Integer

2213:   Programming and Combinatorial Optimization}, volume 920 of {\em Lecture Notes

2214:   in Computer Science}, pages 14--28. Springer--Verlag, 1995.

2215:

2216: \bibitem{XDS/2000}

2217: {\rm Excelsior LLC}.

2218: \newblock Native xds-x86 modula-2/oberon-2 development toolset.

2219:

2220: \bibitem{Gargi-PLDI02}

2221: K.~Gargi.

2222: \newblock A sparse algorithm for predicated global value numbering.

2223: \newblock In {\em Proceedings of the ACM SIGPLAN 2002 Conference on Programming

2224:   Language Design and Implementation}, pages 45--56, 2002.

2225:

2226: \bibitem{HOOTS/98}

2227: A.D. Gordon and A.M. Pitts, editors.

2228: \newblock {\em High Order Operational Techniques in Semantics}.

2229: \newblock Publications of Newton Institute. Cambridge University Press, 1998.

2230:

2231: \bibitem{Gra/89}

2232: Ph. Granger.

2233: \newblock Static analysis of arithmetical congruences.

2234: \newblock {\em International Journal of Computer Mathematics}, 30:165--199,

2235:   1989.

2236:

2237: \bibitem{Gra/91b}

2238: Ph. Granger.

2239: \newblock Static analysis of linear congruence equalities among variables of a

2240:   program.

2241: \newblock In {\em Proceedings of the International Joint Conference on Theory

2242:   and Practice of Software Developement}, volume 493 of {\em Lecture Notes in

2243:   Computer Science}, pages 169--192. Springer--Verlag, 1991.

2244:

2245: \bibitem{GulwaniNecula-POPL03}

2246: S.~Gulwani and G.C. Necula.

2247: \newblock Discovering affine equalities using random interpretation.

2248: \newblock In {\em Proc. of the ACM SIGPLAN-SIGACT 2003 Principles of

2249:   Programming Languages}, pages 74--84, 2003.

2250:

2251: \bibitem{GulwaniNecula-SAS04}

2252: S.~Gulwani and G.C. Necula.

2253: \newblock A polynomial-time algorithm for global value numbering.

2254: \newblock In {\em Proc. of the 11$^{th}$ International Static Analysis

2255:   Symposium}, volume 3148 of {\em Lecture Notes in Computer Science}, pages

2256:   212--227. Springer--Verlag, 2004.

2257:

2258: \bibitem{HalbwachsProyRoumanoff/97}

2259: N.~Halbwachs, Y.-E. Proy, and P.~Roumanoff.

2260: \newblock Verification of real-time systems using linear relation analysis.

2261: \newblock {\em Formal Methods in System Design}, 11(2):157--185, 1997.

2262:

2263: \bibitem{HeintzeJafVoi00}

2264: Nevin Heintze, Joxan Jaffar, and R\u{a}zvan Voicu.

2265: \newblock A framework for combining analysis and verification.

2266: \newblock In {\em Records of the 27$^{th}$\/ Annual {ACM} Symposium on

2267:   Principles of Programming Languages}, pages 26--39, 2000.

2268:

2269: \bibitem{HeiJaf/94}

2270: N.~Heintze and J.~Jaffar.

2271: \newblock Set constraint and set--based program analysis.

2272: \newblock In A.~Borning, editor, {\em Principles and Practice of Constraint

2273:   Programming}, volume 874 of {\em Lecture Notes in Computer Science}, pages

2274:   281--298. Springer--Verlag, 1994.

2275:

2276: \bibitem{PFA/81}

2277: N.D. Jones and S.S. Muchnick, editors.

2278: \newblock {\em Program Flow Analysis: Theory and Applications}.

2279: \newblock Prentice-Hall, 1981.

2280:

2281: \bibitem{Jo/87}

2282: N.D. Jones.

2283: \newblock Flow analysis of lazy higher--order functional programs.

2284: \newblock In S.~Abramsky and C.~Hankin, editors, {\em Abstract Interpretation

2285:   of Declarative Languages}, pages 103--122. Ellis Horwood, 1987.

2286:

2287: \bibitem{Karr/76}

2288: M.~Karr.

2289: \newblock Affine relationships among variables of a program.

2290: \newblock {\em Acta Informatica}, 6:133--151, 1976.

2291:

2292: \bibitem{Ko/95:phd}

2293: D.V. Kochetov.

2294: \newblock {\em Effective specialization of {A}lgol--like programs}.

2295: \newblock {Ph.D. Thesis}, Institute of Informatics Systems, Novosibirsk,

2296:   Russia, 1995.

2297:

2298: \bibitem{Mas/92}

2299: F.~Masdupuis.

2300: \newblock Array operations abstraction using semantic analysis of trapezoid

2301:   congruences.

2302: \newblock In {\em Proceedings of the International Conference on

2303:   Supercomputing}, pages 226--235. ACM Press, 1992.

2304:

2305: \bibitem{Mas/93}

2306: F.~Masdupuis.

2307: \newblock Semantic analysis of interval congruences.

2308: \newblock In {\em Proceedings of the International Conference Formal Methods in

2309:   Programming and Their Applications}, volume 735 of {\em Lecture Notes in

2310:   Computer Science}, pages 142--155. Springer--Verlag, 1993.

2311:

2312: \bibitem{Mine/2001:pado}

2313: A.~Min{\'e}.

2314: \newblock A new numerical abstract domain based on difference-bound matrices.

2315: \newblock In {\em Proceedings of the Second Symposium on Programs as Data

2316:   Objects}, volume 2053, pages 155--169, 2001.

2317:

2318: \bibitem{Rosen/82}

2319: B.K. Rosen.

2320: \newblock Robust linear algorithms for cutsets.

2321: \newblock {\em Journal Algorithms}, 3:205--217, 1982.

2322:

2323: \bibitem{RuthingKnoopSteffen-SAS99}

2324: O.~R{\"u}thing, J.~Knoop, and B.~Steffen.

2325: \newblock Detecting equalities of variables: Combining efficiency with

2326:   precision.

2327: \newblock In {\em Proc. of the 6$^{th}$ International Static Analysis

2328:   Symposium}, volume 1694 of {\em Lecture Notes in Computer Science}, pages

2329:   232--247. Springer--Verlag, 1999.

2330:

2331: \bibitem{Sa/79}

2332: V.~Sabelfeld.

2333: \newblock Polynomial upper bound for the complexity of the logic-termal

2334:   equivalence decision.

2335: \newblock {\em Doklady Akademii Nauk, Matematika}, 249(4):793--796, 1979.

2336:

2337: \bibitem{Sa/80}

2338: V.~Sabelfeld.

2339: \newblock The logic--termal equivalence is polynomial--time decidable.

2340: \newblock {\em Information Processing Letters}, 10(2):102--112, 1980.

2341:

2342: \bibitem{Speckenmeyer/89}

2343: E.~Speckenmeyer.

2344: \newblock On feedback problems in digraphs.

2345: \newblock In {\em Proceedings of the 15$^{th}$ International Workshop on

2346:   Graph--Theoretic Concepts in Computer Science}, volume 411 of {\em Lecture

2347:   Notes in Computer Science}, pages 218--231. Springer--Verlag, 1990.

2348:

2349: \bibitem{Venet/99}

2350: A.~Venet.

2351: \newblock Automatic analysis of pointer aliasing for untyped programs.

2352: \newblock {\em Science of Computer Programming}, 35(2-3):223--248, 1999.

2353:

2354: \bibitem{WZ/91}

2355: M.~Wegman and F.K. Zadeck.

2356: \newblock Constant propagation with conditional branches.

2357: \newblock {\em ACM Transactions on Programming Language and Systems},

2358:   13(2):181--210, 1991.

2359:

2360: \end{thebibliography}

2361:

2362:

2363:

2364:

2365:

2366: %\end{document}

2367:

2368: \newpage

2369:

2370:

2371: \section*{Appendix. Analysis of KMP}

2372:

2373: The appendix presents results of application of \ERA\/ to {\bf

2374: KMP} program generated by the specializer {\em M2Mix}. This

2375: program is a specialization of a program implementing the

2376: na{\"\i}ve pattern matching {\bf Match(p,str)} with respect to the

2377: pattern {\bf p="ababb"}. The invariants are written as comments at

2378: program points where they hold.

2379: %

2380: %Let us consider the results of our analysis of the program \linebreak

2381: %{\bf Match("ababb",str)} written as comments at program points where

2382: %they hold. Only the most interesting of them are given.

2383: We can conclude that:

2384:

2385:

2386: \begin{itemize}

2387: \item The target string necessarily ends with "\#" and the variable

2388:       {\bf ls} is equal to the string length (line 10).

2389:

2390: \item Every time when some element of {\bf str}

2391:       (lines 20, 31, 42, 53, 58, 67, 72, 78, 86, 108, 113, 129, 140, 151, 162)

2392:       is used in second {\bf LOOP}, the value of its index

2393:       expression does not exceed the value of the variable {\bf ls}.

2394:       The same is true for the value of a variable before the

2395:       increment statements {\bf INC(s)}

2396:       (lines 23, 34, 45, 62, 81, 89, 94, 98, 103, 116, 120, 132, 143, 154, 165).

2397:       Therefore, it suffices to check that a value of {\bf ls}

2398:       is not beyond the ranges determined by the type {\bf \_TYPE354a04}

2399:       during input of the target string (line 8). So, in the second cycle

2400:       all range checks can be eliminated.

2401:

2402: \item The assignment {\bf \_cfg\_counter:=0} is redundant (line 24).

2403:

2404: \item Conditions {\bf str[s+2]='a'} and {\bf str[s]='a'} are always false

2405:       (lines 78 and 86, respectively) because two different constants are equal.

2406:       So, the code of {\bf THEN}--branches is dead.

2407:

2408: \item The conditions at the lines 63, 68, 74, 82, 104, 109 are false, too.

2409:       However, automatic detection of these properties are not as easily as the previous.

2410:

2411:

2412: \end{itemize}

2413:

2414:

2415: Using this semantic information, it is possible to build a new

2416: program functionally equivalent to {\bf Match("ababb",str)}. In

2417: text of the program given below the underlined code can be

2418: eliminated.

2419:

2420: \vskip\baselineskip

2421:

2422: \fontsize{8}{10}\selectfont

2423: {\setcounter{numline}{0}

2424: \begin{tabular}{ll}                                                          \vspace*{-1pt}

2425: MODULE Match;                               &\\

2426: \vspace*{-1pt} \Z  FROM FIO IMPORT

2427: File,Open,ReadChar,\lefteqn{\mbox{WriteInt,stdout;}}&\\

2428: \vspace*{-1pt}

2429:

2430: \Z  VAR  \_cfg\_counter : CARDINAL; str\_file : File;        &\\

2431: \vspace*{-1pt}

2432:

2433: \Z  TYPE \_TYPE354a04 = [0..20];            &\\

2434: \vspace*{-1pt}

2435:

2436: \Z  TYPE \_TYPE355004 = ARRAY \_TYPE354a04 \lefteqn{\mbox{OF

2437: CHAR;}}&\\      \vspace*{-1pt}

2438:

2439: \Z  VAR str : \_TYPE355004; ls,s : \_TYPE354a04;

2440: &\\          \vspace*{-1pt}

2441: %

2442: BEGIN                                       &\\\NM

2443: \Z  str\_file := Open("target.dat");        &\\\NM

2444: \Z  ls := 0;                                &\\\NM

2445: \Z  LOOP                                    &\\\NM

2446: \Z\Z  str[ls] := ReadChar(str\_file);       &\\\NM

2447: \Z\Z  IF (str[ls]='\#') THEN                &$(*str[ls]='\!\!\#'*)$\\\NM

2448: \Z\Z\Z  EXIT                                &\\\NM

2449: \Z\Z  ELSE                                  &$(*str[ls]\neq'\!\!\#'*)$\\\NM

2450: \Z\Z\Z  INC(ls)                             &\\\NM

2451: \Z\Z  END                                   &\\\NM

2452: \Z  END;                                    &$(*str[ls]='\!\!\#'*)$\\\NM

2453: \Z  s := 0;                                 &\\\NM

2454: \Z  \_cfg\_counter := 0;                    &$(*s=\_cfg\_counter=0*)$\\

2455: \end{tabular}

2456:

2457: %\hspace*{-1cm}

2458: \begin{tabular}{ll}                         \NM

2459: \Z  LOOP                                    &\\\NM

2460: \Z\Z    CASE \_cfg\_counter OF              &\\\NM

2461: \Z\Z    $|$ 0 :                             &$(*\_cfg\_counter=0*)$\\\NM

2462: \Z\Z\Z    IF ((s+0)$\geq$ls) THEN           &$(*s\geq ls*)$\\\NM

2463: \Z\Z\Z\Z    WriteInt(stdout,(-1),0);        &\\\NM

2464: \Z\Z\Z\Z    EXIT                            &\\\NM

2465: \Z\Z\Z    END;                              &$(*0<ls,s<ls*)$\\\NM

2466: \Z\Z\Z    IF (str[(s+0)]='a') THEN          &$(*str[s]='\!\!a',0<ls,s<ls*)$\\\NM

2467: \Z\Z\Z\Z    \_cfg\_counter := 1             &\\\NM

2468: \Z\Z\Z    ELSE                              &$(*0<ls,s<ls,str[0]\neq'\!\!a'*)$\\\NM

2469: \Z\Z\Z\Z    INC(s);                         &$(*\_cfg\_counter=0*)$\\\NM

2470: \Z\Z\Z\Z    \underline{\_cfg\_counter := 0} &\\\NM

2471: \Z\Z\Z    END                               &$(*0<ls,s<ls*)$\\\NM

2472: \Z\Z    $|$ 1 :                             &$(*\_cfg\_counter=1*)$\\\NM

2473: \Z\Z\Z    IF ((s+1)$\geq$ls) THEN           &$(*s+1\geq ls*)$\\\NM

2474: \Z\Z\Z\Z    WriteInt(stdout,(-1),0);        &\\\NM

2475: \Z\Z\Z\Z    EXIT                            &\\\NM

2476: \Z\Z\Z    END;                              &$(*s+1<ls*)$\\\NM

2477: \Z\Z\Z    IF (str[(s+1)]='b') THEN          &$(*str[s+1]='\!\!b'*)$\\\NM

2478: \Z\Z\Z\Z    \_cfg\_counter := 2             &\\\NM

2479: \Z\Z\Z    ELSE                              &$(*str[s+1]\neq'\!\!b'*)$\\\NM

2480: \Z\Z\Z\Z    INC(s);                         &$(*str[s]\neq'\!\!b',s<ls*)$\\\NM

2481: \Z\Z\Z\Z    \_cfg\_counter := 0             &\\\NM

2482: \Z\Z\Z    END                               &\\\NM

2483: \Z\Z    $|$ 2 :                           &$(*\_cfg\_counter=2*)$\\\NM

2484: \Z\Z\Z    IF ((s+2)$\geq$ls) THEN         &$(*s+2\geq ls*)$\\\NM

2485: \Z\Z\Z\Z    WriteInt(stdout,(-1),0);      &\\\NM

2486: \Z\Z\Z\Z    EXIT                          &\\\NM

2487: \Z\Z\Z    END;                            &$(*s+2<ls*)$\\\NM

2488: \Z\Z\Z    IF (str[(s+2)]='a') THEN        &$(*str[s+2]='\!\!a'*)$\\\NM

2489: \Z\Z\Z\Z    \_cfg\_counter := 3           &\\\NM

2490: \Z\Z\Z    ELSE                            &$(*str[s+2]\neq'\!\!a'*)$\\\NM

2491: \Z\Z\Z\Z    INC(s);                       &$(*str[s+1]\neq'\!\!a',s+1<ls*)$\\\NM

2492: \Z\Z\Z\Z    \_cfg\_counter := 4           &\\\NM

2493: \Z\Z\Z    END                             &\\\NM

2494: \Z\Z    $|$ 3 :                           &$(*\_cfg\_counter=3*)$\\\NM

2495: \Z\Z\Z    IF ((s+3)$\geq$ls) THEN         &$(*s+3\geq ls*)$\\\NM

2496: \Z\Z\Z\Z    WriteInt(stdout,(-1),0);      &\\\NM

2497: \Z\Z\Z\Z    EXIT                          &\\\NM

2498: \Z\Z\Z    END;                            &$(*s+3<ls*)$\\\NM

2499: \Z\Z\Z    IF (str[(s+3)]='b') THEN        &$(*str[s+3]='\!\!b'*)$\\\NM

2500: \Z\Z\Z\Z    IF ((s+4)$\geq$ls) THEN       &$(*s+4\geq ls,s+3<ls*)$\\\NM

2501: \Z\Z\Z\Z\Z    WriteInt(stdout,(-1),0);    &\\\NM

2502: \Z\Z\Z\Z\Z    EXIT                        &\\\NM

2503: \Z\Z\Z\Z    END;                          &$(*s+4<ls*)$\\\NM

2504: \Z\Z\Z\Z    IF (str[(s+4)]='b') THEN      &$(*str[s+3]=str[s+4]='\!\!b'*)$\\\NM

2505: \Z\Z\Z\Z\Z    WriteInt(stdout,s,0);       &\\\NM

2506: \Z\Z\Z\Z\Z    EXIT                        &\\\NM

2507: \Z\Z\Z\Z    ELSE                          &$(*str[s+3]='\!\!b',str[s+4]\neq'\!\!b',s+4<ls*)$\\\NM

2508: \Z\Z\Z\Z\Z    INC(s);                     &$(*str[s+2]='\!\!b',str[s+3]\neq'\!\!b',s+3<ls*)$\\\NM

2509: \Z\Z\Z\Z\Z    \underline{IF ((s+0)$\geq$ls) THEN}     &$(*s\geq ls,s+3<ls*)$\\\NM

2510: \Z\Z\Z\Z\Z\Z    \underline{WriteInt(stdout,(-1),0);}  &\\\NM

2511: \Z\Z\Z\Z\Z\Z    \underline{EXIT}          &\\\NM

2512: \Z\Z\Z\Z\Z    \underline{END;}            &$(*str[s+2]='\!\!b',str[s+3]\neq'\!\!b',s+3<ls*)$\\\NM

2513: \Z\Z\Z\Z\Z    IF (str[(s+0)]='a') THEN    &$(*str[s]='\!\!a',str[s+2]='\!\!b',str[s+3]\neq'\!\!b',$\\

2514:                                           &$\opn  s+3<ls*)$\\\NM

2515: \Z\Z\Z\Z\Z\Z    \underline{IF ((s+1)$\geq$ls) THEN}   &$(*s+1\geq ls,s+3<ls*)$\\\NM

2516: \Z\Z\Z\Z\Z\Z\Z    \underline{WriteInt(stdout,(-1),0);}&\\\NM

2517: \Z\Z\Z\Z\Z\Z\Z    \underline{EXIT}        &\\\NM

2518: \Z\Z\Z\Z\Z\Z    \underline{END;}          &$(*\mbox{the same as at line 67}*)$\\

2519: \end{tabular}

2520:

2521: %\hspace*{-1cm}

2522: \begin{tabular}{ll}                        \NM

2523: \Z\Z\Z\Z\Z\Z    IF (str[(s+1)]='b') THEN  &$(*str[s]='\!\!a',str[s+1]='\!\!b',str[s+2]='\!\!b',$\\\NM

2524:                                           &$\opn  str[s+3]\neq'\!\!b',s+3<ls*)$\\\NM

2525: \Z\Z\Z\Z\Z\Z\Z    \underline{IF ((s+2)$\geq$ls) THEN} &$(*s+2\geq ls,s+3<ls*)$\\\NM

2526: \Z\Z\Z\Z\Z\Z\Z\Z    \underline{WriteInt(stdout,(-1),0);}&\\\NM

2527: \Z\Z\Z\Z\Z\Z\Z\Z    \underline{EXIT}      &\\\NM

2528: \Z\Z\Z\Z\Z\Z\Z    \underline{END;}        &$(*str[s]='\!\!a',str[s+1]='\!\!b',str[s+2]='\!\!b',$\\

2529:                                           &$\opn  str[s+3]\neq'\!\!b',s+3<ls*)$\\\NM

2530: \Z\Z\Z\Z\Z\Z\Z    \underline{IF (str[(s+2)]='a') THEN}&$(*\mbox{inaccessible point}*)$\\\NM

2531: \Z\Z\Z\Z\Z\Z\Z\Z    \underline{\_cfg\_counter := 3}   &\\\NM

2532: \Z\Z\Z\Z\Z\Z\Z    \underline{ELSE}        &$(*str[s]='\!\!a',str[s+1]='\!\!b',str[s+2]='\!\!b',$\\

2533:                                           &$\opn  str[s+3]\neq'\!\!b',s+3<ls*)$\\\NM

2534: \Z\Z\Z\Z\Z\Z\Z\Z    INC(s);               &$(*str[s-1]='\!\!a',str[s]='\!\!b',str[s+1]='\!\!b',$\\

2535:                                           &$\opn  str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM

2536: \Z\Z\Z\Z\Z\Z\Z\Z    \underline{IF ((s+0)$\geq$ls) THEN}&$(*s\geq ls,s+2<ls*)$\\\NM

2537: \Z\Z\Z\Z\Z\Z\Z\Z\Z    \underline{WriteInt(stdout,(-1),0);}&\\\NM

2538: \Z\Z\Z\Z\Z\Z\Z\Z\Z    \underline{EXIT}    &\\\NM

2539: \Z\Z\Z\Z\Z\Z\Z\Z    \underline{END;}      &$(*str[s-1]='\!\!a',str[s]='\!\!b',str[s+1]='\!\!b',$\\

2540:                                           &$\opn  str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM

2541: \Z\Z\Z\Z\Z\Z\Z\Z    \underline{IF (str[(s+0)]='a') THEN}&$(*\mbox{inaccessible point}*)$\\\NM

2542: \Z\Z\Z\Z\Z\Z\Z\Z\Z    \underline{\_cfg\_counter:=14}  &\\\NM

2543: \Z\Z\Z\Z\Z\Z\Z\Z    \underline{ELSE}      &$(*str[s-1]='\!\!a',str[s]='\!\!b',str[s+1]='\!\!b',$\\

2544:                                           &$\opn  str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM

2545: \Z\Z\Z\Z\Z\Z\Z\Z\Z    INC(s);             &$(*str[s-2]='\!\!a',str[s-1]='\!\!b',str[s]='\!\!b',$\\

2546:                                           &$\opn  str[s+1]\neq'\!\!b',s+1<ls*)$\\\NM

2547: \Z\Z\Z\Z\Z\Z\Z\Z\Z    \_cfg\_counter:=4   &\\\NM

2548: \Z\Z\Z\Z\Z\Z\Z\Z    \underline{END}       &\\\NM

2549: \Z\Z\Z\Z\Z\Z\Z    \underline{END}         &$(*str[s-2]='\!\!a',str[s-1]='\!\!b',str[s]='\!\!b',$\\

2550:                                           &$\opn  str[s+1]\neq'\!\!b',s+1<ls*)$\\

2551: \NM

2552: \Z\Z\Z\Z\Z\Z    ELSE                   &$(*str[s]='\!\!a',str[s+1]\neq'\!\!b',str[s+2]='\!\!b',$\\

2553:                                        &$\opn  str[s+3]\neq'\!\!b',s+3<ls*)$\\\NM

2554: \Z\Z\Z\Z\Z\Z\Z    INC(s);              &$(*str[s-1]='\!\!a',str[s]\neq'\!\!b',str[s+1]='\!\!b',$\\

2555:                                        &$\opn  str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM

2556: \Z\Z\Z\Z\Z\Z\Z    \_cfg\_counter := 12 &\\\NM

2557: \Z\Z\Z\Z\Z\Z    END                    &\\\NM

2558: \Z\Z\Z\Z\Z    ELSE                     &$(*str[s]='\!\!a',str[s+2]='\!\!b',str[s+3]\neq'\!\!b',$\\

2559:                                        &$\opn  s+3<ls*)$\\\NM

2560: \Z\Z\Z\Z\Z\Z    INC(s);                &$(*str[s-1]='\!\!a',str[s+1]='\!\!b',str[s+2]\neq'\!\!b',$\\

2561:                                        &$\opn  s+2<ls*)$\\\NM

2562: \Z\Z\Z\Z\Z\Z    \_cfg\_counter := 12   &\\\NM

2563: \Z\Z\Z\Z\Z    END                      &\\\NM

2564: \Z\Z\Z\Z    END                        &\\\NM

2565: \Z\Z\Z    ELSE                         &$(*str[s+3]\neq'\!\!b',s+3<ls*)$\\\NM

2566: \Z\Z\Z\Z    INC(s);                    &$(*str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM

2567: \Z\Z\Z\Z    \underline{IF ((s+0)$\geq$ls) THEN}    &$(*s\geq ls,s+2<ls*)$\\\NM

2568: \Z\Z\Z\Z\Z    \underline{WriteInt(stdout,(-1),0);} &\\\NM

2569: \Z\Z\Z\Z\Z    \underline{EXIT}         &\\\NM

2570: \Z\Z\Z\Z    \underline{END;}           &$(*str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM

2571: \Z\Z\Z\Z    IF (str[(s+0)]='a') THEN   &$(*str[s]='\!\!a',str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM

2572: \Z\Z\Z\Z\Z    \underline{IF ((s+1)$\geq$ls) THEN}  &$(*s+1\geq ls,s+2<ls*)$\\\NM

2573: \Z\Z\Z\Z\Z\Z    \underline{WriteInt(stdout,(-1),0);}&\\\NM

2574: \Z\Z\Z\Z\Z\Z    \underline{EXIT}       &\\\NM

2575: \Z\Z\Z\Z\Z    \underline{END;}         &$(*str[s]='\!\!a',str[s+2]\neq'\!\!b',s+2<ls*)$\\

2576: \end{tabular}

2577:

2578: %\hspace*{-1cm}

2579: \begin{tabular}{ll}                    \NM

2580: \Z\Z\Z\Z\Z    IF (str[(s+1)]='b') THEN &$(*str[s]='\!\!a',str[s+1]='\!\!b',str[s+2]\neq'\!\!b',$\\

2581:                                        &$\opn  s+2<ls*)$\\\NM

2582: \Z\Z\Z\Z\Z\Z    \_cfg\_counter := 2    &\\\NM

2583: \Z\Z\Z\Z\Z    ELSE                     &$(*str[s]='\!\!a',str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM

2584: \Z\Z\Z\Z\Z\Z    INC(s);                &$(*str[s-1]='\!\!a',str[s+1]\neq'\!\!b',s+1<ls*)$\\\NM

2585: \Z\Z\Z\Z\Z\Z    \_cfg\_counter := 10   &\\\NM

2586: \Z\Z\Z\Z\Z    END                      &\\\NM

2587: \Z\Z\Z\Z    ELSE                       &$(*str[s+2]\neq'\!\!b',s+2<ls*)$\\\NM

2588: \Z\Z\Z\Z\Z    INC(s);                  &$(*str[s+1]\neq'\!\!b',s+1<ls*)$\\\NM

2589: \Z\Z\Z\Z\Z    \_cfg\_counter := 10     &\\\NM

2590: \Z\Z\Z\Z    END                        &\\\NM

2591: \Z\Z\Z    END                          &\\\NM

2592: \Z\Z    $|$ 4 :                        &$(*\_cfg\_counter=4*)$\\\NM

2593: \Z\Z\Z    IF ((s+0)$\geq$ls) THEN      &$(*s\geq ls*)$\\\NM

2594: \Z\Z\Z\Z    WriteInt(stdout,(-1),0);   &\\\NM

2595: \Z\Z\Z\Z    EXIT                       &\\\NM

2596: \Z\Z\Z    END;                         &$(*s<ls*)$\\\NM

2597: \Z\Z\Z    IF (str[(s+0)]='a') THEN     &$(*str[s]='\!\!a',s<ls*)$\\\NM

2598: \Z\Z\Z\Z    \_cfg\_counter := 1        &\\\NM

2599: \Z\Z\Z    ELSE                         &$(*str[s]\neq'\!\!a',s<ls*)$\\\NM

2600: \Z\Z\Z\Z    INC(s);                    &$(*str[s-1]\neq'\!\!a',s\leq ls*)$\\\NM

2601: \Z\Z\Z\Z    \_cfg\_counter := 0        &\\\NM

2602: \Z\Z\Z    END                          &\\\NM

2603: \Z\Z    $|$ 10:                        &$(*\_cfg\_counter=10*)$\\\NM

2604: \Z\Z\Z    IF ((s+0)$\geq$ls) THEN      &$(*s\geq ls*)$\\\NM

2605: \Z\Z\Z\Z    WriteInt(stdout,(-1),0);   &\\\NM

2606: \Z\Z\Z\Z    EXIT                       &\\\NM

2607: \Z\Z\Z    END;                         &$(*s<ls*)$\\\NM

2608: \Z\Z\Z    IF (str[(s+0)]='a') THEN     &$(*str[s]='\!\!a',s<ls*)$\\\NM

2609: \Z\Z\Z\Z    \_cfg\_counter := 1        &\\\NM

2610: \Z\Z\Z    ELSE                         &$(*str[s]\neq'\!\!a',s<ls*)$\\\NM

2611: \Z\Z\Z\Z    INC(s);                    &$(*str[s-1]\neq'\!\!a',s\leq ls*)$\\\NM

2612: \Z\Z\Z\Z    \_cfg\_counter := 0        &\\\NM

2613: \Z\Z\Z    END                          &\\

2614: \NM

2615: \Z\Z $|$ 12:                           &$(*\_cfg\_counter=12*)$\\\NM

2616: \Z\Z\Z    IF ((s+0)$\geq$ls) THEN      &$(*s\geq ls*)$\\\NM

2617: \Z\Z\Z\Z    WriteInt(stdout,(-1),0);   &\\\NM

2618: \Z\Z\Z\Z    EXIT                       &\\\NM

2619: \Z\Z\Z    END;                         &$(*s<ls*)$\\\NM

2620: \Z\Z\Z    IF (str[(s+0)]='a') THEN     &$(*str[s]='\!\!a',s<ls*)$\\\NM

2621: \Z\Z\Z\Z    \_cfg\_counter := 14       &\\\NM

2622: \Z\Z\Z    ELSE                         &$(*str[s]\neq'\!\!a',s<ls*)$\\\NM

2623: \Z\Z\Z\Z    INC(s);                    &$(*str[s-1]\neq'\!\!a',s\leq ls*)$\\\NM

2624: \Z\Z\Z\Z    \_cfg\_counter := 4        &\\\NM

2625: \Z\Z\Z    END                          &\\\NM

2626: \Z\Z $|$ 14:                           &$(*\_cfg\_counter=14*)$\\\NM

2627: \Z\Z\Z    IF ((s+1)$\geq$ls) THEN      &$(*s+1\geq ls*)$\\\NM

2628: \Z\Z\Z\Z    WriteInt(stdout,(-1),0);   &\\\NM

2629: \Z\Z\Z\Z    EXIT                       &\\\NM

2630: \Z\Z\Z    END;                         &$(*s+1<ls*)$\\\NM

2631: \Z\Z\Z    IF (str[(s+1)]='b') THEN     &$(*str[s+1]='\!\!b',s+1<ls*)$\\\NM

2632: \Z\Z\Z\Z    \_cfg\_counter := 2        &\\\NM

2633: \Z\Z\Z    ELSE                         &$(*str[s+1]\neq'\!\!b',s+1<ls*)$\\\NM

2634: \Z\Z\Z\Z    INC(s);                    &$(*str[s]\neq'\!\!b',s<ls*)$\\\NM

2635: \Z\Z\Z\Z    \_cfg\_counter := 4        &\\\NM

2636: \Z\Z\Z    END                          &\\\NM

2637: \Z\Z  END                              &\\

2638: \Z  END                                &\\

2639: END Match.                             &\\

2640: \end{tabular}

2641:

2642:

2643:

2644:

2645:

2646:

2647: \end{document}

2648: