cs0404006/cw.tex
1: \documentclass[leqno,tbtags]{sig-alternate}
2: \listfiles
3: \setcounter{errorcontextlines}{200}
4: \usepackage[logonly]{trace}
5: %\flushbottom
6: \usepackage{fixltx2e}
7: \usepackage{amsmath1}
8: \usepackage{amstext}
9: \usepackage{amssymb}
10: \usepackage{mathdots}
11: 
12: %%% \usepackage[cjkbg5]{ucs}
13: %%% \usepackage[utf8]{inputenc}
14: %%% \usepackage[C00,OT1]{fontenc}
15: %%% \DeclareFontSubstitution{C00}{ming}{m}{n}
16: %%% \makeatletter
17: %%% \uc@dclc{65292}{cjkbg5}{\u@cjk@Bgv0101}%
18: %%% \makeatother
19: 
20: \usepackage{ifpdf}
21: \ifpdf
22:     \setlength{\pdfpagewidth}{8.5in}
23:     \setlength{\pdfpageheight}{11in}
24:     \usepackage[pdf]{qtree}
25:     \usepackage[pdftex,matrix,arrow,curve,frame]{xy}
26: \else
27:     \usepackage{qtree}
28:     \usepackage[ps,dvips,matrix,arrow,curve,frame]{xy}
29: \fi
30: \entrymodifiers={+!!<0pt,\the\fontdimen22\textfont2>}
31: \newcommand{\strutentries}{\everyentry={\strut}}
32: \makeatletter
33: \newcommand{\shrink}[1][\jot]{\relax \setlength\@tempdima{#1}%
34:   \ifmmode \expandafter\mathpalette\expandafter\math@shrink
35:   \else \expandafter\make@shrink \fi}
36: \newcommand{\make@shrink}[1]{\setbox\z@\hbox{\kern-\@tempdima\color@begingroup#1\color@endgroup\kern-\@tempdima}\fin@shrink}
37: \newcommand{\math@shrink}[2]{\setbox\z@\hbox{\kern-\@tempdima$\m@th#1{#2}$\kern-\@tempdima}\fin@shrink}
38: \newcommand{\fin@shrink}{%
39:   \@tempdimb\ht\z@ \advance\@tempdimb-\@tempdima \ht\z@\@tempdimb
40:   \@tempdimb\dp\z@ \advance\@tempdimb-\@tempdima \dp\z@\@tempdimb
41:   \box\z@}
42: \makeatother
43: 
44: \usepackage{hyphenat}
45: \usepackage{exemplar}
46: \usepackage{array1}
47: \usepackage{qtree1}
48: \usepackage{url}
49: \usepackage{varioref}
50: \usepackage{refrange}
51: \usepackage{calc}
52: \usepackage{prooftree}
53: \usepackage{mdwlist}
54: 
55: \usepackage{flushend}
56: %\atColsBreak={\vspace{\bibsep}}
57: %\showcolsendrule
58: 
59: \setlength{\parskip}{0pt}
60: %\setlength{\maxdepth}{\depthof{\strut}}
61: 
62: \renewcommand{\floatpagefraction}{.9}
63: \renewcommand{\dblfloatpagefraction}{.9}
64: 
65: \makeatletter \providecommand{\newblock}{\@undefined} \makeatother
66: \usepackage[longnamesfirst,sort&compress]{natbib1}
67: \bibsep=\smallskipamount
68: \renewcommand\bibsection{\section{\MakeUppercase{\refname}}}
69: 
70: \usepackage{soul}
71: 
72: \usepackage{semantics}
73: \renewcommand{\emp}{\emph}
74: \renewcommand{\denotation}[1]{\text{\sffamily#1}}
75: \DeclareDenotation{\Student}{student}
76: \DeclareDenotation{\Diligent}{diligent}
77: \DeclareDenotation{\Passed}{passed}
78: \DeclareDenotation{\Course}{course}
79: \DeclareDenotation{\Alice}{alice}
80: \DeclareDenotation{\CSnlp}{cs187}
81: \DeclareDenotation{\Liked}{liked}
82: \DeclareDenotation{\Left}{left}
83: \DeclareDenotation{\Representative}{repr}
84: \DeclareDenotation{\Of}{of}
85: \DeclareDenotation{\Company}{company}
86: \DeclareDenotation{\Animate}{animate}
87: \DeclareDenotation{\Thing}{Thing}
88: \DeclareDenotation{\Bool}{Bool}
89: \DeclareDenotation{\BoolNeg}{BoolNeg}
90: \DeclareDenotation{\BoolPos}{BoolPos}
91: 
92: % A new math version for code samples.
93: %
94: \DeclareMathVersion{code}
95: \SetMathAlphabet{\mathnormal}{code}{OT1}{cmtt}{m}{n}
96: \SetMathAlphabet{\mathbf}{code}{OT1}{cmtt}{b}{n}
97: \SetMathAlphabet{\mathsf}{code}{OT1}{cmtt}{m}{n}
98: \SetMathAlphabet{\mathit}{code}{OT1}{cmtt}{m}{it}
99: \SetSymbolFont{operators}{code}{OT1}{cmtt}{m}{n}
100: \SetSymbolFont{letters}{code}{OT1}{cmtt}{m}{n}  % not OML... oh well
101: \newcommand{\code}[1]{\text{\mathversion{code}$#1$}}
102: \newcommand{\prose}[1]{\text{\mathversion{normal}#1}}
103: 
104: % Programming language keywords.
105: %
106: \newcommand{\keyword}[1]{\textsf{#1}}
107: \newcommand{\preword}[1]{{}\nonscript\mskip-\thickmuskip\mathrel{\keyword{#1}}}
108: \newcommand{\infword}[1]{\mathrel{\keyword{#1}}}
109: \newcommand{\Random    }{\keyword{random}}
110: \newcommand{\Semi      }{\preword{;}}
111: \newcommand{\Try       }{\preword{try}}
112: \newcommand{\Catch     }{\infword{catch}}
113: \newcommand{\Goto      }{\preword{goto}}
114: \newcommand{\In        }{\infword{in}}
115: \newcommand{\Let       }{\preword{let}}
116: \newcommand{\Print     }{\preword{print}}
117: \newcommand{\Read      }{\keyword{read}}
118: \newcommand{\Throw     }{\keyword{throw}}
119: \newcommand{\With      }{\infword{with}}
120: \newcommand{\Comma     }{\preword{,}}
121: \newcommand{\Colon     }{.\,}
122: \newcommand{\assignFrom}{\infword{:=}}
123: \newcommand{\Load      }{\keyword{load}}
124: \newcommand{\Store     }{\keyword{store}}
125: \newcommand{\Abort     }{\keyword{abort}}
126: \newcommand{\Shift     }{\xi}
127: \newcommand{\Lam       }{\lambda}
128: \newcommand{\Callcc    }{\keyword{call/cc}}
129: 
130: % Categorial grammar for computer scientists.
131: %
132: \newcommand{\fwd}{\mathbin\prime}
133: \newcommand{\bwd}{\mathbin\backprime}
134: \newcommand{\Fwd}{\mathbin{\acute\rightarrow}}
135: \newcommand{\Bwd}{\mathbin{\grave\rightarrow}}
136: \makeatletter
137: \renewcommand{\fun}[2][]{\mathopen{\smash[t]{\lambda
138:       ^{\text{\setbox\z@=\hbox{$\!#1\!$}%
139:               \ifdim\wd\z@>\z@\unhbox\z@\else\hb@xt@\z@{\hss$#1$\hss}\fi}}%
140:     }\mathord{#2}.\,}}
141: \makeatother
142: 
143: % Miscellaneous macros.
144: %
145: \newcommand{\evalsto}{\mathrel\vartriangleright}
146: \newcommand{\toD}{\mathbin{\smash[t]{\stackrel{\Delta}%
147:     {\text{\raisebox{0pt}[.8\height-.2\depth][.8\depth-.2\height]%
148:                          {$\rightarrow$}}}}}}
149: \newcommand{\toMaybeD}{\mathbin{\smash[t]{\stackrel{\smash[b](\Delta\smash[b])}%
150:     {\text{\raisebox{0pt}[.8\height-.2\depth][.8\depth-.2\height]%
151:                          {$\rightarrow$}}}}}}
152: 
153: % Fix bugs in sig-alternate.cls
154: \makeatletter
155: \def\@sect#1#2#3#4#5#6[#7]#8{%
156:     \ifnum #2>\c@secnumdepth
157:         \let\@svsec\@empty
158:     \else
159:         \refstepcounter{#1}%
160:         \edef\@svsec{%
161:             \begingroup
162:                 %\ifnum#2>2 \noexpand\rm \fi % changed to next 29 July 2002 gkmt
163: 			\ifnum#2>2 \noexpand#6 \fi
164:                 \csname the#1\endcsname
165:             \endgroup
166:             \ifnum #2=1\relax .\fi
167:             \hskip 1em
168:         }%
169:     \fi
170:     \@tempskipa #5\relax
171:     \ifdim \@tempskipa>\z@
172:         \begingroup
173:             #6\relax
174:             \@hangfrom{\hskip #3\relax\@svsec}%
175:             \begingroup
176:                 \interlinepenalty \@M
177:                 \if@uchead
178:                     \uppercase{#8}%
179:                 \else
180:                     #8%
181:                 \fi
182:                 \par
183:             \endgroup
184:         \endgroup
185:         \csname #1mark\endcsname{#7}%
186:         %\vskip -12pt  %gkmt, 11 aug 99 and GM July 2000 (was -14) - numbered section head spacing
187: \addcontentsline{toc}{#1}{%
188:             \ifnum #2>\c@secnumdepth \else
189:                 \protect\numberline{\csname the#1\endcsname}%
190:             \fi
191:             #7%
192:         }%
193:     \else
194:         \def\@svsechd{%
195:             #6%
196:             \hskip #3\relax
197:             \@svsec
198:             \if@uchead
199:                 \uppercase{#8}%
200:             \else
201:                 #8%
202:             \fi
203:             \csname #1mark\endcsname{#7}%
204:             \addcontentsline{toc}{#1}{%
205:                 \ifnum #2>\c@secnumdepth \else
206:                     \protect\numberline{\csname the#1\endcsname}%
207:                 \fi
208:                 #7%
209:             }%
210:         }%
211:     \fi
212:     \@xsect{#5}%\hskip 1pt
213:     \par
214: }
215: \def\section{%
216:     \@startsection{section}{1}{\z@}{10\p@ \@plus 4\p@ \@minus 2\p@}% GM
217:     {4\p@}{\baselineskip 14pt\secfnt\@ucheadtrue}%
218: }
219: \def\subsection{%
220:     \@startsection{subsection}{2}{\z@}{8\p@ \@plus 2\p@ \@minus \p@}
221:     {4\p@}{\secfnt}%
222: }
223: \def\subsubsection{%
224:     \@startsection{subsubsection}{3}{\z@}{8\p@ \@plus 2\p@ \@minus \p@}%
225:     {4\p@}{\subsecfnt}%
226: }
227: \let\@@category\@category
228: \def\@category{\noindent\@@category}
229: \makeatother
230: 
231: \begin{document}
232: 
233: \conferenceinfo{Continuation Workshop}{2004 Venice, Italy}
234: \title{Delimited continuations in natural language}
235: \subtitle{Quantification and polarity sensitivity}
236: \numberofauthors{1}
237: \author{
238: \alignauthor Chung-chieh Shan\\[\jot]
239:     \affaddr{Harvard University}\\
240:     \affaddr{33 Oxford Street}\\
241:     \affaddr{Cambridge, MA 02138 USA}\\
242:     \email{ccshan@post.harvard.edu}}
243: \maketitle
244: 
245: \begin{abstract}
246: \noindent
247: Making a linguistic theory is like making a programming language: one typically
248: devises a type system to delineate the acceptable utterances and a denotational
249: semantics to explain observations on their behavior.  Via this connection, the
250: programming language concept of delimited continuations can help analyze
251: natural language phenomena such as quantification and polarity sensitivity.
252: Using a logical metalanguage whose syntax includes control operators and whose
253: semantics involves evaluation order, these analyses can be expressed in direct
254: style rather than continuation\hyp passing style, and these phenomena can be
255: thought of as computational side effects.
256: \end{abstract}
257: 
258: \category{D.3.3}{Programming languages}{Language constructs and features}[control structures]
259: \category{J.5}{Linguistics}{}
260: 
261: \terms\noindent{Languages, Theory}
262: 
263: \keywords\noindent{Delimited continuations, control effects, natural language
264: semantics, quantification, polarity sensitivity}
265: 
266: \section{Introduction}
267: \label{s:introduction}
268: 
269: This paper is about computational linguistics, in the sense of applying
270: insights from computer science to linguistics.  Linguistics strives to
271: scientifically explain empirical observations of natural language.
272: Semantics, in particular, is concerned with phenomena such as the
273: following.  In~\eqref{e:entailment} below, some sentences to the left
274: \emp{entail} their counterparts to the right, but others do not.
275: \example{\label{e:entailment}%
276:     \subexamples[l@{\;}C@{\;}X]{
277:         Every student passed
278:         & \vdash
279:         & Every diligent student passed \\
280:         No student passed
281:         & \vdash
282:         & No diligent student passed \\
283:         A student passed
284:         & \nvdash
285:         & A diligent student passed \\
286:         Most students passed
287:         & \nvdash
288:         & Most diligent students passed}}
289: The sentence in~\eqref{e:ambiguity} is \emp{ambiguous} between at least
290: two readings.  On one reading, the speaker must decline to run any spot
291: that fails to substantiate any claims whatsoever.  On another reading, there
292: exist certain claims (anti-war ones, say) such that the speaker must decline to
293: run any spot that fails to substantiate them.
294: \example{\label{e:ambiguity}%
295:     We must decline to run any spot that fails to substantiate certain claims.%
296:     \footnotemark}
297: \footnotetext{%
298:     This sentence is part of a statement made by the cable television
299:     company Comcast after its CNN channel rejected an anti-war commercial hours
300:     before it was scheduled to air on January 28, 2003.}%
301: Finally, among the four sentences in~\eqref{e:acceptability},
302: only~\eqref{e:acceptability-acceptable} is \emp{acceptable}.  That is, only
303: it can be used in idealized conversation.  The unacceptability of the rest
304: is notated with asterisks.
305: \example{\label{e:acceptability}%
306:     \subexamples{
307:     \subexample{\label{e:acceptability-acceptable}%
308:                    No    student  liked any course.}
309:     \subexample{\<*Every student  liked any course.}
310:     \subexample{\<*A     student  liked any course.}
311:     \subexample{\<*Most  students liked any course.}}}
312: 
313: \begin{figure*}[t]
314: \smallskip
315: \centerline{\shrink{$\xymatrix @C=0pt @R=3ex{
316:     \strut\text{Every student passed} \ar[d]
317:     &\vdash&
318:     \strut\text{Every diligent student passed} \ar[d]
319: \\
320:     \strut\Forall{x} \Student(x) \limplies \Passed(x) \ar[d]
321:     &\vdash&
322:     \strut\Forall{x} \bigl( \Student(x) \land \Diligent(x) \bigr) \limplies \Passed(x) \ar[d]
323: \\
324:     \strut\langle\text{some truth condition on models}\rangle
325:     &\vdash&
326:     \strut\langle\text{some other truth condition on models}\rangle
327:     }$}}
328: \caption{The translation\slash denotation approach to natural language semantics}
329: \label{fig:translation}
330: \end{figure*}
331: %stopzone
332: 
333: %%% \begingroup
334: %%% \def\\{\hspace{0pt}\ignorespaces}
335: The linguistic entailments and non\hyp entailments in~\eqref{e:entailment} are
336: facts about English, in that only a speaker of English can make these judgments.
337: Nevertheless, they presumably have to do with corresponding logical entailments
338: and non\hyp entailments: both the English speaker who judges that \phrase{Every
339: student passed} entails \phrase{Every diligent student passed} and the Mandarin
340: speaker who judges that \phrase{Meige xuesheng dou jige-le} entails
341: \phrase{Meige yonggong-de xuesheng dou jige-le} rely on knowing that,
342: if every student passed, then every diligent student passed.
343: Thus the typical linguistic theory specifies a semantics for natural
344: language by translating declarative sentences into logical statements
345: with truth conditions.  The linguistic entailments
346: in~\eqref{e:entailment} hold, goes the theory, because the
347: meanings---truth conditions---of the two sentences are such that any model
348: that verifies the former also verifies the latter.
349: Much work in natural language semantics aims in this way, as depicted in
350: Figure~\vref{fig:translation}, to explain the horizontal by positing the
351: vertical.  This approach is
352: reminiscent of programming language research where an ill-understood
353: language (perhaps one with a complicating feature like exceptions) is
354: studied by translation into a simpler language (without exceptions) that is
355: better understood.
356: %%% \endgroup
357: 
358: The translation target posited in natural language semantics is often some
359: combination of the $\lambda$-calculus and predicate logic.  For example, the
360: verb \phrase{passed} might be translated as $\fun{x}\Passed(x)$.  This paper
361: argues by example that the translation target should be a logical metalanguage
362: with first-class delimited continuations.  The examples are two natural
363: language phenomena: \emp{quantification} by words like \phrase{every} and
364: \phrase{most} in~\eqref{e:entailment}, and \emp{polarity sensitivity} on the
365: part of words like \phrase{any} in~\eqref{e:acceptability}.
366: 
367: Quantification was first analyzed explicitly using continuations by
368: \citet{barker-continuations}.  Building on that insight, this paper makes the
369: following two contributions.  First, I analyze natural language in direct style
370: rather than in continuation\hyp passing style.  In other words, the logical
371: metalanguage used here is one that includes control operators for delimited
372: continuations, rather than a pure $\lambda$-calculus in which denotations need
373: to handle continuations explicitly.  Natural language is thus endowed with an
374: operational semantics from computer science that is richer than just
375: $\beta\eta$-reduction.
376: 
377: Second, I propose a new analysis of polarity sensitivity that improves upon
378: prior theories in explaining why \phrase{No student liked any course}
379: is acceptable but *\phrase{Any student liked no course} is not.  This
380: analysis crucially relies on the notion of evaluation order from
381: programming languages, thus elucidating the role of control effects
382: in natural language and supporting the broader claim that linguistic
383: phenomena can be fruitfully thought of as computational side effects.
384: 
385: The rest of this paper is organized as follows.  In~\S\ref{s:formalism},
386: I introduce a simple grammatical formalism.  In~\S\ref{s:quantification},
387: I describe the linguistic phenomenon of quantification and show a straw man
388: analysis that deals with some cases but not others.  I then introduce
389: a programming language with delimited continuations and use it to improve the straw
390: man analysis: quantification in non-subject position is
391: treated in~\S\ref{s:delimited-continuations}, and inverse scope is covered
392: in~\S\ref{s:hierarchy}.  In~\S\ref{s:polarity}, I turn to the linguistic
393: phenomenon of polarity sensitivity and show how a computationally motivated
394: notion of evaluation order improves upon previous analyses.
395: In~\S\ref{s:beyond}, I place these examples in a broader context and conclude.
396: 
397: \section{A grammatical formalism}
398: \label{s:formalism}
399: 
400: In this section, I introduce a simple grammatical formalism for use in the
401: rest of the paper.  It is a notational variant of categorial grammar (as
402: introduced by \citet[chapter~4]{carpenter-type-logical}, for instance).
403: 
404: The verb \phrase{like} usually requires an object to its right and
405: a subject to its left.
406: \example{\subexamples{
407: \subexample{\label{e:alice-liked-cs187}Alice liked CS187.}
408: \subexample{\label{e:alice-liked}\<*Alice liked.}
409: \subexample{\label{e:alice-liked-bob-cs187}\<*Alice liked Bob CS187.}}}
410: Intuitively, \phrase{like} is a function that takes two arguments, and the sentences
411: (\refrange{e:alice-liked}{e:alice-liked-bob-cs187}) are unacceptable
412: due to type mismatch.  We can model this formally by assigning types to the
413: denotations of \phrase{Alice}, \phrase{CS187}, and \phrase{liked}, which we
414: take to be atomic expressions.
415: \begin{alignat}{2}
416:     \denote{\text{Alice}} &= \Alice &&: \Thing \\
417:     \denote{\text{CS187}} &= \CSnlp &&: \Thing \\
418:     \denote{\text{liked}} &= \Liked &&: \Thing \toF \Thing \toF \Bool
419: \end{alignat}
420: Here $\Thing$ is the type of individual objects, and $\Bool$ is the type of
421: truth values or propositions.  Following (justifiable) standard practice in
422: linguistics, we let $\Liked$ take its object as the first argument and its
423: subject as the second argument.  For example,
424: in~\eqref{e:alice-liked-cs187}, the first argument to~$\Liked$ is $\CSnlp$,
425: and the second argument is~$\Alice$.
426: 
427: As \eqref{e:alice-liked-cs187} shows, there are two ways to combine
428: expressions.  A function can take its
429: argument either to its right (combining \phrase{liked} with \phrase{CS187})
430: or to its left (combining \phrase{Alice} with \phrase{liked CS187}).  We
431: denote these two cases with two infix operators: ``$\fwd$'' for forward
432: combination and ``$\bwd$'' for backward combination.  (The tick marks
433: depict the direction in which a function ``leans on'' an argument.)
434: \begin{alignat}{3}
435: \label{e:fwd-pre}
436:     f\fwd x &= f(x) &&: \beta &\qquad&\text{where $f:\alpha\toF\beta$,\enspace $x:\alpha$}\\
437: \label{e:bwd-pre}
438:     x\bwd f &= f(x) &&: \beta &\qquad&\text{where $f:\alpha\toF\beta$,\enspace $x:\alpha$}
439: \end{alignat}
440: We can now derive the sentence~\eqref{e:alice-liked-cs187}---that is, prove
441: it to have type~$\Bool$.  The derivation can be written as
442: a tree~\eqref{e:alc-tree} or a term~\eqref{e:alc-term}.
443: \begin{gather}
444:     \label{e:alc-tree}
445:     \leaf{Alice}
446:     \leaf{liked}
447:     \leaf{CS187}
448:     \emptybranch{2}
449:     \emptybranch{2}
450:     \qobitree
451: \\
452:     \label{e:alc-term}
453:     \denote{\text{Alice}} \bwd \left(\denote{\text{liked}} \fwd \denote{\text{CS187}}\right)
454:     = \Liked\;\CSnlp\;\Alice : \Bool
455: \end{gather}
456: By convention, the infix operators $\fwd$ and $\bwd$ associate to the
457: right, so parentheses such as those in~\eqref{e:alc-term}
458: above are optional.
459: 
460: Unfortunately, the system set up so far derives not only the acceptable
461: sentence~\eqref{e:alice-liked-cs187} but also the unacceptable
462: sentence~\eqref{e:alice-cs187-liked}, with the same meaning.
463: \example{\label{e:alice-cs187-liked}%
464:     \<*Alice CS187 liked.}
465: The reason the system derives~\eqref{e:alice-cs187-liked} is that the
466: direction of function application is unconstrained: in the derivation
467: below, \phrase{liked} takes its first (object) argument to the left, which
468: is usually disallowed in English.
469: \begin{gather}
470:     \label{e:acl-tree}
471:     \leaf{Alice}
472:     \leaf{CS187}
473:     \leaf{liked}
474:     \emptybranch{2}
475:     \emptybranch{2}
476:     \qobitree
477: \displaybreak[0]
478: \\
479:     \label{e:acl-term}
480:     \denote{\text{Alice}} \bwd \denote{\text{CS187}} \bwd \denote{\text{liked}}
481:     = \Liked\;\CSnlp\;\Alice : \Bool
482: \end{gather}
483: To rule out this derivation of~\eqref{e:alice-cs187-liked} in our type
484: system, we split the function type constructor ``$\toF$'' into two type
485: constructors ``$\Fwd$'' and~``$\Bwd$'', one for each direction of
486: application.  Using these new type constructors, we change the denotation
487: of \phrase{liked} to specify that its first argument is to its right and
488: its second argument is to its left.
489: \begin{equation}
490:     \denote{\text{liked}} = \Liked : \Thing \Fwd \Thing \Bwd \Bool
491: \end{equation}
492: We also revise the combination rules \eqref{e:fwd-pre}
493: and~\eqref{e:bwd-pre} to require different function type constructors.
494: \begin{alignat}{2}
495: \label{e:fwd}
496:     f\fwd x &: \beta &\qquad&\text{where $f:\alpha\Fwd\beta$,\enspace $x:\alpha$}\\
497: \label{e:bwd}
498:     x\bwd f &: \beta &\qquad&\text{where $f:\alpha\Bwd\beta$,\enspace $x:\alpha$}
499: \end{alignat}
500: The system now rejects~\eqref{e:alice-cs187-liked} while continuing to
501: accept~\eqref{e:alice-liked-cs187}, as desired.
502: 
503: \section{Quantification}
504: \label{s:quantification}
505: 
506: The linguistic phenomenon of quantification is illustrated by the following
507: sentences.
508: \example{
509: \subexamples{
510: \subexample{\label{e:eslc}Every student liked CS187.}
511: \subexample{\label{e:sslec}Some student liked every course.}
512: \subexample{\label{e:atbtm}Alice consulted Bob before most meetings.}}}
513: As with the previously encountered sentences, the natural language
514: semanticist wants to translate English into logical formulas that
515: account for entailment and other properties.  More precisely, the problem
516: is to posit translation rules that map these sentences thus.  For instance,
517: we would like to map~\eqref{e:eslc} to a formula like
518: \begin{equation}
519: \label{e:eslc-formula}
520:     \Forall{x} \Student(x) \limplies x\bwd\Liked\fwd\CSnlp : \Bool,
521: \end{equation}
522: where the constants
523: \begin{align}
524:     \forall  &:(\Thing\toF\Bool)\toF\Bool, &
525:     \limplies&:\Bool\toF\Bool\toF\Bool \hspace*{-.5em}
526: \end{align}
527: are drawn from the (higher-order) abstract syntax of predicate logic.  To this
528: end, what should the subject noun phrase \phrase{every student} denote?  Unlike
529: with \phrase{Alice}, there is nothing of type~$\Thing$ that the quantificational
530: noun phrase \phrase{every student} can denote and still allow the desired
531: translation~\eqref{e:eslc-formula} to be generated.  At the same time, we would
532: like to retain the denotation that we previously computed for the verb phrase
533: \phrase{liked CS187}, namely $\Liked\fwd\CSnlp$.  Taking
534: these considerations into account, one way to translate~\eqref{e:eslc}
535: to~\eqref{e:eslc-formula} is for the determiner \phrase{every} to denote
536: \begin{equation}
537: \label{e:every-term}
538: \begin{split}
539:     \denote{\text{every}}
540:     & = \fun[\fwd]{r} \fun[\fwd]{s} \Forall{x} r(x) \limplies x\bwd s \\
541:     & \eqish: (\Thing\toF\Bool)\Fwd(\Thing\Bwd\Bool)\Fwd\Bool.
542: \end{split}
543: \end{equation}
544: Here the \emp{restrictor}~$r$ and the \emp{scope}~$s$ are $\lambda$-bound
545: variables intended to receive, respectively, the denotations of the noun
546: \phrase{student} (of type $\Thing\toF\Bool$) and the verb phrase
547: \phrase{liked CS187} (of type $\Thing\Bwd\Bool$).  (More precisely, $r$
548: and~$s$ are $\lambda^{\!\fwd}$-bound variables; the tick mark again signifies
549: the direction of function application.)  In
550: a non-quantificational sentence like~\eqref{e:alice-liked-cs187}, the verb
551: phrase takes the subject as its argument; by contrast, in the
552: quantificational sentence~\eqref{e:eslc}, the subject takes the verb phrase
553: as its argument.
554: 
555: Extended with the lexical entry~\eqref{e:every-term} for \phrase{every},
556: and assuming that \phrase{student} denotes
557: \begin{equation}
558:     \denote{\text{student}} = \Student : \Thing\toF\Bool,
559: \end{equation}
560: the grammar can derive the sentence~\eqref{e:eslc}.
561: \begin{gather}
562:     \label{e:eslc-tree}
563:     \leaf{every}
564:     \leaf{student}
565:     \emptybranch{2}
566:     \leaf{liked}
567:     \leaf{CS187}
568:     \emptybranch{2}
569:     \emptybranch{2}
570:     \qobitree
571: \\
572:     \label{e:eslc-term}
573:     \left(\denote{\text{every}} \fwd \denote{\text{student}}\right) \fwd
574:           \denote{\text{liked}} \fwd \denote{\text{CS187}}
575:     = \eqref{e:eslc-formula}
576: \end{gather}
577: The existential determiner \phrase{some} can be analyzed similarly:
578: let \phrase{some} denote
579: \begin{equation}
580: \label{e:some-term}
581: \begin{split}
582:     \denote{\text{some}}
583:     &= \fun[\fwd]{r} \fun[\fwd]{s} \Exists{x} r(x) \land x\bwd s \\
584:     &\eqish: (\Thing\toF\Bool)\Fwd(\Thing\Bwd\Bool)\Fwd\Bool
585: \end{split}
586: \end{equation}
587: to derive the sentence \phrase{Some student liked CS187}.
588: \begin{gather}
589:     \label{e:sslc-tree}
590:     \leaf{some}
591:     \leaf{student}
592:     \emptybranch{2}
593:     \leaf{liked}
594:     \leaf{CS187}
595:     \emptybranch{2}
596:     \emptybranch{2}
597:     \qobitree
598: \displaybreak[0]
599: \\
600:     \label{e:sslc-term}
601:     \begin{split}
602:     \displayleft{\left(\denote{\text{some}} \fwd \denote{\text{student}}\right) \fwd
603:         \denote{\text{liked}} \fwd \denote{\text{CS187}}} \\
604:     \qquad&= \Exists{x} \Student(x) \land x\bwd\Liked\fwd\CSnlp : \Bool
605:     \end{split}
606: \end{gather}
607: 
608: To summarize, we treat determiners
609: like \phrase{every} and \phrase{some} as functions of two arguments: the
610: restrictor and the scope of a quantifier, both functions from~$\Thing$
611: to~$\Bool$.  Such higher-order functions are a popular analysis of natural
612: language determiners, and have been known to semanticists since
613: \citet{montague-proper} as \emp{generalized quantifiers}.  However, the
614: simplistic account presented above only handles quantificational noun
615: phrases in subject position, as in~\eqref{e:eslc} but not \eqref{e:sslec}
616: or~\eqref{e:atbtm}.  For example, in~\eqref{e:sslec}, neither forward nor
617: backward combination can apply to join the verb \phrase{liked}, of type
618: $\Thing\Fwd\Thing\Bwd\Bool$, to its object \phrase{every course}, of type
619: $(\Thing\Bwd\Bool)\Fwd\Bool$.  Yet, empirically speaking, the
620: sentence~\eqref{e:sslec} is not only acceptable but in fact ambiguous
621: between two available readings.  This problem has prompted a great variety
622: of supplementary proposals in the linguistics literature \citep[][inter
623: alia]{barwise-generalized,may-logical,hendriks-studied}.  The next
624: section presents a solution using \emp{delimited continuations}.
625: 
626: \section{Delimited continuations}
627: \label{s:delimited-continuations}
628: 
629: First-class continuations represent ``the entire (default) future for the
630: computation'' \citep{kelsey-r5rs-short}.  Refining this concept,
631: \citet{felleisen-theory} introduced \emph{delimited} continuations, which
632: encapsulate only a prefix of that future.  This paper uses \emp{shift} and
633: \emp{reset} \citep{danvy-functional,danvy-abstracting}, a popular choice of
634: control operators for delimited continuations.
635: 
636: To review briefly, the shift operator (notated~$\Shift$) captures the current
637: context of computation, removing it and making it available to the program as
638: a function.  For example, when evaluating the term
639: \begin{equation}
640: \label{e:shift-example}
641:     10 \times (\Shift f.\, 1 + f(2)),
642: \end{equation}
643: the variable~$f$ is bound to the function that multiplies every number
644: by~$10$.  Thus the above expression evaluates to~$21$ via the following
645: sequence of reductions.  (The reduced subexpression at each step is underlined.)
646: \begin{align}
647:     \rlap{$[10 \times (\text{\ul{$\Shift f.\, 1 + f(2)$}})]$}\enspace \\
648:     &\evalsto [1 + \text{\ul{$(\Lam v\Colon [10 \times v])(2)$}}] \notag\\
649:     &\evalsto [1 + [\text{\ul{$10 \times 2$}}]]
650:      \evalsto [1 + \text{\ul{$[20]$}}]
651:      \evalsto [\text{\ul{$1 + 20$}}]
652:      \evalsto \text{\ul{$[21]$}}
653:      \evalsto 21 \notag
654: \end{align}
655: Term reductions are performed deterministically in applicative order:
656: call-by-value and left-to-right.
657: 
658: The reset operator (notated with square brackets $[\enspace]$)
659: delineates how far shift can reach: shift captures the current
660: context of computation up to the closest dynamically enclosing reset.
661: Hence ``$3\;\times$'' below is out of reach.
662: %The following reduction sequence demonstrates.
663: \begin{align}
664: \label{e:reset-example}
665:     \rlap{$\bigl[3 \times [10 \times (\text{\ul{$\Shift f.\, 1 + f(2)$}})]\bigr]$}\enspace \\
666:     &\evalsto \bigl[3 \times [1 + \text{\ul{$(\Lam v\Colon [10 \times v])(2)$}}]\bigr] \notag\\
667:     &\evalsto \bigl[3 \times [1 + [\text{\ul{$10 \times 2$}}]]\bigr]
668:      \evalsto \bigl[3 \times [1 + \text{\ul{$[20]$}}]\bigr]
669:      \evalsto\dotsb
670:      \evalsto 63 \notag
671: \end{align}
672: 
673: Shift and reset come with an operational semantics (illustrated by the
674: reductions above) as well as a denotational semantics (via the CPS
675: transform).  That both kinds of semantics are
676: available is important to linguistics, because
677: the meanings of natural language expressions (studied in
678: semantics) need to be related to how humans process them (studied
679: in psycholinguistics).
680: 
681: Quantificational expressions in natural language can be thought of as phrases
682: that manipulate their context.  In a sentence like
683: \phrase{Alice liked CS187} \eqref{e:alice-liked-cs187}, the context of
684: \phrase{CS187} is the function mapping each thing~$x$ to the proposition that
685: Alice liked~$x$.  Compared to the proper noun \phrase{CS187}, what is special
686: about a quantificational expression like \phrase{every course} is that it
687: captures its surrounding context when used.
688: \example{\label{e:alec}Alice liked [every course].}
689: Thus, loosely speaking, the meaning of the sentence~\eqref{e:alec} no
690: longer has the overall shape $\Alice\bwd\Liked\fwd\dotsb$ once the occurrence
691: of \phrase{every course} is considered, much as the meaning of the
692: program~\eqref{e:shift-example} no longer has the overall shape $10
693: \times \dotsb$ once the shift expression is evaluated.  Let us add
694: shift and reset to the target language of our translation from English.  We can
695: then translate \phrase{every course} as
696: \begin{equation}
697: \label{e:every-course}
698:     \denote{\text{every course}}
699:     = \Shift s\Colon \Forall{x} \Course(x) \limplies s(x)
700:     : \Thing_{\Bool}^{\Bool}.
701: \end{equation}
702: The type notation \smash[b]{$\alpha_\gamma^\delta$} here indicates an~$\alpha$
703: with a control effect; the CPS transform maps it to
704: $(\alpha\toF\gamma)\toF\delta$.  The denotation of \phrase{every course} behaves
705: locally as a $\Thing$, but requires the current context to have the answer
706: type~$\Bool$ and maintains that answer type.
707: 
708: To see the new denotation~\eqref{e:every-course} in action, let us derive the
709: sentence~\eqref{e:alec}.  The type of \phrase{every course}
710: is~$\Thing_\Bool^\Bool$, similar to the type~$\Thing$ of~\phrase{CS187}, so the
711: derivation of~\eqref{e:alec} is analogous
712: to~(\refrange{e:alc-tree}{e:alc-term}).
713: \begin{gather}
714:     \label{e:alec-tree}
715:     \leaf{Alice}
716:     \leaf{liked}
717:     \leaf{every course}
718:     \emptybranch{2}
719:     \emptybranch{2}
720:     \qobitree
721: \displaybreak[0]\\
722: \label{e:alec-term}
723: \begin{split}
724:     \rlap{$\bigl[\denote{\text{Alice}} \bwd
725:                  \denote{\text{liked}} \fwd
726:                  \denote{\text{every course}}\bigr]$}\enspace\\
727:     &= [\Alice\bwd \Liked\fwd
728:       \text{\ul{$\Shift s\Colon \Forall{x} \Course(x) \limplies s(x)$}} ] \hspace*{-1em} \\
729:     &\evalsto [\Forall{x} \Course(x) \limplies \text{\ul{$(\fun{v}
730:         [\Alice\bwd\Liked\fwd v])(x)$}}]
731:       \evalsto\dotsb\\
732:     &\evalsto \Forall{x} \Course(x) \limplies \Alice\bwd\Liked\fwd x : \Bool
733: \end{split}
734: \end{gather}
735: 
736: Like the straw man analysis in~\S\ref{s:quantification}, the denotation
737: in~\eqref{e:every-course} generalizes to determiners other than
738: \phrase{every}: we can abstract the noun \phrase{course} out of
739: \phrase{every course}, and deal with \phrase{some student} similarly.
740: \begin{align}
741: \label{e:every}
742:     \denote{\text{every}}
743:         &= \fun[\fwd]{r} \Shift s\Colon \Forall{x} r(x) \limplies s(x),\\
744: \label{e:some}
745:     \denote{\text{some}}
746:         &= \fun[\fwd]{r} \Shift s\Colon \Exists{x} r(x) \land     s(x) \\
747:         &\eqish: \smash[t]{(\Thing\toF\Bool)\Fwd\Thing_{\Bool}^{\Bool}} \notag
748: \end{align}
749: (We require here that the restrictor~$r$ have the type $\Thing\toF\Bool$, not
750: a type of the form $\smash{\Thing\toF\Bool_\gamma^\delta}$, so $r$ cannot incur
751: control effects when applied to~$x$.  Any control effect in the restrictor,
752: such as induced by the quantificational noun phrase \phrase{a company} in the
753: sentence \phrase{Every representative of a company left}, must be contained
754: within reset.)
755: 
756: More importantly, unlike the straw man analysis, the new analysis works
757: uniformly for quantificational expressions in subject, object, and other
758: positions, such as in~(\refrange{e:eslc}{e:atbtm}).  Intuitively, this is
759: because shift captures the context of an expression no matter how deeply it is
760: embedded in the sentence.%
761: \footnote{No matter how deep, that is, up to the closest dynamically enclosing
762:     reset.  Control delimiters correspond to \emph{islands} in natural language
763:     \citep{barker-continuations}.}
764: By adding control operators for delimited
765: continuations to our logical metalanguage, we arrive at an analysis of
766: quantification with greater empirical coverage.%
767: 
768: \begin{figure*}[t]
769: \newcommand{\just}{\justifies}
770: \abovedisplayskip     =0pt
771: \abovedisplayshortskip=0pt
772: \begin{align*}
773:     \tag*{Directions} \Delta
774:         &::= \fwd \mid \bwd \\
775:     \tag*{Types} \alpha,\beta,\gamma,\delta
776:         &::= \Thing \mid \Bool \mid \alpha\toF\beta
777:         \mid \smash[t]{\alpha\toD\beta_\gamma^\delta} \\
778:     \tag*{Antecedents} \Gamma
779:         &::= x_1:\alpha_1,\dotsc,x_n:\alpha_n \\
780:     \tag*{Terms} E,F
781:         &::= c \mid x \mid \fun{x}E \mid \fun[\Delta]{x}E
782:         \mid FE \mid F\fwd E \mid E\bwd F \mid [E] \mid \Shift f\Colon E
783: \end{align*}
784: 
785: \abovedisplayskip     =\smallskipamount
786: \abovedisplayshortskip=\smallskipamount
787: \renewcommand{\just}{\justifies\strut}
788: \noindent Constants $c:\alpha$
789: \begin{gather*}
790:     \begin{prooftree}
791:         \just \forall : (\Thing\toF\Bool)\toF\Bool
792:     \end{prooftree}
793:     \qquad
794:     \begin{prooftree}
795:         \just \exists : (\Thing\toF\Bool)\toF\Bool
796:     \end{prooftree}
797: \\
798:     \begin{prooftree}
799:         \just \limplies : \Bool\toF\Bool\toF\Bool
800:     \end{prooftree}
801:     \qquad
802:     \begin{prooftree}
803:         \just \land : \Bool\toF\Bool\toF\Bool
804:     \end{prooftree}
805: \\
806:     \begin{prooftree}
807:         \just \Student : \Thing\toF\Bool
808:     \end{prooftree}
809:     \qquad
810:     \begin{prooftree}
811:         \just \Liked : \Thing\Fwd(\Thing\Bwd\Bool_\delta^\delta)_\gamma^\gamma
812:     \end{prooftree}
813:     \qquad
814:     \dotsm
815: \end{gather*}
816: 
817: \renewcommand{\just}{\strut\justifies\strut}
818: \noindent Pure expressions $\Gamma \vdash E:\alpha$
819: \begin{gather*}
820:     \begin{prooftree}
821:         c:\alpha
822:         \just \Gamma \vdash c:\alpha
823:         \using \text{Const}
824:     \end{prooftree}
825:     \qquad
826:     \begin{prooftree}
827:         \just \Gamma,x:\alpha \vdash x:\alpha
828:         \using \text{Var}
829:     \end{prooftree}
830:     \qquad
831:     \begin{prooftree}
832:         \Gamma \vdash E:\alpha_\alpha^\beta
833:         \just \Gamma \vdash [E]:\beta
834:         \using \text{Reset}
835:     \end{prooftree}
836: \\
837:     \begin{prooftree}
838:         \Gamma, x:\alpha \vdash E:\beta
839:         \just \Gamma \vdash \fun{x}E:\alpha\toF\beta
840:         \using \toF\text{I}
841:     \end{prooftree}
842:     \qquad
843:     \begin{prooftree}
844:         \Gamma, x:\alpha \vdash E:\beta_\gamma^\delta
845:         \just \Gamma \vdash \fun[\Delta]{x}E:\alpha\toD\beta_\gamma^\delta
846:         \using \toD\text{I}
847:     \end{prooftree}
848:     \qquad
849:     \begin{prooftree}
850:         \Gamma \vdash F:\alpha\toF\beta
851:         \quad \Gamma \vdash E:\alpha
852:         \just \Gamma \vdash FE:\beta
853:         \using \toF\text{E}
854:     \end{prooftree}
855: \end{gather*}
856: 
857: \belowdisplayskip     =0pt
858: \belowdisplayshortskip=0pt
859: \noindent Impure expressions $\Gamma \vdash E:\alpha_\gamma^\delta$
860: \begin{gather*}
861:     \begin{prooftree}
862:         \Gamma \vdash E:\alpha
863:         \just \Gamma \vdash E:\alpha_\gamma^\gamma
864:         \using \text{Lift}
865:     \end{prooftree}
866:     \qquad
867:     \begin{prooftree}
868:         \Gamma, f:\alpha\toF\gamma \vdash E:\delta
869:         \just \Gamma \vdash \Shift f\Colon E:\alpha_\gamma^\delta
870:         \using \text{Shift}
871:     \end{prooftree}
872: \\
873:     \begin{prooftree}
874:         \Gamma \vdash F:(\alpha\Fwd\beta_{\gamma3}^{\gamma2})_{\gamma1}^{\gamma0}
875:         \quad \Gamma \vdash E:\alpha_{\gamma2}^{\gamma1}
876:         \just \Gamma \vdash F\fwd E:\beta_{\gamma3}^{\gamma0}
877:         \using \Fwd{\text E}
878:     \end{prooftree}
879:     \qquad
880:     \begin{prooftree}
881:         \Gamma \vdash E:\alpha_{\gamma1}^{\gamma0}
882:         \quad \Gamma \vdash F:(\alpha\Bwd\beta_{\gamma3}^{\gamma2})_{\gamma2}^{\gamma1}
883:         \just \Gamma \vdash E\bwd F:\beta_{\gamma3}^{\gamma0}
884:         \using \Bwd{\text E}
885:     \end{prooftree}
886: \end{gather*}
887: \caption{A logical metalanguage with directionality and delimited control operators}
888: \label{fig:metalanguage}
889: \end{figure*}
890: 
891: \begin{figure}
892: \newcommand{\blank}[1][\enspace]{\langle#1\rangle}
893: \abovedisplayskip     =0pt
894: \abovedisplayshortskip=0pt
895: \begin{align*}
896:     \tag*{Values} V
897:         &::= U \mid \fun{x}E \mid \fun[\Delta]{x}E \\
898:     \tag*{Unknowns} U
899:         &::= c \mid UV \mid U\fwd V \mid V\bwd U \\
900:     \tag*{Contexts} C\blank
901:         &::= \blank
902:         \mid (C\blank)    E
903:         \mid  C\blank\fwd E
904:         \mid  C\blank\bwd F \\&\hphantom{{}::= \blank}
905:         \mid V    (C\blank)
906:         \mid V\fwd C\blank
907:         \mid V\bwd C\blank \\
908:     \tag*{Metacontexts} D\blank
909:         &::= \blank
910:         \mid C\blank[{[D\blank]}]
911: \end{align*}
912: 
913: \abovedisplayskip     =\smallskipamount
914: \abovedisplayshortskip=\smallskipamount
915: \belowdisplayskip     =0pt
916: \belowdisplayshortskip=0pt
917: \noindent Computations $E \evalsto E'$
918: \begin{align*}
919:     D\blank[{C\blank[\text{\ul{$(\fun      {x}E)     V$}}]}] &\enspace\evalsto\enspace D\blank[{C\blank[{E\{x\mapsto V\}}]}] \\
920:     D\blank[{C\blank[\text{\ul{$(\fun[\fwd]{x}E)\fwd V$}}]}] &\enspace\evalsto\enspace D\blank[{C\blank[{E\{x\mapsto V\}}]}] \\
921:     D\blank[{C\blank[\text{\ul{$V\bwd (\fun[\bwd]{x}E)$}}]}] &\enspace\evalsto\enspace D\blank[{C\blank[{E\{x\mapsto V\}}]}] \\
922:     D\blank[{C\blank[\text{\ul{$[V]                   $}}]}] &\enspace\evalsto\enspace D\blank[{C\blank[{V}]}] \\
923:     D\blank[{C\blank[\text{\ul{$\Shift f\Colon E      $}}]}] &\enspace\evalsto\enspace D\blank[{E\{f\mapsto\fun{x}[C\blank[x]]\}}]
924: \end{align*}
925: \caption{Reductions for the logical metalanguage}
926: \label{fig:reductions}
927: \end{figure}
928: 
929: Figure~\vref{fig:metalanguage} shows a logical metalanguage that formalizes the
930: basic ideas presented above.  It is in this language that denotations on this
931: page are written and reduced.  Refining Danvy and Filinski's original
932: shift-reset language, we distinguish between \emp{pure} and \emp{impure}
933: expressions.  An impure expression may incur control effects when evaluated,
934: whereas a pure expression only incurs control effects contained within reset
935: \citep{danvy-cps,danvy-transformation,nielsen-selective,thielecke-control}.
936: This distinction is reflected in the typing judgments: an impure judgment
937: \begin{equation}
938:     \Gamma\vdash E:\smash[t]{\alpha_\gamma^\delta}
939: \end{equation}
940: not only gives a type~$\alpha$ for~$E$ itself but also specifies two answer
941: types $\gamma$ and~$\delta$.  By contrast, a pure judgment
942: \begin{equation}
943:     \Gamma\vdash E:\alpha
944: \end{equation}
945: only gives a type~$\alpha$ for~$E$ itself.  As can be seen in the Lift rule,
946: pure expressions are polymorphic in the answer type.
947: 
948: As mentioned in~\S\ref{s:formalism}, the use of directionality in function types
949: to control word order is not new in linguistics, but the use of delimited
950: control operators to analyze quantification is.  It turns out that we can tie
951: the potential presence of control effects in function bodies to directionality.
952: That is, only directional functions---those whose types are decorated with tick
953: marks---are potentially impure; all non-directional functions we need to deal
954: with, including contexts captured by shift, are pure.  Another link between
955: directionality and control effects is that the $\Fwd$E and $\Bwd$E rules for
956: directional function application are not merely mirror images of each other: the
957: answer types $\gamma_0$ through~$\gamma_3$ are chained differently through the
958: premises.  This is due to left-to-right evaluation.
959: 
960: Having made the distinction between pure and impure expressions, we require in
961: our Shift rule that the body of a shift expression be pure.  This change from
962: Danvy and Filinski's original system simplifies the type system and the CPS
963: transform, but a shift expression $\Shift f\Colon E$ in their language may
964: need to be rewritten here to~$\Shift f\Colon[E]$.
965: 
966: The CPS transform for the metalanguage follows from the typing rules and is
967: standard; it supplies a denotational semantics.  The operational semantics for
968: the metalanguage specifies a computation relation between complete terms; it is
969: also standard and shown in Figure~\ref{fig:reductions}.
970: 
971: The present analysis is almost, but not quite, the direct-style analogue of
972: \citets{barker-continuations} CPS analysis.  Put in direct-style terms, Barker's
973: function bodies are always pure, whereas function bodies here can harbor control
974: effects.  In other words, function bodies are allowed to shift, as in the
975: determiner denotations in \eqref{e:every} and~\eqref{e:some}.  By contrast,
976: Barker uses \emph{choice functions} to assign meanings to determiners.
977: 
978: \section{Quantifier scope ambiguity}
979: %\section{A hierarchy of control operators}
980: \label{s:hierarchy}
981: 
982: Of course, natural language phenomena are never as simple as a couple of
983: programming language control operators.  Quantification is no exception, so to
984: speak.  For example, the sentence \phrase{Some student liked
985: every course} \eqref{e:sslec} is ambiguous between the following two readings.
986: \begin{gather}
987: \label{e:sslec-surface}\Exists{x} \Student(x) \land \Forall{y} \Course(y) \limplies x\bwd\Liked\fwd y\\
988: \label{e:sslec-inverse}\Forall{y} \Course(y) \limplies \Exists{x} \Student(x) \land x\bwd\Liked\fwd y
989: \end{gather}
990: In the \emp{surface scope} reading~\eqref{e:sslec-surface}, \phrase{some} takes
991: scope over \phrase{every}.  In the \emp{inverse scope}
992: reading~\eqref{e:sslec-inverse}, \phrase{every} takes scope over \phrase{some}.
993: Given that evaluation takes place from left to right, the shift
994: for \phrase{some student} is evaluated before the shift for \phrase{every
995: course}.  Our grammar thus predicts the surface scope reading but not the
996: inverse scope reading.  This prediction can be seen in the first few reductions
997: of the (unique) derivation for~\eqref{e:sslec}:
998: \begin{align}
999:     \rlap{$
1000:         \bigl[ \left(\denote{\text{some}} \fwd \denote{\text{student}}\right)
1001:           \bwd \denote{\text{liked}}
1002:           \fwd \denote{\text{every}} \fwd \denote{\text{course}} \bigr]
1003:         $}\enspace
1004: \\\notag
1005:     &= \bigl[ \bigl( \text{\ul{$
1006:                 (\fun[\fwd]{r} \Shift s\Colon \Exists{x} r(x) \land     s(x))
1007:               \fwd
1008:                 \Student
1009:               $}} \bigr)
1010: \\\notag
1011:     &\hspace*{3em}{}
1012:          \bwd \Liked
1013:          \fwd \bigl(
1014:                 (\fun[\fwd]{r} \Shift s\Colon \Forall{y} r(y) \limplies s(y))
1015:               \fwd
1016:                 \Course
1017:               \bigr)
1018:        \bigr]
1019: \\\notag
1020:     &\evalsto
1021:        \bigl[ \bigl( \text{\ul{$
1022:                 \Shift s\Colon \Exists{x} \Student(x) \land     s(x)
1023:               $}} \bigr)
1024: \\\notag
1025:     &\hspace*{3em}{}
1026:          \bwd \Liked
1027:          \fwd \bigl(
1028:                 (\fun[\fwd]{r} \Shift s\Colon \Forall{y} r(y) \limplies s(y))
1029:               \fwd
1030:                 \Course
1031:               \bigr)
1032:        \bigr]
1033: \\\notag
1034:     &\evalsto
1035:        \bigl[ \Exists{x} \Student(x) \land
1036:          \bigl(\fun{v} [v
1037: \\\notag
1038:     &\hspace*{3em}{}
1039:          \bwd \Liked
1040:          \fwd (
1041:                 (\fun[\fwd]{r} \Shift s\Colon \Forall{y} r(y) \limplies s(y))
1042:               \fwd
1043:                 \Course
1044:               )]\bigr)(x)
1045:        \bigr]
1046:     \hspace*{-.5em}
1047: \end{align}
1048: Regardless of what evaluation order we specify, as long
1049: as our rules for semantic translation remain deterministic, they will only
1050: generate one reading for the sentence.  Hence our theory fails to
1051: predict the ambiguity of the sentence~\eqref{e:sslec}.
1052: 
1053: To better account for the data, we need to introduce some sort of nondeterminism
1054: into our theory.  There are two natural ways to proceed.  First, we can allow
1055: arbitrary evaluation order, not just left-to-right.  This change would render
1056: our term calculus nonconfluent, a result unwelcome for most programming language
1057: researchers but welcome for us in light of the ambiguous natural language
1058: sentence~\eqref{e:sslec}.  This route has been pursued with some success by
1059: \citet{barker-continuations} and \citet{de-groote-type}.  However, there are
1060: empirical reasons to maintain left-to-right evaluation, one of which appears
1061: in~\S\ref{s:polarity}.
1062: 
1063: \begin{figure*}[t]
1064: \newcommand{\just}{\justifies}
1065: \abovedisplayskip     =0pt
1066: \abovedisplayshortskip=0pt
1067: \begin{align*}
1068:     \tag*{Directions} \Delta
1069:         &::= \fwd \mid \bwd \\
1070:     \tag*{Value types} \alpha,\beta,\gamma,\delta
1071:         &::= \smash[t]{
1072:              \Thing\{\vec{p}\} \mid \Bool\{\vec{p}\}
1073:         \mid \alpha\toF\beta!n
1074:         \mid \alpha\toD\beta!n} \\
1075:     \tag*{Computation types} \alpha!0
1076:         &::= \alpha, \qquad \alpha!(n + 1)
1077:          ::= \smash{\alpha_{\gamma!n}^{\delta!n}} \\
1078:     \tag*{Antecedents} \Gamma
1079:         &::= x_1:\alpha_1,\dotsc,x_n:\alpha_n \\
1080:     \tag*{Terms} E,F
1081:         &::= c \mid x \mid \fun[n]{x}E \mid \fun[\Delta n]{x}E
1082:         \mid FE \mid F\fwd E \mid E\bwd F \mid [E]^n \mid \Shift^n\!f\Colon E
1083: \end{align*}
1084: 
1085: \abovedisplayskip     =\smallskipamount
1086: \abovedisplayshortskip=\smallskipamount
1087: \renewcommand{\just}{\justifies\strut}
1088: \noindent Constants $c:\alpha$
1089: \begin{gather*}
1090:     \begin{prooftree}
1091:         \just p : \Thing\{p\}
1092:     \end{prooftree}
1093:     \qquad
1094:     \begin{prooftree}
1095:         \just \forall p : \Bool\{p,\vec{q}\}\toF\Bool\{\vec{q}\}
1096:     \end{prooftree}
1097:     \qquad
1098:     \begin{prooftree}
1099:         \just \exists p : \Bool\{p,\vec{q}\}\toF\Bool\{\vec{q}\}
1100:     \end{prooftree}
1101: \\
1102:     \begin{prooftree}
1103:         \just \limplies : \Bool\{\vec{p}\}\toF\Bool\{\vec{q}\}\toF\Bool\{\vec{p}\cup\vec{q}\}
1104:     \end{prooftree}
1105:     \qquad
1106:     \begin{prooftree}
1107:         \just \land : \Bool\{\vec{p}\}\toF\Bool\{\vec{q}\}\toF\Bool\{\vec{p}\cup\vec{q}\}
1108:     \end{prooftree}
1109: \\
1110:     \begin{prooftree}
1111:         \just \Student : \Thing\{\vec{p}\}\toF\Bool\{\vec{p}\}
1112:     \end{prooftree}
1113:     \qquad
1114:     \begin{prooftree}
1115:         \just \Liked : \Thing\{\vec{p}\}\Fwd(\Thing\{\vec{q}\}\Bwd\Bool\{\vec{p}\cup\vec{q}\}{}_{\delta!m}^{\delta!m})_{\gamma!n}^{\gamma!n}
1116:     \end{prooftree}
1117:     \qquad
1118:     \dotsm
1119: \end{gather*}
1120: 
1121: \belowdisplayskip     =0pt
1122: \belowdisplayshortskip=0pt
1123: \renewcommand{\just}{\strut\justifies\strut}
1124: \noindent Expressions $\Gamma \vdash E:\alpha!n$
1125: \begin{gather*}
1126:     \let\vcenter\vbox
1127:     \begin{prooftree}
1128:         c:\alpha
1129:         \just \Gamma \vdash c:\alpha
1130:         \using \text{Const}
1131:     \end{prooftree}
1132:     \qquad
1133:     \begin{prooftree}
1134:         \just \Gamma,x:\alpha \vdash x:\alpha
1135:         \using \text{Var}
1136:     \end{prooftree}
1137:     \qquad
1138:     \begin{prooftree}
1139:         \Gamma \vdash E:\alpha_\alpha^\beta
1140:         \just \Gamma \vdash [E]^0:\beta
1141:         \using \text{Reset}
1142:     \end{prooftree}
1143:     \qquad
1144:   \smash{
1145:     \begin{prooftree}
1146:         \Gamma \vdash E:\alpha_{\alpha_{\gamma!n}^{\gamma!n}}^{\beta!(n+1)}
1147:         \just \Gamma \vdash [E]^{n+1}:\beta!(n+1)
1148:         \using \text{Reset}
1149:     \end{prooftree}
1150:   }
1151: \\
1152:     \begin{prooftree}
1153:         \Gamma, x:\alpha \vdash E:\beta!n
1154:         \just \Gamma \vdash \fun[(\Delta)]{x}E:\alpha\toMaybeD\beta!n
1155:         \using \toMaybeD\text{I}
1156:     \end{prooftree}
1157:     \qquad
1158:     \begin{prooftree}
1159:         \Gamma \vdash F:\alpha\toF\beta!n
1160:         \quad \Gamma \vdash E:\alpha
1161:         \just \Gamma \vdash FE:\beta!n
1162:         \using \toF\text{E}
1163:     \end{prooftree}
1164: \\
1165:     \let\vcenter\vtop
1166:     \begin{prooftree}
1167:         \Gamma \vdash E:\alpha
1168:         \just \Gamma \vdash E:\alpha_\gamma^\gamma
1169:         \using \text{Lift}
1170:     \end{prooftree}
1171:     \qquad
1172:     \begin{prooftree}
1173:         \Gamma \vdash E:\alpha_{\gamma1!n}^{\gamma0!n}
1174:         \just \Gamma \vdash E:\alpha_{\beta_{\gamma2!n}^{\gamma1!n}}
1175:                                     ^{\beta_{\gamma2!n}^{\gamma0!n}}
1176:         \using \text{Lift}
1177:     \end{prooftree}
1178:     \qquad
1179:     \begin{prooftree}
1180:         \Gamma, f:\alpha\toF\gamma!n \vdash E:\delta!n
1181:         \just \Gamma \vdash \Shift^n\!f\Colon E : \alpha_{\gamma!n}^{\delta!n}
1182:         \using \text{Shift}
1183:     \end{prooftree}
1184: \\
1185:     \begin{prooftree}
1186:         \Gamma \vdash F:(\alpha\Fwd\beta_{\gamma3!n}^{\gamma2!n})_{\gamma1!n}^{\gamma0!n}
1187:         \quad \Gamma \vdash E:\alpha_{\gamma2!n}^{\gamma1!n}
1188:         \just \Gamma \vdash F\fwd E:\beta_{\gamma3!n}^{\gamma0!n}
1189:         \using \Fwd{\text E}
1190:     \end{prooftree}
1191:     \qquad
1192:     \begin{prooftree}
1193:         \Gamma \vdash E:\alpha_{\gamma1!n}^{\gamma0!n}
1194:         \quad \Gamma \vdash F:(\alpha\Bwd\beta_{\gamma3!n}^{\gamma2!n})_{\gamma2!n}^{\gamma1!n}
1195:         \just \Gamma \vdash E\bwd F:\beta_{\gamma3!n}^{\gamma0!n}
1196:         \using \Bwd{\text E}
1197:     \end{prooftree}
1198: \\
1199:     \begin{prooftree}
1200:         \Gamma \vdash F:(\alpha\Fwd\beta)
1201:         \quad \Gamma \vdash E:\alpha
1202:         \just \Gamma \vdash F\fwd E:\beta
1203:         \using \Fwd{\text E}
1204:     \end{prooftree}
1205:     \qquad
1206:     \begin{prooftree}
1207:         \Gamma \vdash E:\alpha
1208:         \quad \Gamma \vdash F:(\alpha\Bwd\beta)
1209:         \just \Gamma \vdash E\bwd F:\beta
1210:         \using \Bwd{\text E}
1211:     \end{prooftree}
1212: \end{gather*}
1213: \caption{Extending the logical metalanguage to a hierarchy of control operators}
1214: \label{fig:hierarchy}
1215: \end{figure*}
1216: 
1217: \begin{figure*}
1218: \newcommand{\blank}[1][\enspace]{\langle#1\rangle}
1219: \abovedisplayskip     =0pt
1220: \abovedisplayshortskip=0pt
1221: \begin{align*}
1222:     \tag*{Values} V
1223:         &::= U \mid \fun{x}E \mid \fun[\Delta]{x}E \\
1224:     \tag*{Unknowns} U
1225:         &::= c \mid UV \mid U\fwd V \mid V\bwd U \\
1226:     \tag*{Contexts} C\blank
1227:         &::= \blank
1228:         \mid (C\blank)    E
1229:         \mid  C\blank\fwd E
1230:         \mid  C\blank\bwd F
1231:         \mid V    (C\blank)
1232:         \mid V\fwd C\blank
1233:         \mid V\bwd C\blank \\
1234:     \tag*{Metacontexts at level~$n$} D^n\blank
1235:         &::= \blank
1236:         \mid D^{n+1}\blank[{D^{n+2}\blank[{\dotsm\blank[{C\blank[{[D^n\blank]^n}]}]\dotsm}]}]
1237: \end{align*}
1238: 
1239: \abovedisplayskip     =\smallskipamount
1240: \abovedisplayshortskip=\smallskipamount
1241: \belowdisplayskip     =0pt
1242: \belowdisplayshortskip=0pt
1243: \noindent Computations $E \evalsto E'$
1244: \begin{align*}
1245:     D^0\blank[{D^1\blank[{\dotsm\blank[{C\blank[\text{\ul{$(\fun      {x}E)     V$}}]}]\dotsm}]}] &\enspace\evalsto\enspace D^0\blank[{D^1\blank[{\dotsm\blank[{C\blank[{E\{x\mapsto V\}}]}]\dotsm}]}] \\
1246:     D^0\blank[{D^1\blank[{\dotsm\blank[{C\blank[\text{\ul{$(\fun[\fwd]{x}E)\fwd V$}}]}]\dotsm}]}] &\enspace\evalsto\enspace D^0\blank[{D^1\blank[{\dotsm\blank[{C\blank[{E\{x\mapsto V\}}]}]\dotsm}]}] \\
1247:     D^0\blank[{D^1\blank[{\dotsm\blank[{C\blank[\text{\ul{$V\bwd (\fun[\bwd]{x}E)$}}]}]\dotsm}]}] &\enspace\evalsto\enspace D^0\blank[{D^1\blank[{\dotsm\blank[{C\blank[{E\{x\mapsto V\}}]}]\dotsm}]}] \\
1248:     D^0\blank[{D^1\blank[{\dotsm\blank[{C\blank[\text{\ul{$[V]                   $}}]}]\dotsm}]}] &\enspace\evalsto\enspace D^0\blank[{D^1\blank[{\dotsm\blank[{C\blank[{V}]}]\dotsm}]}] \\
1249:     D^0\blank[{D^1\blank[{\dotsm\blank[{D^n\blank[{D^{n+1}\blank[{\dotsm\blank[{C\blank[\text{\ul{$\Shift^n\!f\Colon E$}}]}]\dotsm}]}]}]\dotsm}]}] &\enspace\evalsto\enspace D^0\blank[{D^1\blank[{\dotsm\blank[{D^n\blank[{E\{f\mapsto\fun{x}[D^{n+1}\blank[{\dotsm\blank[{C\blank[x]}]\dotsm}]]^n\}}]}]\dotsm}]}]
1250: \end{align*}
1251: \caption{Reductions for the extended logical metalanguage}
1252: \label{fig:reductions-hierarchy}
1253: \end{figure*}
1254: 
1255: A second way to introduce nondeterminism is to maintain left-to-right evaluation
1256: but generalize shift and reset to a \emp{hierarchy} of control operators
1257: \citep{danvy-abstracting,barker-higher-order,shan-explaining}, leaving it
1258: unspecified at which level on the hierarchy each quantificational phrase shifts.
1259: Following \citeauthor{danvy-abstracting}, we extend our logical
1260: metalanguage by superscripting every shift expression and pair of reset brackets
1261: with a nonnegative integer to indicate a level on the control hierarchy.
1262: Level~$0$ is the highest level (not the lowest).  When a shift expression at
1263: level~$n$ is evaluated, it captures the current context of computation up to the
1264: closest dynamically enclosing reset at level~$n$ or higher (smaller).  For
1265: example, whereas the expression
1266: \begin{equation}
1267:     \bigl[3 \times [10 \times (\Shift^5\!f.\, 1 + f(2))]^5\bigr]^0
1268: \end{equation}
1269: evaluates to~$63$ as in~\eqref{e:reset-example}, the expression
1270: \begin{equation}
1271:     \bigl[3 \times [10 \times (\Shift^3\!f.\, 1 + f(2))]^5\bigr]^0
1272: \end{equation}
1273: evaluates to $1 + 3 \times 10 \times 2$, or~$61$.  The superscripts can be
1274: thought of ``strength levels'' for shifts and resets.
1275: 
1276: \Citet{danvy-abstracting} give a denotational semantics for multiple levels of
1277: delimited control using continuations of higher-order type.  We can take
1278: advantage of that work in our quantificational
1279: denotations~(\refrange{e:every}{e:some}) by letting them shift at any level.
1280: The ambiguity of~\eqref{e:sslec} is then predicted as follows.  Suppose that
1281: \phrase{some student} shifts at level~$m$ and \phrase{every course} shifts at
1282: level~$n$.
1283: \example{Some$^m$ student liked every$^n$ course.}
1284: If $m\le n$, the surface scope reading~\eqref{e:sslec-surface} results.  If
1285: $m>n$, the inverse scope reading~\eqref{e:sslec-inverse} results.  In general,
1286: a quantifier that shifts at a higher level always scopes over another that
1287: shifts on a lower level, regardless of which one is evaluated first.  This
1288: way, evaluation order does not determine scoping possibilities among quantifiers
1289: in a sentence unless two quantifiers happen to shift at the same level.
1290: 
1291: To summarize the discussion so far, whether we introduce nondeterministic
1292: evaluation order or a hierarchy of delimited control operators, we can account
1293: for the ambiguity of the sentence~\eqref{e:sslec}, as well as more complicated
1294: cases of quantification in English and Mandarin \citep{shan-quantifier}.  For
1295: example, both the nondeterministic evaluation order approach and the control
1296: hierarchy approach predict correctly that the sentence below, with three
1297: quantifiers, is $5$-way ambiguous.
1298: \example{\label{e:three-quantifiers}Every representative of a company saw most samples.}
1299: Despite the fact that there are three quantifiers in this sentence and $3! = 6$,
1300: this sentence has only $5$ readings.  Because \phrase{a company} occurs within
1301: the restrictor of \phrase{every representative of a company}, it is incoherent
1302: for \phrase{every} to scope over \phrase{most} and \phrase{most}
1303: over~\phrase{a}.  The reason neither approach generates such a reading can
1304: be seen in the denotation of \phrase{every} in~\eqref{e:every}: ``$\forall x$''
1305: is located immediately above ``$\limplies$'' in the abstract syntax, with no
1306: intervening control delimiter, so no control operator can insert any material
1307: (such as \phrase{most}\hyp quantification over samples) in between.
1308: 
1309: There exist in the computational linguistics literature algorithms for computing
1310: the possible quantifier scopings of a sentence like~\eqref{e:three-quantifiers}
1311: \citep[][followed by
1312: \citealp{moran-quantifier,lewin-quantifier}]{hobbs-algorithm}.  Having related
1313: quantifier scoping to control operators, we gain a denotational
1314: understanding of these algorithms that accords with our theoretical intuitions
1315: and empirical observations.
1316: 
1317: An extended logical metalanguage with an infinite hierarchy of control operators
1318: is shown in Figure~\ref{fig:hierarchy}.  This system is more complex than the
1319: one in Figure~\ref{fig:metalanguage} in two ways.  First, instead of making a
1320: binary distinction between pure and impure expressions, we use a number to
1321: measure ``how pure'' each expression is.  An expression is pure up to level~$n$
1322: if it only incurs control effects at levels above~$n$ when evaluated.  Pure
1323: expressions are the special case when $n=0$.  The purity level of an expression
1324: is reflected in its typing judgment: a judgment
1325: \begin{equation}
1326:     \Gamma\vdash E:\alpha!n
1327: \end{equation}
1328: states that the expression~$E$ is pure up to level~$n$.  Here $\alpha!n$ is
1329: a \emph{computation type} with $n$ levels: as defined in the figure, it
1330: consists of $2^{n+1}-1$ value types that together specify how a computation
1331: that is pure up to level~$n$ affects answer types between levels~$0$
1332: and~$n-1$.  In the special case where $n=1$, the computation type $\alpha!1$ is
1333: of the familiar form~$\alpha_\gamma^\delta$.
1334: 
1335: In the previous system in Figure~\ref{fig:metalanguage}, directional functions
1336: are always impure (that is, pure up to level~$1$) while non-directional
1337: functions are always pure (that is, pure up to level~$0$).  In the current
1338: system, both kinds of functions declare in their types up to what level their
1339: bodies are pure.  For example, the determiners \phrase{every} and \phrase{some},
1340: now allowed to shift at any level, both have not just the type
1341: \begin{gather}
1342:     \label{e:det-type-pure-restrictor}
1343:     (\Thing \toF \Bool) \Fwd
1344:     \Thing     _{\Bool_{\gamma!n}^{\delta!n}}
1345:                ^{\Bool_{\gamma!n}^{\delta!n}},
1346: \end{gather}
1347: but also the type
1348: \begin{equation}
1349:     \label{e:det-type-impure-restrictor}
1350:     (\Thing \toF \Bool_{\gamma1!n}^{\gamma0!n}) \Fwd
1351:     \Thing     _{\Bool_{\gamma2!n}^{\gamma1!n}}
1352:                ^{\Bool_{\gamma2!n}^{\gamma0!n}}
1353: \end{equation}
1354: (but see the second technical complication below).  As the argument type
1355: $\Thing \toF \smash{\Bool_{\gamma1!n}^{\gamma0!n}}$ above shows,
1356: the first argument to these determiners, the restrictor, is non-directional
1357: yet can be impure (that is, pure up to level $n+1$).
1358: 
1359: To traverse the control hierarchy, we add a new Reset rule, which makes an
1360: expression more pure, and a new Lift rule, which makes an expression less pure.
1361: (Consecutive nested resets like $\bigl[[E]^{n+1}\bigr]^n$ can be abbreviated to
1362: $[E]^n$ without loss of coherence.)
1363: 
1364: A second complication in this system, in contrast to
1365: Figure~\ref{fig:metalanguage}, is that we can no longer encode logical
1366: quantification using a higher-order constant like
1367: $\forall:(\Thing\toF\Bool)\toF\Bool$, because such a constant requires its
1368: argument---the logical formula to be quantified---to be a pure function.  This
1369: requirement is problematic because it is exactly the impurity of quantified
1370: logical formulas that underlies this account of quantifier scope ambiguity.  On
1371: one hand, we want to quantify logical formulas that are impure; on the other
1372: hand, we want to rule out expressions like
1373: \begin{equation}
1374:     \label{e:leak}
1375:     \Forall{x} \Shift f\Colon x,
1376: \end{equation}
1377: where the logical variable~$x$ ``leaks'' illicitly into the surrounding context.
1378: This issue is precisely the problem of classifying open and closed terms in
1379: staged programming (see \citealp{taha-environment} and references therein): the
1380: types $\Thing$ and $\Bool$ really represent not individuals or truth values but
1381: staged programs that compute individuals and truth values.  For this paper, we
1382: adopt the simplistic solution of adjoining to these types a set of free logical
1383: variables for tracking purposes, denoted~$p,q,\dotsc$.  Unfortunately, we also
1384: need to stipulate that these logical variables be freshly $\alpha$-renamed
1385: (``created by gensym'') for each occurrence of a quantifier in a sentence.
1386: 
1387: As before, the CPS transform and reductions for this metalanguage are
1388: standard; the latter appears in Figure~\ref{fig:reductions-hierarchy}.
1389: 
1390: The present analysis is almost, but not quite, the direct-style analogue of
1391: \citets{shan-explaining} CPS analysis, even though both use a control
1392: hierarchy.  Each level in \citeauthor{shan-explaining}'s hierarchy is
1393: intuitively a staged computation produced at one level higher.  More
1394: concretely, a computation type with $n$ levels in that system has the shape
1395: \begin{equation}
1396:     (\alpha_{\gamma1}^{\gamma0})_{\delta1}^{\delta0}
1397: \end{equation}
1398: rather than the shape
1399: \begin{equation}
1400:     \alpha_{\gamma1_{\delta3}^{\delta2}}
1401:           ^{\gamma0_{\delta1}^{\delta0}}.
1402: \end{equation}
1403: The issue above of how to encode logical quantification over impure formulas
1404: receives a more satisfactory treatment in Shan and Barker's system: no
1405: stipulation of $\alpha$-renaming is necessary, because there is no analogue
1406: of~\eqref{e:leak} to prohibit.  The relation between that system and staged
1407: programming with effects has yet to be explored.
1408: 
1409: \section{Polarity sensitivity}
1410: \label{s:polarity}
1411: 
1412: Because the analysis so far focuses on the truth\hyp conditional meaning of
1413: quantifiers, it equates the determiners \phrase{a} and
1414: \phrase{some}---both are existential quantifiers with the type and
1415: denotation in~\eqref{e:some}.  Furthermore, sentences like \phrase{Has anyone
1416: arrived?} suggest that the determiner \phrase{any} also means the same thing as
1417: \phrase{a} and \phrase{some}.  To the contrary, though, the determiners
1418: \phrase{a}, \phrase{some}, and \phrase{any} are not always interchangeable in
1419: their existential usage.  The sentences and readings in~\eqref{e:polarity} show
1420: that they take scope differently relative to negation (in these cases the
1421: quantifier~\phrase{no}).
1422: \examples[l@{}l@{}X]{
1423: \ex\label{e:polarity}
1424: &\ey\label{e:polarity-unordered-first}\label{e:no-some}
1425:     &No student liked some course.\hfill (unambiguous $\exists\neg$)\\
1426: &\ey\label{e:no-a}
1427:     &No student liked a course.\hfill (ambiguous $\neg\exists$, $\exists\neg$)\\
1428: &\ey\label{e:no-any}
1429:     &No student liked any course.\hfill (unambiguous $\neg\exists$)\\
1430: &\ey\label{e:some-no}
1431:     &Some student liked no course.\hfill (unambiguous $\exists\neg$)\\
1432: &\ey\label{e:polarity-unordered-last}\label{e:a-no}
1433:     &A student liked no course.\hfill (ambiguous $\neg\exists$, $\exists\neg$)\\
1434: &\ey\label{e:any-no}
1435:     &\<*Any student liked no course.\hfill (unacceptable)}
1436: 
1437: The determiner \phrase{any} is a \emph{negative polarity item}: to a first
1438: approximation, it can occur only in \emph{downward\hyp
1439: entailing} contexts, such as under the scope of a \emph{monotonically
1440: decreasing} quantifier \citep{ladusaw-polarity}.
1441: A quantifier~$q$, of type $(\Thing\toF\Bool)\toF\Bool$, is
1442: monotonically decreasing just in case
1443: \begin{equation}
1444:     \Forall{s_1} \Forall{s_2}
1445:     \bigl(\Forall{x} s_2(x) \limplies s_1(x)\bigr)
1446:         \limplies q(s_1) \limplies q(s_2).
1447: \end{equation}
1448: The quantificational noun phrases \phrase{no student} and \phrase{no course} are
1449: monotonically decreasing since, for instance, if no student liked any course in
1450: general, then no student liked any computer science course in particular.
1451: 
1452: Whereas \phrase{any} is a negative polarity item, \phrase{some} is
1453: a \emph{positive polarity item}. Roughly speaking, \phrase{some} is allergic to
1454: downward\hyp entailing contexts (especially those with an overtly negative word
1455: like~\phrase{no}).  These generalizations regarding polarity items cover the
1456: data in~(\refrange{e:polarity-unordered-first}{e:polarity-unordered-last}): in
1457: principle, goes the theory, all these sentences are ambiguous between two
1458: scopings, but the polarity sensitivity of \phrase{some} and \phrase{any} rule
1459: out one scoping each in \eqref{e:no-some}, \eqref{e:no-any}, \eqref{e:some-no},
1460: and~\eqref{e:any-no}.  These four sentences are thus predicted to be
1461: unambiguous, but it remains unclear why \eqref{e:any-no} is downright
1462: unacceptable.
1463: 
1464: In the type-theoretic tradition of linguistics, polarity sensitivity is
1465: typically implemented by splitting the answer type $\Bool$ into several types,
1466: each a different functor applied to $\Bool$, that are related by subtyping
1467: \citep{fry-proof,bernardi-reasoning,bernardi-generalized}.  For instance, to
1468: differentiate the determiners in~\eqref{e:polarity} from each other in our
1469: formalism, we can add the types $\BoolPos$ and $\BoolNeg$ alongside
1470: $\Bool$, such that both are supertypes of $\Bool$ (but not of each other).
1471: \begin{equation}
1472: \label{e:subtype}
1473:     \begin{prooftree} \justifies \Bool \le \BoolPos \end{prooftree}
1474:     \qquad
1475:     \begin{prooftree} \justifies \Bool \le \BoolNeg \end{prooftree}
1476: \end{equation}
1477: We also extend the subtyping relation between (value and computation) types with
1478: the usual closure rules,
1479: and allow implicit coercion from a subtype to a supertype.
1480: \begin{gather}
1481:     \begin{prooftree}
1482:         \strut\justifies\strut \alpha \le \alpha
1483:     \end{prooftree}
1484:     \qquad
1485:     \begin{prooftree}
1486:         \alpha' \le \alpha
1487:         \quad \beta!n \le \beta'!n
1488:         \strut\justifies\strut \alpha\toMaybeD\beta!n \le \alpha'\toMaybeD\beta'!n
1489:     \end{prooftree}
1490: \displaybreak[0]
1491: \\
1492:     \begin{prooftree}
1493:         \alpha' \le \alpha
1494:         \quad \gamma'!n \le \gamma!n
1495:         \quad \delta!n \le \delta'!n
1496:         \justifies \alpha_{\gamma!n}^{\delta!n}
1497:             \le \alpha'{}_{\gamma'!n}^{\delta'!n}
1498:     \end{prooftree}
1499: \displaybreak[1]
1500: \\
1501:     \begin{prooftree}
1502:         \Gamma \vdash E:\alpha!n
1503:         \quad \alpha!n\le\beta!n
1504:         \strut\justifies\strut \Gamma \vdash E:\beta!n
1505:         \using \text{Sub}
1506:     \end{prooftree}
1507: \end{gather}
1508: We then add a side condition to Reset, requiring that the
1509: produced answer type be $\Bool$ or $\BoolPos$, not $\BoolNeg$.
1510: \begin{gather}
1511:     \begin{prooftree}
1512:         \Gamma \vdash E : \alpha_\alpha^\beta
1513:         \justifies \Gamma \vdash [E] : \beta
1514:         \using \text{Reset}
1515:         \qquad \text{where $\beta \le \BoolPos$}
1516:     \end{prooftree}
1517: \displaybreak[0]
1518: \\
1519:     \begin{prooftree}
1520:         \Gamma \vdash E : \alpha_{\alpha_{\gamma!n}^{\gamma!n}}^{\beta!(n+1)}
1521:         \justifies \Gamma \vdash [E] : \beta!(n+1)
1522:         \using \text{Reset}
1523:         \qquad \text{where $\beta \le \BoolPos$}
1524:     \end{prooftree}
1525: \end{gather}
1526: Finally, we refine the types of our determiners
1527: from~\eqref{e:det-type-pure-restrictor} to
1528: \begingroup
1529: \newcommand{\transition}[3]{%
1530:     \denote{\text{#1}}
1531:     &: (\Thing\toF\Bool)\Fwd\Thing_{\smash[b]{#2}_{\gamma!n}^{\delta!n}}
1532:                                   ^{\smash[b]{#3}_{\gamma!n}^{\delta!n}}}
1533: \begin{align}
1534: \label{e:poldet-first}
1535: \transition{no}{\BoolNeg}{\Bool}, \displaybreak[0] \\
1536: \transition{some}{\BoolPos}{\BoolPos}, \displaybreak[1] \\
1537: \transition{a}{\Bool}{\Bool}, \displaybreak[0] \\
1538: \transition{any}{\BoolNeg}{\BoolNeg}.
1539: \label{e:poldet-last}
1540: \end{align}
1541: \endgroup
1542: 
1543: The chain of answer-type transitions from one quantificational expression to the
1544: next acts as a finite-state automaton, shown in Figure~\vref{fig:automaton}.
1545: The states of the automaton are the three supertypes of~$\Bool$; the
1546: $\epsilon$\hyp transitions are the two subtyping relations in~\eqref{e:subtype};
1547: and the non-$\epsilon$ transitions are the determiners
1548: in~(\refrange{e:poldet-first}{e:poldet-last}).
1549: \begin{figure}[htbp]
1550: \let\labelstyle\objectstyle
1551: \centering
1552: $
1553:   \xymatrix @R=1em{
1554:     *+[F-:<\jot>]{\BoolPos} \ar@/_1em/[dr]_{\epsilon} \ar@(ur,ul)[]_{\text{some}}
1555:     &&
1556:     *+[F-:<\jot>]{\smash[b]{\BoolNeg}} \ar@/_1em/[dl]_{\epsilon} \ar@(ur,ul)[]_{\text{\smash[b]{any}}}
1557:     \\
1558:     &
1559:     *+[F-:<\jot>]+<\jot,\jot>[F-:<1.5\jot>]{\Bool} \ar@/_1em/[ur]_{\text{no}} \ar@(dl,dr)[]_{\text{a}}
1560:   }
1561: $
1562: \caption{An automaton of answer-type transitions}
1563: \label{fig:automaton}
1564: \end{figure}
1565: 
1566: This three-state machine enforces polarity constraints as follows.  Any valid
1567: derivation for a sentence assigns each of its quantifiers to shift at
1568: a certain level in the control hierarchy.  For each level, the quantifiers at
1569: that level---in the order in which they are evaluated---must form
1570: \begin{itemize*}
1571:     \item either a path from $\BoolPos$ to $\Bool$ in the state machine, in
1572:           other words a string of determiners matching the regular expression
1573:           ``$\text{some}^*\,(\text{a}\,|\,\text{no}\;\text{any}^*)^*$'';
1574:     \item or a path from $\Bool$ to $\Bool$ in the state machine, in other
1575:           words a string of determiners matching the regular expression
1576:           ``$(\text{a}\,|\,\text{no}\;\text{any}^*)^*$''.
1577: \end{itemize*}
1578: Furthermore, the $\BoolPos$-to-$\Bool$ levels in the hierarchy must all be
1579: higher than the $\Bool$-to-$\Bool$ levels.  Every assignment of quantifiers to
1580: levels that satisfies these conditions gives a reading for the sentence, in
1581: which quantifiers at higher levels scope wider, and, among quantifiers at the
1582: same level, ones evaluated earlier scope wider.
1583: 
1584: Consider now the two alternative ways to characterize scope ambiguity suggested
1585: in \S\ref{s:hierarchy}.  The first approach is to allow arbitrary evaluation
1586: order (and use a degenerate control hierarchy of one level only).  If we take
1587: this route, we can account for all of the acceptability and ambiguity judgments
1588: in~(\refrange{e:polarity-unordered-first}{e:polarity-unordered-last}), but we
1589: cannot distinguish the acceptable sentence \eqref{e:no-any} from the
1590: unacceptable \eqref{e:any-no}.  In other words, it would be a mystery how the
1591: acceptability of a sentence hinges on the linear order in which the quantifiers
1592: \phrase{no} and \phrase{any} appear.  This mystery has been noted by
1593: \citet[\S9.2]{ladusaw-polarity} and \citet[\S8.2]{fry-proof} as a defect in
1594: current accounts of polarity sensitivity.
1595: 
1596: The second approach, using a control hierarchy with multiple levels, fares
1597: better by comparison.%
1598: \footnote{Although this paper uses Danvy and Filinski's control hierarchy,
1599:     polarity sensitivity can be expressed equally well in Shan and Barker's
1600:     system.}
1601: We can stick to left-to-right evaluation, under which---as
1602: desired---an \phrase{any} must be preceded by a \phrase{no} that scopes over it
1603: with no intervening \phrase{a} or~\phrase{some}.  Indeed, the variations in
1604: ambiguity and acceptability among sentences in~\eqref{e:polarity} are completely
1605: captured.  For intuition, we can imagine that the hearer of a sentence must
1606: first process the trigger for a downward\hyp entailing context, like
1607: \phrase{no}, before it makes sense to process a negative polarity item, like
1608: \phrase{any}.%
1609: \footnote{The syntactic distinction among the types $\Bool$, $\BoolPos$, and
1610:     $\BoolNeg$ may even be semantically interpretable via the formulas-as-types
1611:     correspondence, but the potential for such a connection has only been
1612:     briefly explored \citep{bernardi-polarity} and we do not examine it here.
1613:     In this connection, \citet{krifka-semantics} and others have proposed on
1614:     pragmatic grounds that determiners like \phrase{any} are negative polarity
1615:     items because they indicate extreme points on a scale.}
1616: Intuition aside, the programming\hyp language notion of evaluation order
1617: provides the syntactic hacker of formal types with a new tool with which to
1618: capture observed regularities in natural language.
1619: 
1620: \section{Linguistic side effects}
1621: %\section{Beyond quantification}
1622: \label{s:beyond}
1623: 
1624: This paper outlines how quantification and polarity sensitivity in natural
1625: language can be modeled using delimited continuations.  These two examples
1626: support my claim that the formal theory and computational intuition we have for
1627: continuations can help us construct, understand, and maintain linguistic
1628: theories.  To be sure, this work is far from the first time insights from
1629: programming languages are applied to natural language:
1630: \begin{itemize*}
1631:     \item It has long been noted that the intensional logic in which Montague
1632:           grammar is couched can be understood computationally
1633:           \citep{hobbs-making,hung-semantics}.
1634:     \item \emp{Dynamic semantics} \citep{groenendijk-dynamic}, which relates
1635:           anaphora and discourse in natural languages to nondeterminism and
1636:           mutable state in programming languages \citep{van-eijck-programming},
1637:           has been applied to a variety of natural language phenomena, such as
1638:           verb-phrase ellipsis
1639:           \citep{van-eijck-verb-phrase,gardent-dynamic,hardt-dynamic}.
1640: \end{itemize*}
1641: However, the link between natural language and continuations has only
1642: recently been made explicit, and this paper's use of control operators for
1643: a direct-style analysis is novel.
1644: 
1645: The analyses presented here are part of a larger project, that of
1646: relating computational side effects to \emp{linguistic side effects}.  The term
1647: ``computational side effect'' here covers all programming language features
1648: where either it is unclear what a denotational semantics should look like, or
1649: the ``obvious'' denotational semantics (such as making each arithmetic
1650: expression denote a number) turns out to break referential transparency.
1651: A computational side effect of the first kind is jumps to labels; one of the
1652: second kind is mutable state.  By analogy, I use the term ``linguistic side
1653: effects'' to refer to aspects of natural language where either it is unclear
1654: what a denotational semantics should look like, or the ``obvious'' denotational
1655: semantics (such as making each clause denote whether it is true) turns out to
1656: break referential transparency.  Besides quantification and polarity
1657: sensitivity, some examples are:
1658: \examples[l@{}l@{}X@{}>(r<)]{
1659: \ex\label{e:nl}
1660: &\ey \label{e:nl-intensionality}
1661:      &Bob \emph{thinks} Alice likes CS187. &Intensionality \\
1662: &\ey &\emph{A man} walks. \emph{He} whistles.  &Variable binding \\
1663: %&\ey &\emph{Every} woman whistles.  &Quantification \\
1664: &\ey \label{e:nl-interrogatives}
1665:      &\emph{Which} star did Alice see?  &Interrogatives \\
1666: &\ey &Alice \emph{only} saw \emph{\textsc{Venus}}.  &Focus \\
1667: &\ey &\emph{The king of France} whistles. &Presuppositions}
1668: 
1669: To study linguistic side effects, I propose to draw an analogy between them and
1670: computational side effects.  Just as computer scientists want to express all
1671: computational side effects in a uniform and modular framework and study how
1672: control interacts with mutable state \citep{felleisen-revised}, linguists want
1673: to investigate properties common to all linguistic side effects and study
1674: how quantification interacts with variable binding.  Furthermore, just as
1675: computer scientists want to relate operational notions like evaluation order
1676: and parameter passing to denotational models like continuations and monads,
1677: linguists want to relate the dynamics of information in language processing to
1678: the static definition of a language as a generative device.  Whether this
1679: analogy yields a linguistic theory that is empirically adequate is an open
1680: scientific question that I find attractive to pursue.
1681: 
1682: \section{Acknowledgments}
1683: 
1684: \noindent
1685: Thanks to Stuart Shieber, Chris Barker, Raffaella Bernardi, Barbara Grosz,
1686: Pauline Jacobson, Aravind Joshi, William Ladusaw, Fernando Pereira, Avi Pfeffer,
1687: Chris Potts, Norman Ramsey, Dylan Thurston, Yoad Winter, and anonymous
1688: referees.  This work is supported by the United States National Science
1689: Foundation Grants IRI-9712068 and BCS-0236592.
1690: 
1691: \bibliographystyle{mcbride}
1692: \bibliography{ccshan}
1693: \setlength{\dimen0}{\depthof{g}}
1694: \vspace*{-\dimen0}
1695: 
1696: \end{document}
1697: