cs0009025/cs0009025
1: %!PS-Adobe-3.0
2: %%Title: (COLING'00 ShortDer-corrected)
3: %%Creator: (Microsoft Word 5.1: LaserWriter 8 N1-8.2.2)
4: %%CreationDate: (17:49 vrijdag, 22 september 2000)
5: %%For: (rens)
6: %%Pages: 7
7: %%DocumentFonts: Times-Bold Symbol Times-Roman Times-Italic Helvetica-Bold
8: %%DocumentNeededFonts: Times-Bold Symbol Times-Roman Times-Italic Helvetica-Bold
9: %%DocumentSuppliedFonts:
10: %%DocumentData: Clean7Bit
11: %%PageOrder: Ascend
12: %%Orientation: Portrait
13: %%DocumentMedia: Default 595 842 0 () ()
14: %ADO_ImageableArea: 29 31 567 812
15: %%EndComments
16: userdict begin/dscInfo 5 dict dup begin % build a 5-entry dict of document info; the closing "def" binds it to /dscInfo in userdict
17: /Title(COLING'00 ShortDer-corrected)def
18: /Creator(Microsoft Word 5.1: LaserWriter 8 N1-8.2.2)def
19: /CreationDate(17:49 vrijdag, 22 september 2000)def
20: /For(rens)def
21: /Pages 1 def % NOTE(review): the Pages header comment at the top of the file says 7 while this entry is 1 -- confirm whether anything reads this value
22: end def end % close the new dict, define /dscInfo, then pop userdict
23: /md 198 dict def md begin/currentpacking where {pop /sc_oldpacking currentpacking def true setpacking}if % create and open the md dict; if currentpacking exists, remember the current mode in sc_oldpacking and turn packing on
24: %%BeginFile: adobe_psp_basic
25: %%Copyright: Copyright 1990-1993 Adobe Systems Incorporated. All Rights Reserved.
26: /bd{bind def}bind def % /name {proc} bd : bind the procedure, then define it
27: /xdf{exch def}bd % value /name xdf : def with the two operands swapped
28: /xs{exch store}bd % value /name xs : store with the two operands swapped
29: /ld{load def}bd % /new /old ld : define new as the current value of old
30: /Z{0 def}bd % /name Z : define name as 0; used below to pre-declare names that are later overwritten with store/xs
31: /T/true % from here to /np, 14 /alias /original pairs are pushed on the operand stack ...
32: /F/false
33: /:L/lineto
34: /lw/setlinewidth
35: /:M/moveto
36: /rl/rlineto
37: /rm/rmoveto
38: /:C/curveto
39: /:T/translate
40: /:K/closepath
41: /:mf/makefont
42: /gS/gsave
43: /gR/grestore
44: /np/newpath
45: 14{ld}repeat % ... and turned into definitions here, one ld per pair
46: /$m matrix def % scratch matrix; @f and @n store the current matrix in it
47: /av 81 def
48: /por true def
49: /normland false def
50: /psb-nosave{}bd % empty procedure
51: /pse-nosave{}bd % empty procedure
52: /us Z % slot for the save object used by psb/pse
53: /psb{/us save store}bd % take a VM snapshot and keep it in us
54: /pse{us restore}bd % restore the snapshot kept in us
55: /level2 % boolean: true when languagelevel is defined and is at least 2, false otherwise
56: /languagelevel where
57: {
58: pop languagelevel 2 ge
59: }{
60: false
61: }ifelse
62: def
63: /featurecleanup % used in the setup section as: countdictstack [ {feature code} featurecleanup
64: {
65: stopped % run the feature code, trapping an error instead of letting it propagate
66: cleartomark % drop the result of stopped and everything else down to and including the mark
67: countdictstack exch sub dup 0 gt % current dict-stack depth minus the depth recorded before the feature code ran
68: {
69: {end}repeat % pop the extra dictionaries the feature code left open
70: }{
71: pop
72: }ifelse
73: }bd
74: /noload Z % slot for the save object used by startnoload/endnoload
75: /startnoload % bool startnoload : when bool is true, take a VM snapshot and keep it in noload
76: {
77: {/noload save store}if
78: }bd
79: /endnoload % bool endnoload : when bool is true, restore the snapshot kept in noload
80: {
81: {noload restore}if
82: }bd
83: level2 startnoload % when level2 is true, the two definitions that follow are bracketed by save/restore
84: /setjob % string setjob : store the string under jobname in statusdict
85: {
86: statusdict/jobname 3 -1 roll put
87: }bd
88: /setcopies % n setcopies : store n under #copies in userdict
89: {
90: userdict/#copies 3 -1 roll put
91: }bd
92: level2 endnoload level2 not startnoload % when level2 is false, the two definitions that follow are bracketed by save/restore
93: /setjob % string setjob : pass a one-entry dict holding JobName to setuserparams
94: {
95: 1 dict begin/JobName xdf currentdict end setuserparams
96: }bd
97: /setcopies % n setcopies : pass a one-entry dict holding NumCopies to setpagedevice
98: {
99: 1 dict begin/NumCopies xdf currentdict end setpagedevice
100: }bd
101: level2 not endnoload
102: /pm Z % slot for the per-page save object (set by initializepage, used by endp)
103: /mT Z % slot for the page matrix; the setup section replaces it with a 6-element matrix
104: /sD Z % slot that the setup section replaces with a 16-entry dict
105: /realshowpage Z
106: /initializepage % take a VM snapshot into pm, then concatenate mT onto the current matrix
107: {
108: /pm save store mT concat
109: }bd
110: /endp % restore the snapshot taken by initializepage, then emit the page
111: {
112: pm restore showpage
113: }def
114: /$c/DeviceRGB def
115: /rectclip where % rC: alias for rectclip when that operator exists, otherwise the emulation below
116: {
117: pop/rC/rectclip ld
118: }{
119: /rC % x y w h rC
120: {
121: np 4 2 roll % bring x y to the top, leaving w h underneath
122: :M
123: 1 index 0 rl % edge of length w along x (w stays on the stack)
124: 0 exch rl % edge of length h along y
125: neg 0 rl % edge of length w back along x
126: :K
127: clip np
128: }bd
129: }ifelse
130: /rectfill where % rF: alias for rectfill when that operator exists, otherwise the emulation below
131: {
132: pop/rF/rectfill ld
133: }{
134: /rF % x y w h rF ; same path as rC but filled inside gsave/grestore (no explicit closepath before fill)
135: {
136: gS
137: np
138: 4 2 roll
139: :M
140: 1 index 0 rl
141: 0 exch rl
142: neg 0 rl
143: fill
144: gR
145: }bd
146: }ifelse
147: /rectstroke where % rS: alias for rectstroke when that operator exists, otherwise the emulation below
148: {
149: pop/rS/rectstroke ld
150: }{
151: /rS % x y w h rS ; same path as rC, closed and stroked inside gsave/grestore
152: {
153: gS
154: np
155: 4 2 roll
156: :M
157: 1 index 0 rl
158: 0 exch rl
159: neg 0 rl
160: :K
161: stroke
162: gR
163: }bd
164: }ifelse
165: %%EndFile
166: %%BeginFile: adobe_psp_colorspace_level1
167: %%Copyright: Copyright 1991-1993 Adobe Systems Incorporated. All Rights Reserved.
168: /G/setgray ld
169: /:F/setrgbcolor ld
170: %%EndFile
171: %%BeginFile: adobe_psp_uniform_graphics
172: %%Copyright: Copyright 1990-1993 Adobe Systems Incorporated. All Rights Reserved.
173: /@a % takes nine numeric operands: builds a path from moveto / rlineto / lineto segments and fills it
174: {
175: np :M 0 rl :L 0 exch rl 0 rl :L fill
176: }bd
177: /@b % takes eight numeric operands: same idea as @a with a different sequence of segments
178: {
179: np :M 0 rl 0 exch rl :L 0 rl 0 exch rl fill
180: }bd
181: /arct where % keep the built-in arct when it exists; otherwise emulate it with arcto
182: {
183: pop
184: }{
185: /arct
186: {
187: arcto pop pop pop pop % discard the four numbers arcto leaves on the stack
188: }bd
189: }ifelse
190: /x1 Z % x1 x2 y1 y2 rad: working variables of @q and @s
191: /x2 Z
192: /y1 Z
193: /y2 Z
194: /rad Z
195: /@q % x1 y1 x2 y2 rad @q : fill a rectangle whose four corners are rounded with radius rad
196: {
197: /rad xs
198: /y2 xs
199: /x2 xs
200: /y1 xs
201: /x1 xs
202: np
203: x2 x1 add 2 div y1 :M % start at the midpoint of the y1 edge
204: x2 y1 x2 y2 rad arct
205: x2 y2 x1 y2 rad arct
206: x1 y2 x1 y1 rad arct
207: x1 y1 x2 y1 rad arct
208: fill
209: }bd
210: /@s % x1 y1 x2 y2 rad @s : same path as @q, closed and stroked instead of filled
211: {
212: /rad xs
213: /y2 xs
214: /x2 xs
215: /y1 xs
216: /x1 xs
217: np
218: x2 x1 add 2 div y1 :M
219: x2 y1 x2 y2 rad arct
220: x2 y2 x1 y2 rad arct
221: x1 y2 x1 y1 rad arct
222: x1 y1 x2 y1 rad arct
223: :K
224: stroke
225: }bd
226: /@i % x y r @i : fill a full circle
227: {
228: np 0 360 arc fill
229: }bd
230: /@j % sx sy tx ty @j : translate, scale, then fill a circle of radius .5 at the origin (an ellipse on the page)
231: {
232: gS
233: np
234: :T
235: scale
236: 0 0 .5 0 360 arc
237: fill
238: gR
239: }bd
240: /@e % x y r @e : stroke a full circle
241: {
242: np
243: 0 360 arc
244: :K
245: stroke
246: }bd
247: /@f % sx sy tx ty @f : stroked counterpart of @j
248: {
249: np
250: $m currentmatrix % remember the current matrix in $m
251: pop
252: :T
253: scale
254: 0 0 .5 0 360 arc
255: :K
256: $m setmatrix % put the remembered matrix back before stroking
257: stroke
258: }bd
259: /@k % r a1 a2 tx ty @k : translate, then fill the shape bounded by the origin and the arc from angle a1 to a2
260: {
261: gS
262: np
263: :T
264: 0 0 :M
265: 0 0 5 2 roll % move the two zeros (the arc centre) below r a1 a2
266: arc fill
267: gR
268: }bd
269: /@l % a1 a2 sx sy tx ty @l : like @k but scaled, using an arc of radius .5
270: {
271: gS
272: np
273: :T
274: 0 0 :M
275: scale
276: 0 0 .5 5 -2 roll arc % bring a1 a2 above 0 0 .5 so arc receives x y r a1 a2
277: fill
278: gR
279: }bd
280: /@m % x y r a1 a2 @m : stroke an arc
281: {
282: np
283: arc
284: stroke
285: }bd
286: /@n % a1 a2 sx sy tx ty @n : stroke a translated and scaled arc of radius .5
287: {
288: np
289: $m currentmatrix % remember the current matrix in $m
290: pop
291: :T
292: scale
293: 0 0 .5 5 -2 roll arc
294: $m setmatrix % put the remembered matrix back before stroking
295: stroke
296: }bd
297: %%EndFile
298: %%BeginFile: adobe_psp_basic_text
299: %%Copyright: Copyright 1990-1993 Adobe Systems Incorporated. All Rights Reserved.
300: /S/show ld % string S : plain show
301: /A{ % ax string A : ashow with ay = 0
302: 0.0 exch ashow
303: }bd
304: /R{ % cx string R : widthshow with cy = 0 applied to character code 32
305: 0.0 exch 32 exch widthshow
306: }bd
307: /W{ % cx char string W : widthshow with cy = 0
308: 0.0 3 1 roll widthshow
309: }bd
310: /J{ % cx ax string J : awidthshow with cy = ay = 0 and character code 32
311: 0.0 32 4 2 roll 0.0 exch awidthshow
312: }bd
313: /V{ % cx char ax string V : awidthshow with cy = ay = 0
314: 0.0 4 1 roll 0.0 exch awidthshow
315: }bd
316: /fcflg true def % cleared by fc once the warning has been printed
317: /fc{ % while fcflg is set: print a warning when (maximum - used) from vmstatus is below 50000
318: fcflg{
319: vmstatus exch sub 50000 lt{
320: (%%[ Warning: Running out of memory ]%%\r)print flush/fcflg false store
321: }if pop
322: }if
323: }bd
324: /$f[1 0 0 -1 0 0]def % matrix that negates the y axis
325: /:ff{$f :mf}bd % font :ff : makefont with $f
326: /MacEncoding StandardEncoding 256 array copy def % start MacEncoding as a copy of StandardEncoding
327: MacEncoding 39/quotesingle put
328: MacEncoding 96/grave put
329: /Adieresis/Aring/Ccedilla/Eacute/Ntilde/Odieresis/Udieresis/aacute % the names from here on are pushed on the stack ...
330: /agrave/acircumflex/adieresis/atilde/aring/ccedilla/eacute/egrave
331: /ecircumflex/edieresis/iacute/igrave/icircumflex/idieresis/ntilde/oacute
332: /ograve/ocircumflex/odieresis/otilde/uacute/ugrave/ucircumflex/udieresis
333: /dagger/degree/cent/sterling/section/bullet/paragraph/germandbls
334: /registered/copyright/trademark/acute/dieresis/notequal/AE/Oslash
335: /infinity/plusminus/lessequal/greaterequal/yen/mu/partialdiff/summation
336: /product/pi/integral/ordfeminine/ordmasculine/Omega/ae/oslash
337: /questiondown/exclamdown/logicalnot/radical/florin/approxequal/Delta/guillemotleft
338: /guillemotright/ellipsis/space/Agrave/Atilde/Otilde/OE/oe
339: /endash/emdash/quotedblleft/quotedblright/quoteleft/quoteright/divide/lozenge
340: /ydieresis/Ydieresis/fraction/currency/guilsinglleft/guilsinglright/fi/fl
341: /daggerdbl/periodcentered/quotesinglbase/quotedblbase/perthousand
342: /Acircumflex/Ecircumflex/Aacute/Edieresis/Egrave/Iacute/Icircumflex/Idieresis/Igrave
343: /Oacute/Ocircumflex/apple/Ograve/Uacute/Ucircumflex/Ugrave/dotlessi/circumflex/tilde
344: /macron/breve/dotaccent/ring/cedilla/hungarumlaut/ogonek/caron
345: MacEncoding 128 128 getinterval astore pop % ... and stored here into positions 128..255 of MacEncoding
346: level2 startnoload % when level2 is true, the definition that follows is bracketed by save/restore
347: /copyfontdict % /fontname copyfontdict : look the font up, copy every entry except FID into a new dict, and leave that dict open on the dict stack
348: {
349: findfont dup length dict
350: begin
351: {
352: 1 index/FID ne{def}{pop pop}ifelse
353: }forall
354: }bd
355: level2 endnoload level2 not startnoload % when level2 is false, the definition that follows is bracketed by save/restore
356: /copyfontdict % /fontname copyfontdict : same result using the dict form of copy; the new dict is left open on the dict stack
357: {
358: findfont dup length dict
359: copy
360: begin
361: }bd
362: level2 not endnoload
363: md/fontname known not{ % default fontname to /customfont unless md already has an entry for it
364: /fontname/customfont def
365: }if
366: /Encoding Z
367: /:mre % /newname /basefont :mre : copy the base font, switch it to MacEncoding, register it under fontname, flip it with :ff and define newname
368: {
369: copyfontdict
370: /Encoding MacEncoding def
371: fontname currentdict
372: end
373: definefont :ff def
374: }bd
375: /:bsr % /newname /basefont :bsr : copy the base font, give it a private 256-entry copy of its Encoding, and leave that array on the stack twice
376: {
377: copyfontdict
378: /Encoding Encoding 256 array copy def
379: Encoding dup
380: }bd
381: /pd{put dup}bd % array index name pd : put, then duplicate the array reference remaining below for the next pd
382: /:esr % finish what :bsr started: drop the two leftover array references, register the font, flip it and define newname
383: {
384: pop pop
385: fontname currentdict
386: end
387: definefont :ff def
388: }bd
389: /scf % /name font size scf : define name as the font scaled to size
390: {
391: scalefont def
392: }bd
393: /scf-non % font sx sy scf-non : build a scale matrix in $m, apply it with makefont and make the result current
394: {
395: $m scale :mf setfont
396: }bd
397: /ps Z
398: /fz{/ps xs}bd % n fz : store n in ps
399: /sf/setfont ld
400: /cF/currentfont ld
401: /mbf % uses makeblendedfont and registers the result as ABlend when that operator exists, otherwise pops one operand; the final def runs in both cases
402: {
403: /makeblendedfont where
404: {
405: pop
406: makeblendedfont
407: /ABlend exch definefont
408: }{
409: pop
410: }ifelse
411: def
412: }def
413: %%EndFile
414: %%BeginFile: adobe_psp_derived_styles
415: %%Copyright: Copyright 1990-1993 Adobe Systems Incorporated. All Rights Reserved.
416: /wi % wi: the stringwidth operator, except when version equals (23.0), where it is wrapped in gsave / empty clip rectangle / grestore
417: version(23.0)eq
418: {
419: {
420: gS 0 0 0 0 rC stringwidth gR
421: }bind
422: }{
423: /stringwidth load
424: }ifelse
425: def
426: /$o 1. def % gray level used by gl
427: /gl{$o G}bd % set the current gray to $o
428: /ms{:M S}bd % string x y ms : moveto x y, then show the string
429: /condensedmtx[.82 0 0 1 0 0]def % x scaled by .82
430: /:mc % /name font :mc : define name as the font transformed by condensedmtx
431: {
432: condensedmtx :mf def
433: }bd
434: /extendedmtx[1.18 0 0 1 0 0]def % x scaled by 1.18
435: /:me % /name font :me : define name as the font transformed by extendedmtx
436: {
437: extendedmtx :mf def
438: }bd
439: /basefont Z
440: /basefonto Z
441: /dxa Z
442: /dxb Z
443: /dxc Z
444: /dxd Z
445: /dsdx2 Z
446: /bfproc Z % slot for the per-style font builder stored by :mo :mso :ms :mss :msb
447: /:fbase % font :fbase : for a FontType 0 font, copy it and rebuild FDepVector by applying :fbase to each entry; otherwise run bfproc
448: {
449: dup/FontType get 0 eq{
450: dup length dict begin
451: dup{1 index/FID ne 2 index/UniqueID ne and{def}{pop pop}ifelse}forall % copy every entry except FID and UniqueID
452: /FDepVector exch/FDepVector get[exch/:fbase load forall]def
453: }/bfproc load ifelse
454: /customfont currentdict end definefont % close the dict left open by either branch and register it as customfont
455: }bd
456: /:mo % font :mo : store a builder in bfproc and run :fbase; the builder wraps the font in a Type 3 font that paints each glyph in gray $o and then shows a stroked copy over it
457: {
458: /bfproc{
459: dup dup length 2 add dict
460: begin
461: {
462: 1 index/FID ne 2 index/UniqueID ne and{def}{pop pop}ifelse
463: }forall
464: /PaintType 2 def % the copy is a stroked variant of the font
465: /StrokeWidth .012 0 FontMatrix idtransform pop def
466: /customfont currentdict
467: end
468: definefont
469: 8 dict begin
470: /basefonto xdf % the stroked copy
471: /basefont xdf % the font passed in
472: /FontType 3 def
473: /FontMatrix[1 0 0 1 0 0]def
474: /FontBBox[0 0 1 1]def
475: /Encoding StandardEncoding def
476: /BuildChar
477: {
478: exch begin
479: basefont setfont
480: ( )dup 0 4 -1 roll put % put the character code into a one-character string
481: dup wi
482: setcharwidth
483: 0 0 :M
484: gS
485: gl
486: dup show
487: gR
488: basefonto setfont
489: show
490: end
491: }def
492: }store :fbase
493: }bd
494: /:mso % font :mso : store a builder in bfproc and run :fbase; the builder wraps the font in a Type 3 font that shows each glyph at four offsets and then once more in gray $o
495: {
496: /bfproc{
497: 7 dict begin
498: /basefont xdf
499: /FontType 3 def
500: /FontMatrix[1 0 0 1 0 0]def
501: /FontBBox[0 0 1 1]def
502: /Encoding StandardEncoding def
503: /BuildChar
504: {
505: exch begin
506: sD begin % working values (dxa) are defined in sD
507: /dxa 1 ps div def % offset step: 1 divided by ps
508: basefont setfont
509: ( )dup 0 4 -1 roll put % put the character code into a one-character string
510: dup wi
511: 1 index 0 ne % widen the advance by dxa unless the x width is 0
512: {
513: exch dxa add exch
514: }if
515: setcharwidth
516: dup 0 0 ms
517: dup dxa 0 ms
518: dup dxa dxa ms
519: dup 0 dxa ms
520: gl
521: dxa 2. div dup ms % final copy in gray $o, offset by dxa/2 in both directions
522: end
523: end
524: }def
525: }store :fbase
526: }bd
527: /:ms % font :ms : store a builder in bfproc and run :fbase; the builder wraps the font in a Type 3 font that shows an offset copy of each glyph, then a copy in gray $o, then a stroked copy
528: {
529: /bfproc{
530: dup dup length 2 add dict
531: begin
532: {
533: 1 index/FID ne 2 index/UniqueID ne and{def}{pop pop}ifelse
534: }forall
535: /PaintType 2 def % the copy is a stroked variant of the font
536: /StrokeWidth .012 0 FontMatrix idtransform pop def
537: /customfont currentdict
538: end
539: definefont
540: 8 dict begin
541: /basefonto xdf % the stroked copy
542: /basefont xdf % the font passed in
543: /FontType 3 def
544: /FontMatrix[1 0 0 1 0 0]def
545: /FontBBox[0 0 1 1]def
546: /Encoding StandardEncoding def
547: /BuildChar
548: {
549: exch begin
550: sD begin % working values (dxb) are defined in sD
551: /dxb .05 def
552: basefont setfont
553: ( )dup 0 4 -1 roll put % put the character code into a one-character string
554: dup wi
555: exch dup 0 ne % widen the advance by dxb unless the x width is 0
556: {
557: dxb add
558: }if
559: exch setcharwidth
560: dup dxb .01 add 0 ms % offset copy drawn first
561: 0 dxb :T
562: gS
563: gl
564: dup 0 0 ms
565: gR
566: basefonto setfont
567: 0 0 ms
568: end
569: end
570: }def
571: }store :fbase
572: }bd
573: /:mss % font :mss : store a builder in bfproc and run :fbase; the builder wraps the font in a Type 3 font that shows an offset copy of each glyph, then four copies at dxc offsets, then one in gray $o
574: {
575: /bfproc{
576: 7 dict begin
577: /basefont xdf
578: /FontType 3 def
579: /FontMatrix[1 0 0 1 0 0]def
580: /FontBBox[0 0 1 1]def
581: /Encoding StandardEncoding def
582: /BuildChar
583: {
584: exch begin
585: sD begin % working values (dxc, dsdx2) are defined in sD
586: /dxc 1 ps div def % offset step: 1 divided by ps
587: /dsdx2 .05 dxc 2 div add def
588: basefont setfont
589: ( )dup 0 4 -1 roll put % put the character code into a one-character string
590: dup wi
591: exch dup 0 ne % widen the advance by dsdx2 unless the x width is 0
592: {
593: dsdx2 add
594: }if
595: exch setcharwidth
596: dup dsdx2 .01 add 0 ms % offset copy drawn first
597: 0 .05 dxc 2 div sub :T
598: dup 0 0 ms
599: dup dxc 0 ms
600: dup dxc dxc ms
601: dup 0 dxc ms
602: gl
603: dxc 2 div dup ms % final copy in gray $o, offset by dxc/2 in both directions
604: end
605: end
606: }def
607: }store :fbase
608: }bd
609: /:msb % font :msb : store a builder in bfproc and run :fbase; the builder wraps the font in a Type 3 font that shows each glyph four times at offsets of 0 and dxd
610: {
611: /bfproc{
612: 7 dict begin
613: /basefont xdf
614: /FontType 3 def
615: /FontMatrix[1 0 0 1 0 0]def
616: /FontBBox[0 0 1 1]def
617: /Encoding StandardEncoding def
618: /BuildChar
619: {
620: exch begin
621: sD begin % working values (dxd) are defined in sD
622: /dxd .03 def
623: basefont setfont
624: ( )dup 0 4 -1 roll put % put the character code into a one-character string
625: dup wi
626: 1 index 0 ne % widen the advance by dxd unless the x width is 0
627: {
628: exch dxd add exch
629: }if
630: setcharwidth
631: dup 0 0 ms
632: dup dxd 0 ms
633: dup dxd dxd ms
634: 0 dxd ms % last copy consumes the string
635: end
636: end
637: }def
638: }store :fbase
639: }bd
640: /italicmtx[1 0 -.212557 1 0 0]def % shear matrix: x is offset by -.212557 times y
641: /:mi % /name font :mi : define name as the font transformed by italicmtx
642: {
643: italicmtx :mf def
644: }bd
645: /:v % font :v : return a two-element array [position thickness] taken from FontInfo and mapped through FontMatrix, with .1 and .067 as fallbacks
646: {
647: [exch dup/FontMatrix get exch
648: dup/FontInfo known
649: {
650: /FontInfo get
651: dup/UnderlinePosition known
652: {
653: dup/UnderlinePosition get
654: 2 index 0
655: 3 1 roll
656: transform % map (0, UnderlinePosition) through FontMatrix
657: exch pop % keep only the y result
658: }{
659: .1
660: }ifelse
661: 3 1 roll
662: dup/UnderlineThickness known
663: {
664: /UnderlineThickness get
665: exch 0 3 1 roll
666: transform % map (0, UnderlineThickness) through FontMatrix
667: exch pop
668: abs
669: }{
670: pop pop .067
671: }ifelse
672: }{
673: pop pop .1 .067 % no FontInfo at all: use both fallback values
674: }ifelse
675: ]
676: }bd
677: /$t Z % line width used by :m :n :o
678: /$p Z % vertical offset used by :m :n :o
679: /$s Z % line width used by :n for its second stroke
680: /:p % s [pos thick] :p : set $t = thick*s, $p = pos*s, $s = .012*s
681: {
682: aload pop
683: 2 index mul/$t xs
684: 1 index mul/$p xs
685: .012 mul/$s xs
686: }bd
687: /:m % dx :m : from the current point moved by (0,$p), stroke a horizontal line of length dx with width $t
688: {gS
689: 0 $p rm
690: $t lw
691: 0 rl stroke
692: gR
693: }bd
694: /:n % dx :n : stroke the same line as :m in gray $o, then stroke the outline of that stroke with width $s
695: {
696: gS
697: 0 $p rm
698: $t lw
699: 0 rl
700: gS
701: gl
702: stroke
703: gR
704: strokepath
705: $s lw
706: /setstrokeadjust where{pop % when stroke adjustment exists, enable it for this stroke and then put the previous setting back
707: currentstrokeadjust true setstrokeadjust stroke setstrokeadjust
708: }{
709: stroke
710: }ifelse
711: gR
712: }bd
713: /:o % dx :o : stroke a line shifted by $t/2 in both directions, then call :n with the same dx
714: {gS
715: 0 $p rm
716: $t 2 div dup rm
717: $t lw
718: dup 0 rl
719: stroke
720: gR
721: :n
722: }bd
723: %%EndFile
724: /currentpacking where {pop sc_oldpacking setpacking}if end % put the packing mode saved in sc_oldpacking back (when currentpacking exists) and pop one dictionary from the dict stack
725: %%EndProlog
726: %%BeginSetup
727: md begin % reopen the procset dict for the setup section
728: countdictstack[{ % record the dict-stack depth and a mark, then hand the feature code to featurecleanup
729: %%BeginFeature: *ManualFeed False
730: 1 dict dup /ManualFeed false put setpagedevice
731: %%EndFeature
732: }featurecleanup
733: countdictstack[{ % this feature body contains no code
734: %%BeginFeature: *InputSlot Cassette
735: 
736: %%EndFeature
737: }featurecleanup
738: countdictstack[{ % request a 595 x 842 page with the ImagingBBox and InputAttributes given below
739: %%BeginFeature: *PageRegion A4Small
740: 
741:     3 dict dup /PageSize [595 842] put dup /ImagingBBox [25 25 570 817] put
742: 	dup /InputAttributes 1 dict dup 0 1 dict dup
743: 	/PageSize [595 842] put put put setpagedevice
744: %%EndFeature
745: }featurecleanup
746: (rens)setjob % set the job name
747: /mT[1 0 0 -1 29 812]def % page matrix applied by initializepage: negates y and translates by (29, 812)
748: /sD 16 dict def % dict opened by the BuildChar procedures of :mso :ms :mss :msb
749: 300 level2{1 dict dup/WaitTimeout 4 -1 roll put setuserparams}{statusdict/waittimeout 3 -1 roll put}ifelse % set the wait timeout to 300 through setuserparams or statusdict, depending on level2
750: %%IncludeFont: Times-Bold
751: %%IncludeFont: Symbol
752: %%IncludeFont: Times-Roman
753: %%IncludeFont: Times-Italic
754: %%IncludeFont: Helvetica-Bold
755: /f0_1/Times-Bold % f0: Times-Bold re-encoded with MacEncoding
756: :mre
757: /f0_16 f0_1 16 scf
758: /f0_12 f0_1 12 scf
759: /f0_10 f0_1 10 scf
760: /f1_1/Symbol % f1: Symbol with position 240 of its encoding set to /apple
761: :bsr
762: 240/apple pd
763: :esr
764: /f1_14 f1_1 14 scf
765: /f1_12 f1_1 12 scf
766: /f1_10 f1_1 10 scf
767: /f1_9 f1_1 9 scf
768: /f1_4 f1_1 4 scf
769: /f2_1 f1_1 % f2: the same font object as f1
770: def
771: /f2_12 f2_1 12 scf
772: /f2_10 f2_1 10 scf
773: /f3_1/Times-Roman % f3: Times-Roman re-encoded with MacEncoding
774: :mre
775: /f3_12 f3_1 12 scf
776: /f3_11 f3_1 11 scf
777: /f3_10 f3_1 10 scf
778: /f3_9 f3_1 9 scf
779: /f3_7 f3_1 7 scf
780: /f3_4 f3_1 4 scf
781: /f4_1/Times-Italic % f4: Times-Italic re-encoded with MacEncoding
782: :mre
783: /f4_12 f4_1 12 scf
784: /f4_10 f4_1 10 scf
785: /f4_9 f4_1 9 scf
786: /f4_4 f4_1 4 scf
787: /f5_1 f1_1 % f5: f1 transformed by italicmtx
788: :mi
789: /f5_12 f5_1 12 scf
790: /f5_10 f5_1 10 scf
791: /f5_9 f5_1 9 scf
792: /f5_4 f5_1 4 scf
793: /f6_1/Helvetica-Bold % f6: Helvetica-Bold re-encoded with MacEncoding
794: :mre
795: /f7_1 f6_1 1.04 scf % f7: f6 scaled by 1.04
796: /f7_9 f7_1 9 scf
797: /Courier findfont[10 0 0 -10 0 0]:mf setfont % make a y-negated Courier at scale 10 the current font
798: %%EndSetup
799: %%Page: 1 1
800: %%BeginPageSetup
801: gsave %matrix defaultmatrix setmatrix
802: 90 rotate 4 72 mul .55 -72 mul moveto /Times-Roman findfont % rotate by 90 degrees and position the stamp text
803: 20 scalefont setfont 0.3 setgray (arXiv:cs.CL/0009025   27 Sep 2000) show grestore % show the stamp at size 20 in gray 0.3, then undo the graphics-state changes
804: initializepage % VM snapshot plus page matrix for this page
805: (rens; page: 1 of 7)setjob % update the job name with the page counter
806: %%EndPageSetup
807: gS 0 0 538 781 rC
808: 143 71 :M
809: f0_16 sf
810: .04 .004(Parsing with the Shortest Derivation)J
811: 243 96 :M
812: f0_12 sf
813: .587 .059(Rens Bod)J
814: 112 110 :M
815: f3_11 sf
816: -.048(Informatics Research Institute, University of Leeds, Leeds LS2 9JT,  &)A
817: 107 124 :M
818: -.052(Institute for Logic, Language and Computation, University of Amsterdam)A
819: 222 138 :M
820: -.079(rens@scs.leeds.ac.uk)A
821: 124 182 :M
822: f0_12 sf
823: .051(Abstract)A
824: 33 200 :M
825: f3_10 sf
826: 2.295 .23(Common wisdom has it that the bias of stochastic)J
827: 33 213 :M
828: .861 .086(grammars in favor of shorter derivations of a sentence)J
829: 33 226 :M
830: 1.006 .101(is harmful and should be redressed. We show that the)J
831: 33 239 :M
832: 2.196 .22(common wisdom is wrong for stochastic grammars)J
833: 33 252 :M
834: 3.374 .337(that use elementary trees instead of context-free)J
835: 33 265 :M
836: 1.35 .135(rules, such as Stochastic Tree-Substitution Grammars)J
837: 33 278 :M
838: 3.228 .323(used by Data-Oriented Parsing models. For such)J
839: 33 291 :M
840: 2.392 .239(grammars a )J
841: f4_10 sf
842: .75(non)A
843: f3_10 sf
844: 2.751 .275(-probabilistic metric based on the)J
845: 33 304 :M
846: 1.459 .146(shortest derivation outperforms a probabilistic metric)J
847: 33 317 :M
848: 2.889 .289(on the ATIS and OVIS corpora, while it obtains)J
849: 33 330 :M
850: 1.425 .143(competitive results on the Wall Street Journal \(WSJ\))J
851: 33 343 :M
852: 2.044 .204(corpus. This paper also contains the first published)J
853: 33 356 :M
854: .772 .077(experiments with DOP on the WSJ.)J
855: 33 380 :M
856: f0_12 sf
857: .282 .028(1. Introduction)J
858: 33 398 :M
859: f3_10 sf
860: .731 .073(A well-known property of stochastic grammars is their)J
861: 33 411 :M
862: 2.427 .243(propensity to assign higher probabilities to shorter)J
863: 33 424 :M
864: 2.38 .238(derivations of a sentence \(cf. Chitrao & Grishman)J
865: 33 437 :M
866: 1.485 .149(1990; Magerman & Marcus 1991; Briscoe & Carroll)J
867: 33 450 :M
868: 1.665 .167(1993; Charniak 1996\). This propensity is due to the)J
869: 33 463 :M
870: 2.637 .264(probability of a derivation being computed as the)J
871: 33 476 :M
872: 2.709 .271(product of the rule probabilities, and thus shorter)J
873: 33 489 :M
874: 1.322 .132(derivations involving fewer rules tend to have higher)J
875: 33 502 :M
876: 1.993 .199(probabilities, almost regardless of the training data.)J
877: 33 515 :M
878: 1.694 .169(While this bias may seem interesting in the light of)J
879: 33 528 :M
880: 1.955 .195(the principle of cognitive economy, shorter derivat-)J
881: 33 541 :M
882: 1.27 .127(ions generate smaller parse trees \(consisting of fewer)J
883: 33 554 :M
884: 1.298 .13(nodes\) which are not warranted by the correct parses)J
885: 33 567 :M
886: .944 .094(of sentences. Most systems therefore redress this bias,)J
887: 33 580 :M
888: 1.206 .121(for instance by normalizing the derivation probability)J
889: 33 593 :M
890: 1.096 .11(\(see Caraballo & Charniak 1998\).)J
891: 68 606 :M
892: 2.441 .244(However, for stochastic grammars that use)J
893: 33 619 :M
894: 2.783 .278(elementary trees instead of context-free rules, the)J
895: 33 632 :M
896: 2.427 .243(propensity to assign higher probabilities to shorter)J
897: 33 645 :M
898: 2.736 .274(derivations does not necessarily lead to a bias in)J
899: 33 658 :M
900: 1.159 .116(favor of smaller parse trees, because elementary trees)J
901: 33 671 :M
902: 1.801 .18(may differ in size and lexicalization. For Stochastic)J
903: 33 684 :M
904: 2.259 .226(Tree-Substitution Grammars \(STSG\) used by Data-)J
905: 33 697 :M
906: 1.136 .114(Oriented Parsing \(DOP\) models, it has been observed)J
907: 33 710 :M
908: 1.652 .165(that the shortest derivation of a sentence consists of)J
909: 33 723 :M
910: 1.122 .112(the )J
911: f4_10 sf
912: .423(largest)A
913: f3_10 sf
914: 1.572 .157( subtrees seen in a treebank that generate)J
915: 275 180 :M
916: 1.769 .177(that sentence \(cf. Bod 1992, 98\). We may therefore)J
917: 275 193 :M
918: .988 .099(wonder whether for STSG the bias in favor of shorter)J
919: 275 206 :M
920: .86 .086(derivations is perhaps beneficial rather than harmful.)J
921: 310 219 :M
922: .976 .098(To investigate this question we created a new)J
923: 275 232 :M
924: 1.563 .156(STSG-DOP model which uses this bias as a feature.)J
925: 275 245 :M
926: 2.974 .297(This )J
927: f4_10 sf
928: 1.1(non)A
929: f3_10 sf
930: 4.448 .445(-probabilistic DOP model parses each)J
931: 275 258 :M
932: 5.675 .567(sentence by returning its shortest derivation)J
933: 275 271 :M
934: 1.08 .108(\(consisting of the fewest subtrees seen in the corpus\).)J
935: 275 284 :M
936: 1.167 .117(Only if there is more than one shortest derivation the)J
937: 275 297 :M
938: .664 .066(model backs off to a frequency ordering of the corpus-)J
939: 275 310 :M
940: .859 .086(subtrees and chooses the shortest derivation with most)J
941: 275 323 :M
942: 3.388 .339(highest ranked subtrees. We compared this non-)J
943: 275 336 :M
944: 3.061 .306(probabilistic DOP model against the probabilistic)J
945: 275 349 :M
946: 1.075 .108(DOP model \(which estimates the most probable parse)J
947: 275 362 :M
948: 2.479 .248(for each sentence\) on three different domains: the)J
949: 275 375 :M
950: 1.445 .145(Penn ATIS treebank \(Marcus et al. 1993\), the Dutch)J
951: 275 388 :M
952: 1.708 .171(OVIS treebank \(Bonnema et al. 1997\) and the Penn)J
953: 275 401 :M
954: 2.382 .238(Wall Street Journal \(WSJ\) treebank \(Marcus et al.)J
955: 275 414 :M
956: 1.209 .121(1993\). Surprisingly, the non-probabilistic DOP model)J
957: 275 427 :M
958: 1.203 .12(outperforms the probabilistic DOP model on both the)J
959: 275 440 :M
960: 1.228 .123(ATIS and OVIS treebanks, while it obtains competit-)J
961: 275 453 :M
962: 1.627 .163(ive results on the WSJ treebank. We conjecture that)J
963: 275 466 :M
964: 1.715 .171(any stochastic grammar which uses units of flexible)J
965: 275 479 :M
966: 1.597 .16(size can be turned into an accurate non-probabilistic)J
967: 275 492 :M
968: .009(version.)A
969: 310 505 :M
970: .861 .086(The rest of this paper is organized as follows:)J
971: 275 518 :M
972: 3.467 .347(we first explain both the probabilistic and non-)J
973: 275 531 :M
974: 3.654 .365(probabilistic DOP model. Next, we go into the)J
975: 275 544 :M
976: 2.397 .24(computational aspects of these models, and finally)J
977: 275 557 :M
978: 2.186 .219(we compare the performance of the models on the)J
979: 275 570 :M
980: 1.082 .108(three treebanks.)J
981: 275 594 :M
982: f0_12 sf
983: .137 .014(2. Probabilistic)J
984: f3_10 sf
985: ( )S
986: f0_12 sf
987: .165 .017(vs. Non-Probabilistic)J
988: 275 609 :M
989: f3_10 sf
990: .06<CACACACA>A
991: f0_12 sf
992: .934 .093(Data-Oriented Parsing)J
993: 275 627 :M
994: f3_10 sf
995: 3.207 .321(Both probabilistic and non-probabilistic DOP are)J
996: 275 640 :M
997: 3.511 .351(based on the DOP model in Bod \(1992\) which)J
998: 275 653 :M
999: 1.104 .11(extracts a Stochastic Tree-Substitution Grammar from)J
1000: 275 666 :M
1001: 1.172 .117(a treebank \("STSG-DOP"\).)J
1002: f3_7 sf
1003: 0 -3 rm
1004: .166(1)A
1005: 0 3 rm
1006: f3_10 sf
1007: .958 .096( STSG-DOP uses subtrees)J
1008: -4125 -4125 -1 1 -4123 -4125 1 -4125 -4126 @a
1009: 275 684.24 -.24 .24 418.24 684 .24 275 684 @a
1010: 275 696 :M
1011: f3_7 sf
1012: .206(1)A
1013: f3_9 sf
1014: 0 3 rm
1015: .925 .092( Note that the DOP-approach of extracting grammars from)J
1016: 0 -3 rm
1017: 275 711 :M
1018: 3.167 .317(corpora has been applied to a wide variety of other)J
1019: 275 723 :M
1020: .768 .077(grammatical frameworks, )J
1021: 374 723 :M
1022: .895 .089(including Tree-Insertion Grammar)J
1023: endp
1024: %%Page: 2 2
1025: %%BeginPageSetup
1026: initializepage
1027: (rens; page: 2 of 7)setjob
1028: %%EndPageSetup
1029: -29 -30 :T
1030: gS 29 30 538 781 rC
1031: 62 95 :M
1032: f3_10 sf
1033: 1.315 .131(from parse trees in a corpus as elementary trees, and)J
1034: 62 108 :M
1035: 2.581 .258(leftmost-substitution to combine subtrees into new)J
1036: 62 121 :M
1037: 1.563 .156(trees. As an example, consider a very simple corpus)J
1038: 62 134 :M
1039: 2.977 .298(consisting of only two trees \(we leave out some)J
1040: 62 147 :M
1041: 1.31 .131(subcategorizations to keep the example simple\):)J
1042: 62 155 225 115 rC
1043: -1 -1 184 178 1 1 205 163 @b
1044: 205 164 -1 1 226 176 1 205 163 @a
1045: 201 156 10 9 rC
1046: gS
1047: .634 .625 scale
1048: 324.71 256.225 :T
1049: -324.71 -256.225 :T
1050: 318.71 259.225 :M
1051: ( S)S
1052: gR
1053: gR
1054: gS 181 178 10 9 rC
1055: gS
1056: .634 .625 scale
1057: 292.577 291.429 :T
1058: -292.577 -291.429 :T
1059: 285.577 294.429 :M
1060: f3_10 sf
1061: (NP)S
1062: gR
1063: gR
1064: gS 62 155 225 115 rC
1065: -1 -1 185 199 1 1 184 186 @b
1066: 179 198 36 9 rC
1067: gS
1068: .634 .625 scale
1069: 310.999 323.432 :T
1070: -310.999 -323.432 :T
1071: 283.999 326.432 :M
1072: f3_10 sf
1073: (she)S
1074: gR
1075: gR
1076: gS 224 177 12 8 rC
1077: gS
1078: .634 .625 scale
1079: 361.421 288.829 :T
1080: -361.421 -288.829 :T
1081: 353.421 292.829 :M
1082: f3_10 sf
1083: (VP)S
1084: gR
1085: gR
1086: gS 201 199 12 9 rC
1087: gS
1088: .634 .625 scale
1089: 326.71 325.632 :T
1090: -326.71 -325.632 :T
1091: 318.71 329.632 :M
1092: f3_10 sf
1093: (VP)S
1094: gR
1095: gR
1096: gS 62 155 225 115 rC
1097: -1 -1 197 219 1 1 205 208 @b
1098: 190 218 10 9 rC
1099: gS
1100: .634 .625 scale
1101: 307.355 355.435 :T
1102: -307.355 -355.435 :T
1103: 301.355 358.435 :M
1104: f3_10 sf
1105: ( V)S
1106: gR
1107: gR
1108: gS 62 155 225 115 rC
1109: 205 209 -1 1 214 217 1 205 208 @a
1110: 212 217 12 8 rC
1111: gS
1112: .634 .625 scale
1113: 342.488 352.835 :T
1114: -342.488 -352.835 :T
1115: 334.488 356.835 :M
1116: f3_10 sf
1117: (NP)S
1118: gR
1119: gR
1120: gS 62 155 225 115 rC
1121: -1 -1 195 237 1 1 194 226 @b
1122: 246 199 11 9 rC
1123: gS
1124: .634 .625 scale
1125: 395.132 325.632 :T
1126: -395.132 -325.632 :T
1127: 388.132 329.632 :M
1128: f3_10 sf
1129: (PP)S
1130: gR
1131: gR
1132: gS 237 217 8 8 rC
1133: gS
1134: .634 .625 scale
1135: 380.51 352.835 :T
1136: -380.51 -352.835 :T
1137: 375.51 356.835 :M
1138: f3_10 sf
1139: ( P)S
1140: gR
1141: gR
1142: gS 257 216 10 10 rC
1143: gS
1144: .634 .625 scale
1145: 412.488 353.835 :T
1146: -412.488 -353.835 :T
1147: 405.488 356.835 :M
1148: f3_10 sf
1149: (NP)S
1150: gR
1151: gR
1152: gS 62 155 225 115 rC
1153: -1 -1 241 237 1 1 240 225 @b
1154: -1 -1 68 174 1 1 80 163 @b
1155: 80 164 -1 1 95 173 1 80 163 @a
1156: 77 156 8 8 rC
1157: gS
1158: .634 .625 scale
1159: 128.066 255.225 :T
1160: -128.066 -255.225 :T
1161: 123.066 259.225 :M
1162: f3_10 sf
1163: ( S)S
1164: gR
1165: gR
1166: gS 63 173 12 8 rC
1167: gS
1168: .634 .625 scale
1169: 108.978 282.428 :T
1170: -108.978 -282.428 :T
1171: 100.978 286.428 :M
1172: f3_10 sf
1173: (NP)S
1174: gR
1175: gR
1176: gS 91 173 12 8 rC
1177: gS
1178: .634 .625 scale
1179: 153.155 282.428 :T
1180: -153.155 -282.428 :T
1181: 145.155 286.428 :M
1182: f3_10 sf
1183: (VP)S
1184: gR
1185: gR
1186: gS 62 155 225 115 rC
1187: -1 -1 67 194 1 1 66 182 @b
1188: 75 194 9 9 rC
1189: gS
1190: .634 .625 scale
1191: 124.333 317.031 :T
1192: -124.333 -317.031 :T
1193: 118.333 320.031 :M
1194: f3_10 sf
1195: ( V)S
1196: gR
1197: gR
1198: gS 62 155 225 115 rC
1199: -1 -1 79 215 1 1 78 202 @b
1200: 69 214 27 9 rC
1201: gS
1202: .634 .625 scale
1203: 130.444 349.034 :T
1204: -130.444 -349.034 :T
1205: 110.444 352.034 :M
1206: f3_10 sf
1207: (wanted)S
1208: gR
1209: gR
1210: gS 106 193 12 8 rC
1211: gS
1212: .634 .625 scale
1213: 176.822 314.431 :T
1214: -176.822 -314.431 :T
1215: 168.822 318.431 :M
1216: f3_10 sf
1217: (NP)S
1218: gR
1219: gR
1220: gS 91 221 11 8 rC
1221: gS
1222: .634 .625 scale
1223: 152.155 359.235 :T
1224: -152.155 -359.235 :T
1225: 145.155 363.235 :M
1226: f3_10 sf
1227: (NP)S
1228: gR
1229: gR
1230: gS 122 219 10 9 rC
1231: gS
1232: .634 .625 scale
1233: 199.488 357.635 :T
1234: -199.488 -357.635 :T
1235: 192.488 361.635 :M
1236: f3_10 sf
1237: (PP)S
1238: gR
1239: gR
1240: gS 132 241 10 8 rC
1241: gS
1242: .634 .625 scale
1243: 215.266 391.239 :T
1244: -215.266 -391.239 :T
1245: 208.266 395.239 :M
1246: f3_10 sf
1247: (NP)S
1248: gR
1249: gR
1250: gS 62 155 225 115 rC
1251: -1 -1 116 262 1 1 115 249 @b
1252: 112 241 8 10 rC
1253: gS
1254: .634 .625 scale
1255: 183.288 393.839 :T
1256: -183.288 -393.839 :T
1257: 178.288 396.839 :M
1258: f3_10 sf
1259: ( P)S
1260: gR
1261: gR
1262: gS 62 155 225 115 rC
1263: gR
1264: gS 29 30 538 781 rC
1265: 79 194 :M
1266: 88.999 185.331 94 181 94 181 :C
1267: 94 181 96.332 182.997 101 187 :C
1268: 105.665 190.997 108 193 108 193 :C
1269: stroke
1270: 62 155 225 115 rC
1271: gR
1272: gS 29 30 538 781 rC
1273: 97 220 :M
1274: 106.332 207.997 111 202 111 202 :C
1275: 111 202 113.165 204.663 117.5 210 :C
1276: 121.832 215.33 124 218 124 218 :C
1277: stroke
1278: 62 155 225 115 rC
1279: gR
1280: gS 29 30 538 781 rC
1281: 115 241 :M
1282: 121.665 232.33 125 228 125 228 :C
1283: 125 228 126.665 229.996 130 234 :C
1284: 133.331 237.996 135 240 135 240 :C
1285: stroke
1286: 63 193 12 8 rC
1287: gS
1288: .634 .625 scale
1289: 107.4 314.431 :T
1290: -107.4 -314.431 :T
1291: 99.4 318.431 :M
1292: f3_10 sf
1293: (she)S
1294: gR
1295: gR
1296: gS 62 155 225 115 rC
1297: gR
1298: gS 29 30 538 781 rC
1299: 84 242 :M
1300: 90.665 233.33 94 229 94 229 :C
1301: 94 229 95.665 230.996 99 235 :C
1302: 102.332 238.996 104 241 104 241 :C
1303: stroke
1304: 80 242 9 8 rC
1305: gS
1306: .634 .625 scale
1307: 132.222 392.839 :T
1308: -132.222 -392.839 :T
1309: 126.222 396.839 :M
1310: f3_10 sf
1311: (the)S
1312: gR
1313: gR
1314: gS 97 242 16 8 rC
1315: gS
1316: .634 .625 scale
1317: 164.044 392.839 :T
1318: -164.044 -392.839 :T
1319: 153.044 396.839 :M
1320: f3_10 sf
1321: (dress)S
1322: gR
1323: gR
1324: gS 62 155 225 115 rC
1325: gR
1326: gS 29 30 538 781 rC
1327: 127 261 :M
1328: 132.998 252.329 136 248 136 248 :C
1329: 136 248 137.664 250.163 141 254.5 :C
1330: 144.331 258.829 146 261 146 261 :C
1331: stroke
1332: 124 261 10 8 rC
1333: gS
1334: .634 .625 scale
1335: 203.222 423.242 :T
1336: -203.222 -423.242 :T
1337: 197.222 427.242 :M
1338: f3_10 sf
1339: (the)S
1340: gR
1341: gR
1342: gS 141 260 14 9 rC
1343: gS
1344: .634 .625 scale
1345: 233.044 422.642 :T
1346: -233.044 -422.642 :T
1347: 224.044 425.642 :M
1348: f3_10 sf
1349: (rack)S
1350: gR
1351: gR
1352: gS 113 261 7 8 rC
1353: gS
1354: .634 .625 scale
1355: 182.288 423.242 :T
1356: -182.288 -423.242 :T
1357: 178.288 427.242 :M
1358: f3_10 sf
1359: (on)S
1360: gR
1361: gR
1362: gS 62 155 225 115 rC
1363: -1 -1 206 199 1 1 227 184 @b
1364: 227 185 -1 1 248 198 1 227 184 @a
1365: gR
1366: gS 29 30 538 781 rC
1367: 205 235 :M
1368: 211.663 227.663 215 224 215 224 :C
1369: 215 224 216.663 225.663 220 229 :C
1370: 223.33 232.33 225 234 225 234 :C
1371: stroke
1372: 203 236 9 8 rC
1373: gS
1374: .634 .625 scale
1375: 326.288 383.238 :T
1376: -326.288 -383.238 :T
1377: 320.288 387.238 :M
1378: f3_10 sf
1379: (the)S
1380: gR
1381: gR
1382: gS 220 236 16 8 rC
1383: gS
1384: .634 .625 scale
1385: 358.11 383.238 :T
1386: -358.11 -383.238 :T
1387: 347.11 387.238 :M
1388: f3_10 sf
1389: (dog)S
1390: gR
1391: gR
1392: gS 62 155 225 115 rC
1393: gR
1394: gS 29 30 538 781 rC
1395: 240 217 :M
1396: 246.663 210.33 250 207 250 207 :C
1397: 250 207 251.663 208.663 255 212 :C
1398: 258.329 215.33 260 217 260 217 :C
1399: stroke
1400: 62 155 225 115 rC
1401: gR
1402: gS 29 30 538 781 rC
1403: 252 235 :M
1404: 257.996 227.663 261 224 261 224 :C
1405: 261 224 262.663 225.83 266 229.5 :C
1406: 269.329 233.163 271 235 271 235 :C
1407: stroke
1408: 249 236 9 9 rC
1409: gS
1410: .634 .625 scale
1411: 400.444 384.238 :T
1412: -400.444 -384.238 :T
1413: 394.444 387.238 :M
1414: f3_10 sf
1415: (the)S
1416: gR
1417: gR
1418: gS 189 236 16 8 rC
1419: gS
1420: .634 .625 scale
1421: 309.199 383.238 :T
1422: -309.199 -383.238 :T
1423: 298.199 387.238 :M
1424: f3_10 sf
1425: (saw)S
1426: gR
1427: gR
1428: gS 234 236 16 8 rC
1429: gS
1430: .634 .625 scale
1431: 381.777 383.238 :T
1432: -381.777 -383.238 :T
1433: 370.777 387.238 :M
1434: f3_10 sf
1435: (with)S
1436: gR
1437: gR
1438: gS 262 236 24 8 rC
1439: gS
1440: .634 .625 scale
1441: 431.377 383.238 :T
1442: -431.377 -383.238 :T
1443: 413.377 387.238 :M
1444: f3_10 sf
1445: (telescope)S
1446: gR
1447: gR
1448: gS 29 30 538 781 rC
1449: 103 287 :M
1450: f3_9 sf
1451: .398 .04(Figure 1. A simple corpus of two trees.)J
1452: 62 305 :M
1453: f3_10 sf
1454: 1.665 .167(A new sentence such as )J
1455: f4_10 sf
1456: 1.75 .175(She saw the dress with the)J
1457: 62 318 :M
1458: .374(telescope)A
1459: f3_10 sf
1460: 1.414 .141( can be parsed by combining subtrees from)J
1461: 62 331 :M
1462: 5.081 .508(this corpus by means of leftmost-substitution)J
1463: 62 344 :M
1464: 1.382 .138(\(indicated as )J
1465: f1_10 sf
1466: 0 2 rm
1467: .353<B0>A
1468: 0 -2 rm
1469: f3_10 sf
1470: .538(\):)A
1471: 62 353 239 89 rC
1472: -1 -1 67 377 1 1 83 362 @b
1473: 83 363 -1 1 100 375 1 83 362 @a
1474: 80 355 8 9 rC
1475: gS
1476: .511 .61 scale
1477: 164.621 589.203 :T
1478: -164.621 -589.203 :T
1479: 158.621 592.203 :M
1480: ( S)S
1481: gR
1482: gR
1483: gS 64 376 8 9 rC
1484: gS
1485: .511 .61 scale
1486: 132.33 623.652 :T
1487: -132.33 -623.652 :T
1488: 125.33 626.652 :M
1489: f3_10 sf
1490: (NP)S
1491: gR
1492: gR
1493: gS 62 353 239 89 rC
1494: -1 -1 68 397 1 1 67 384 @b
1495: 63 396 29 9 rC
1496: gS
1497: .511 .61 scale
1498: 150.372 656.461 :T
1499: -150.372 -656.461 :T
1500: 123.372 659.461 :M
1501: f3_10 sf
1502: (she)S
1503: gR
1504: gR
1505: gS 98 376 10 7 rC
1506: gS
1507: .511 .61 scale
1508: 201.87 622.652 :T
1509: -201.87 -622.652 :T
1510: 193.87 626.652 :M
1511: f3_10 sf
1512: (VP)S
1513: gR
1514: gR
1515: gS 80 398 10 7 rC
1516: gS
1517: .511 .61 scale
1518: 166.621 658.742 :T
1519: -166.621 -658.742 :T
1520: 158.621 662.742 :M
1521: f3_10 sf
1522: (VP)S
1523: gR
1524: gR
1525: gS 62 353 239 89 rC
1526: -1 -1 77 416 1 1 83 405 @b
1527: 71 415 8 9 rC
1528: gS
1529: .511 .61 scale
1530: 146.997 687.63 :T
1531: -146.997 -687.63 :T
1532: 140.997 690.63 :M
1533: f3_10 sf
1534: ( V)S
1535: gR
1536: gR
1537: gS 62 353 239 89 rC
1538: 83 406 -1 1 91 415 1 83 405 @a
1539: 89 415 9 7 rC
1540: gS
1541: .511 .61 scale
1542: 182.287 686.63 :T
1543: -182.287 -686.63 :T
1544: 174.287 690.63 :M
1545: f3_10 sf
1546: (NP)S
1547: gR
1548: gR
1549: gS 62 353 239 89 rC
1550: -1 -1 75 434 1 1 74 423 @b
1551: 116 398 9 7 rC
1552: gS
1553: .511 .61 scale
1554: 236.119 658.742 :T
1555: -236.119 -658.742 :T
1556: 229.119 662.742 :M
1557: f3_10 sf
1558: (PP)S
1559: gR
1560: gR
1561: gS 62 353 239 89 rC
1562: -1 -1 84 397 1 1 101 383 @b
1563: 101 384 -1 1 118 396 1 101 383 @a
1564: 70 433 13 8 rC
1565: gS
1566: .511 .61 scale
1567: 150.038 716.158 :T
1568: -150.038 -716.158 :T
1569: 139.038 720.158 :M
1570: f3_10 sf
1571: (saw)S
1572: gR
1573: gR
1574: gS 174 355 8 8 rC
1575: gS
1576: .511 .61 scale
1577: 347.742 588.203 :T
1578: -347.742 -588.203 :T
1579: 340.742 592.203 :M
1580: f3_10 sf
1581: (PP)S
1582: gR
1583: gR
1584: gS 167 372 6 8 rC
1585: gS
1586: .511 .61 scale
1587: 332.034 616.09 :T
1588: -332.034 -616.09 :T
1589: 327.034 620.09 :M
1590: f3_10 sf
1591: ( P)S
1592: gR
1593: gR
1594: gS 183 371 8 9 rC
1595: gS
1596: .511 .61 scale
1597: 364.366 615.45 :T
1598: -364.366 -615.45 :T
1599: 358.366 618.45 :M
1600: f3_10 sf
1601: (NP)S
1602: gR
1603: gR
1604: gS 62 353 239 89 rC
1605: -1 -1 170 391 1 1 169 380 @b
1606: gR
1607: gS 29 30 538 781 rC
1608: 169 372 :M
1609: 174.331 365.328 177 362 177 362 :C
1610: 177 362 178.331 363.661 181 367 :C
1611: 183.664 370.328 185 372 185 372 :C
1612: stroke
1613: 62 353 239 89 rC
1614: gR
1615: gS 29 30 538 781 rC
1616: 179 390 :M
1617: 183.664 382.661 186 379 186 379 :C
1618: 186 379 187.33 380.827 190 384.5 :C
1619: 192.664 388.161 194 390 194 390 :C
1620: stroke
1621: 176 390 8 9 rC
1622: gS
1623: .511 .61 scale
1624: 352.617 646.618 :T
1625: -352.617 -646.618 :T
1626: 346.617 649.618 :M
1627: f3_10 sf
1628: (the)S
1629: gR
1630: gR
1631: gS 165 391 12 7 rC
1632: gS
1633: .511 .61 scale
1634: 334.117 645.618 :T
1635: -334.117 -645.618 :T
1636: 323.117 649.618 :M
1637: f3_10 sf
1638: (with)S
1639: gR
1640: gR
1641: gS 186 391 20 7 rC
1642: gS
1643: .511 .61 scale
1644: 382.241 645.618 :T
1645: -382.241 -645.618 :T
1646: 364.241 649.618 :M
1647: f3_10 sf
1648: (telescope)S
1649: gR
1650: gR
1651: gS 132 355 9 8 rC
1652: gS
1653: .511 .61 scale
1654: 265.494 588.203 :T
1655: -265.494 -588.203 :T
1656: 258.494 592.203 :M
1657: f3_10 sf
1658: (NP)S
1659: gR
1660: gR
1661: gS 62 353 239 89 rC
1662: gR
1663: gS 29 30 538 781 rC
1664: 127 375 :M
1665: 132.331 366.328 135 362 135 362 :C
1666: 135 362 136.331 363.994 139 368 :C
1667: 141.665 371.994 143 374 143 374 :C
1668: stroke
1669: 124 375 7 8 rC
1670: gS
1671: .511 .61 scale
1672: 248.827 621.012 :T
1673: -248.827 -621.012 :T
1674: 242.827 625.012 :M
1675: f3_10 sf
1676: (the)S
1677: gR
1678: gR
1679: gS 138 375 12 8 rC
1680: gS
1681: .511 .61 scale
1682: 281.243 621.012 :T
1683: -281.243 -621.012 :T
1684: 270.243 625.012 :M
1685: f3_10 sf
1686: (dress)S
1687: gR
1688: gR
1689: gS 108 354 5 13 rC
1690: gS
1691: .511 .61 scale
1692: 216.453 590.484 :T
1693: -216.453 -590.484 :T
1694: 213.453 595.484 :M
1695: f1_14 sf
1696: <B0>S
1697: gR
1698: gR
1699: gS 154 354 5 12 rC
1700: gS
1701: .511 .61 scale
1702: 304.576 590.484 :T
1703: -304.576 -590.484 :T
1704: 301.576 595.484 :M
1705: f1_14 sf
1706: <B0>S
1707: gR
1708: gR
1709: gS 62 353 239 89 rC
1710: -1 -1 218 376 1 1 235 362 @b
1711: 235 363 -1 1 252 374 1 235 362 @a
1712: 232 354 8 9 rC
1713: gS
1714: .511 .61 scale
1715: 462.281 587.562 :T
1716: -462.281 -587.562 :T
1717: 456.281 590.562 :M
1718: f3_10 sf
1719: ( S)S
1720: gR
1721: gR
1722: gS 215 376 9 9 rC
1723: gS
1724: .511 .61 scale
1725: 429.99 623.652 :T
1726: -429.99 -623.652 :T
1727: 422.99 626.652 :M
1728: f3_10 sf
1729: (NP)S
1730: gR
1731: gR
1732: gS 62 353 239 89 rC
1733: -1 -1 219 397 1 1 218 383 @b
1734: 214 395 29 9 rC
1735: gS
1736: .511 .61 scale
1737: 448.031 654.821 :T
1738: -448.031 -654.821 :T
1739: 421.031 657.821 :M
1740: f3_10 sf
1741: (she)S
1742: gR
1743: gR
1744: gS 250 375 10 8 rC
1745: gS
1746: .511 .61 scale
1747: 497.572 621.012 :T
1748: -497.572 -621.012 :T
1749: 489.572 625.012 :M
1750: f3_10 sf
1751: (VP)S
1752: gR
1753: gR
1754: gS 232 397 10 8 rC
1755: gS
1756: .511 .61 scale
1757: 464.281 657.102 :T
1758: -464.281 -657.102 :T
1759: 456.281 661.102 :M
1760: f3_10 sf
1761: (VP)S
1762: gR
1763: gR
1764: gS 62 353 239 89 rC
1765: -1 -1 228 416 1 1 235 405 @b
1766: 223 415 8 9 rC
1767: gS
1768: .511 .61 scale
1769: 442.698 687.63 :T
1770: -442.698 -687.63 :T
1771: 436.698 690.63 :M
1772: f3_10 sf
1773: ( V)S
1774: gR
1775: gR
1776: gS 62 353 239 89 rC
1777: 235 406 -1 1 243 414 1 235 405 @a
1778: 240 414 10 8 rC
1779: gS
1780: .511 .61 scale
1781: 479.947 684.989 :T
1782: -479.947 -684.989 :T
1783: 471.947 688.989 :M
1784: f3_10 sf
1785: (NP)S
1786: gR
1787: gR
1788: gS 62 353 239 89 rC
1789: -1 -1 227 434 1 1 226 422 @b
1790: 268 397 8 8 rC
1791: gS
1792: .511 .61 scale
1793: 531.821 657.102 :T
1794: -531.821 -657.102 :T
1795: 524.821 661.102 :M
1796: f3_10 sf
1797: (PP)S
1798: gR
1799: gR
1800: gS 261 414 6 8 rC
1801: gS
1802: .511 .61 scale
1803: 516.113 684.989 :T
1804: -516.113 -684.989 :T
1805: 511.113 688.989 :M
1806: f3_10 sf
1807: ( P)S
1808: gR
1809: gR
1810: gS 276 413 9 9 rC
1811: gS
1812: .511 .61 scale
1813: 549.445 684.349 :T
1814: -549.445 -684.349 :T
1815: 542.445 687.349 :M
1816: f3_10 sf
1817: (NP)S
1818: gR
1819: gR
1820: gS 62 353 239 89 rC
1821: -1 -1 264 433 1 1 263 422 @b
1822: -1 -1 236 397 1 1 252 382 @b
1823: 252 383 -1 1 270 395 1 252 382 @a
1824: gR
1825: gS 29 30 538 781 rC
1826: 235 432 :M
1827: 240.33 424.66 243 421 243 421 :C
1828: 243 421 244.33 422.66 247 426 :C
1829: 249.663 429.327 251 431 251 431 :C
1830: stroke
1831: 233 432 8 8 rC
1832: gS
1833: .511 .61 scale
1834: 464.239 714.517 :T
1835: -464.239 -714.517 :T
1836: 458.239 718.517 :M
1837: f3_10 sf
1838: (the)S
1839: gR
1840: gR
1841: gS 62 353 239 89 rC
1842: gR
1843: gS 29 30 538 781 rC
1844: 263 414 :M
1845: 268.329 407.327 271 404 271 404 :C
1846: 271 404 272.329 405.66 275 409 :C
1847: 277.662 412.327 279 414 279 414 :C
1848: stroke
1849: 62 353 239 89 rC
1850: gR
1851: gS 29 30 538 781 rC
1852: 273 432 :M
1853: 277.662 424.66 280 421 280 421 :C
1854: 280 421 281.329 422.827 284 426.5 :C
1855: 286.662 430.16 288 432 288 432 :C
1856: stroke
1857: 270 432 8 9 rC
1858: gS
1859: .511 .61 scale
1860: 536.696 715.517 :T
1861: -536.696 -715.517 :T
1862: 530.696 718.517 :M
1863: f3_10 sf
1864: (the)S
1865: gR
1866: gR
1867: gS 222 433 13 7 rC
1868: gS
1869: .511 .61 scale
1870: 445.74 715.517 :T
1871: -445.74 -715.517 :T
1872: 434.74 718.517 :M
1873: f3_10 sf
1874: (saw)S
1875: gR
1876: gR
1877: gS 259 433 12 7 rC
1878: gS
1879: .511 .61 scale
1880: 518.196 715.517 :T
1881: -518.196 -715.517 :T
1882: 507.196 718.517 :M
1883: f3_10 sf
1884: (with)S
1885: gR
1886: gR
1887: gS 281 433 19 7 rC
1888: gS
1889: .511 .61 scale
1890: 568.278 715.517 :T
1891: -568.278 -715.517 :T
1892: 550.278 718.517 :M
1893: f3_10 sf
1894: (telescope)S
1895: gR
1896: gR
1897: gS 198 355 5 8 rC
1898: gS
1899: .511 .61 scale
1900: 392.699 588.203 :T
1901: -392.699 -588.203 :T
1902: 389.699 592.203 :M
1903: f7_9 sf
1904: (=)S
1905: gR
1906: gR
1907: gS 245 432 13 8 rC
1908: gS
1909: .511 .61 scale
1910: 490.78 714.517 :T
1911: -490.78 -714.517 :T
1912: 479.78 718.517 :M
1913: f3_10 sf
1914: (dress)S
1915: gR
1916: gR
1917: gS 29 30 538 781 rC
1918: 63 456 :M
1919: f3_9 sf
1920: .64 .064(Figure 2. Derivation and parse tree for the sentence )J
1921: 260 456 :M
1922: f4_9 sf
1923: -.041(She saw)A
1924: 125 468 :M
1925: .452 .045(the dress with the telescope)J
1926: 62 486 :M
1927: f3_10 sf
1928: 4.211 .421(Note that other derivations, involving different)J
1929: 62 499 :M
1930: 1.031 .103(subtrees, may yield the same parse tree; for instance:)J
1931: 62 507 229 88 rC
1932: -1 -1 66 529 1 1 85 515 @b
1933: 85 516 -1 1 105 528 1 85 515 @a
1934: 83 508 9 9 rC
1935: gS
1936: .581 .607 scale
1937: 150.527 843.944 :T
1938: -150.527 -843.944 :T
1939: 144.527 846.944 :M
1940: ( S)S
1941: gR
1942: gR
1943: gS 64 529 10 9 rC
1944: gS
1945: .581 .607 scale
1946: 117.116 878.547 :T
1947: -117.116 -878.547 :T
1948: 110.116 881.547 :M
1949: f3_10 sf
1950: (NP)S
1951: gR
1952: gR
1953: gS 62 507 229 88 rC
1954: -1 -1 68 550 1 1 67 537 @b
1955: 63 548 33 9 rC
1956: gS
1957: .581 .607 scale
1958: 135.395 909.854 :T
1959: -135.395 -909.854 :T
1960: 108.395 912.854 :M
1961: f3_10 sf
1962: (she)S
1963: gR
1964: gR
1965: gS 103 528 11 8 rC
1966: gS
1967: .581 .607 scale
1968: 186.938 875.899 :T
1969: -186.938 -875.899 :T
1970: 178.938 879.899 :M
1971: f3_10 sf
1972: (VP)S
1973: gR
1974: gR
1975: gS 83 550 11 8 rC
1976: gS
1977: .581 .607 scale
1978: 152.527 912.15 :T
1979: -152.527 -912.15 :T
1980: 144.527 916.15 :M
1981: f3_10 sf
1982: (VP)S
1983: gR
1984: gR
1985: gS 124 550 9 8 rC
1986: gS
1987: .581 .607 scale
1988: 220.349 912.15 :T
1989: -220.349 -912.15 :T
1990: 213.349 916.15 :M
1991: f3_10 sf
1992: (PP)S
1993: gR
1994: gR
1995: gS 115 567 8 8 rC
1996: gS
1997: .581 .607 scale
1998: 204.585 940.161 :T
1999: -204.585 -940.161 :T
2000: 199.585 944.161 :M
2001: f3_10 sf
2002: ( P)S
2003: gR
2004: gR
2005: gS 133 566 10 10 rC
2006: gS
2007: .581 .607 scale
2008: 237.555 941.161 :T
2009: -237.555 -941.161 :T
2010: 230.555 944.161 :M
2011: f3_10 sf
2012: (NP)S
2013: gR
2014: gR
2015: gS 62 507 229 88 rC
2016: -1 -1 119 586 1 1 118 575 @b
2017: -1 -1 87 550 1 1 106 536 @b
2018: 106 537 -1 1 125 548 1 106 536 @a
2019: gR
2020: gS 29 30 538 781 rC
2021: 118 567 :M
2022: 123.998 560.325 127 557 127 557 :C
2023: 127 557 128.498 558.658 131.5 562 :C
2024: 134.498 565.325 136 567 136 567 :C
2025: stroke
2026: 62 507 229 88 rC
2027: gR
2028: gS 29 30 538 781 rC
2029: 129 585 :M
2030: 134.331 577.658 137 574 137 574 :C
2031: 137 574 138.498 575.824 141.5 579.5 :C
2032: 144.498 583.158 146 585 146 585 :C
2033: stroke
2034: 127 585 8 9 rC
2035: gS
2036: .581 .607 scale
2037: 224.511 970.821 :T
2038: -224.511 -970.821 :T
2039: 218.511 973.821 :M
2040: f3_10 sf
2041: (the)S
2042: gR
2043: gR
2044: gS 113 585 15 9 rC
2045: gS
2046: .581 .607 scale
2047: 207.143 970.821 :T
2048: -207.143 -970.821 :T
2049: 196.143 973.821 :M
2050: f3_10 sf
2051: (with)S
2052: gR
2053: gR
2054: gS 138 585 23 9 rC
2055: gS
2056: .581 .607 scale
2057: 257.157 970.821 :T
2058: -257.157 -970.821 :T
2059: 239.157 973.821 :M
2060: f3_10 sf
2061: (telescope)S
2062: gR
2063: gR
2064: gS 156 508 9 7 rC
2065: gS
2066: .581 .607 scale
2067: 275.407 842.944 :T
2068: -275.407 -842.944 :T
2069: 268.407 846.944 :M
2070: f3_10 sf
2071: (NP)S
2072: gR
2073: gR
2074: gS 62 507 229 88 rC
2075: gR
2076: gS 29 30 538 781 rC
2077: 150 528 :M
2078: 155.331 519.325 158 515 158 515 :C
2079: 158 515 159.664 516.992 163 521 :C
2080: 166.331 524.992 168 527 168 527 :C
2081: stroke
2082: 146 528 9 8 rC
2083: gS
2084: .581 .607 scale
2085: 258.922 875.899 :T
2086: -258.922 -875.899 :T
2087: 252.922 879.899 :M
2088: f3_10 sf
2089: (the)S
2090: gR
2091: gR
2092: gS 162 528 15 8 rC
2093: gS
2094: .581 .607 scale
2095: 291.451 875.899 :T
2096: -291.451 -875.899 :T
2097: 280.451 879.899 :M
2098: f3_10 sf
2099: (dress)S
2100: gR
2101: gR
2102: gS 123 508 5 12 rC
2103: gS
2104: .581 .607 scale
2105: 216.349 846.887 :T
2106: -216.349 -846.887 :T
2107: 213.349 851.887 :M
2108: f1_14 sf
2109: <B0>S
2110: gR
2111: gR
2112: gS 185 508 5 7 rC
2113: gS
2114: .581 .607 scale
2115: 323.024 842.944 :T
2116: -323.024 -842.944 :T
2117: 320.024 846.944 :M
2118: f7_9 sf
2119: (=)S
2120: gR
2121: gR
2122: gS 62 507 229 88 rC
2123: -1 -1 196 529 1 1 215 515 @b
2124: 215 516 -1 1 234 528 1 215 515 @a
2125: 213 508 8 9 rC
2126: gS
2127: .581 .607 scale
2128: 372.479 843.944 :T
2129: -372.479 -843.944 :T
2130: 366.479 846.944 :M
2131: f3_10 sf
2132: ( S)S
2133: gR
2134: gR
2135: gS 193 529 10 9 rC
2136: gS
2137: .581 .607 scale
2138: 340.788 878.547 :T
2139: -340.788 -878.547 :T
2140: 333.788 881.547 :M
2141: f3_10 sf
2142: (NP)S
2143: gR
2144: gR
2145: gS 62 507 229 88 rC
2146: -1 -1 197 550 1 1 196 537 @b
2147: 192 548 33 9 rC
2148: gS
2149: .581 .607 scale
2150: 359.067 909.854 :T
2151: -359.067 -909.854 :T
2152: 332.067 912.854 :M
2153: f3_10 sf
2154: (she)S
2155: gR
2156: gR
2157: gS 233 528 11 8 rC
2158: gS
2159: .581 .607 scale
2160: 408.89 875.899 :T
2161: -408.89 -875.899 :T
2162: 400.89 879.899 :M
2163: f3_10 sf
2164: (VP)S
2165: gR
2166: gR
2167: gS 213 550 11 8 rC
2168: gS
2169: .581 .607 scale
2170: 374.479 912.15 :T
2171: -374.479 -912.15 :T
2172: 366.479 916.15 :M
2173: f3_10 sf
2174: (VP)S
2175: gR
2176: gR
2177: gS 62 507 229 88 rC
2178: -1 -1 208 569 1 1 215 558 @b
2179: 202 568 9 9 rC
2180: gS
2181: .581 .607 scale
2182: 355.273 942.809 :T
2183: -355.273 -942.809 :T
2184: 349.273 945.809 :M
2185: f3_10 sf
2186: ( V)S
2187: gR
2188: gR
2189: gS 62 507 229 88 rC
2190: 215 559 -1 1 224 567 1 215 558 @a
2191: 222 567 11 8 rC
2192: gS
2193: .581 .607 scale
2194: 389.964 940.161 :T
2195: -389.964 -940.161 :T
2196: 381.964 944.161 :M
2197: f3_10 sf
2198: (NP)S
2199: gR
2200: gR
2201: gS 62 507 229 88 rC
2202: -1 -1 206 587 1 1 205 576 @b
2203: 253 550 10 8 rC
2204: gS
2205: .581 .607 scale
2206: 444.021 912.15 :T
2207: -444.021 -912.15 :T
2208: 437.021 916.15 :M
2209: f3_10 sf
2210: (PP)S
2211: gR
2212: gR
2213: gS 245 567 8 8 rC
2214: gS
2215: .581 .607 scale
2216: 428.257 940.161 :T
2217: -428.257 -940.161 :T
2218: 423.257 944.161 :M
2219: f3_10 sf
2220: ( P)S
2221: gR
2222: gR
2223: gS 263 566 10 10 rC
2224: gS
2225: .581 .607 scale
2226: 461.227 941.161 :T
2227: -461.227 -941.161 :T
2228: 454.227 944.161 :M
2229: f3_10 sf
2230: (NP)S
2231: gR
2232: gR
2233: gS 62 507 229 88 rC
2234: -1 -1 248 586 1 1 247 575 @b
2235: -1 -1 216 550 1 1 235 536 @b
2236: 235 537 -1 1 255 548 1 235 536 @a
2237: gR
2238: gS 29 30 538 781 rC
2239: 215 585 :M
2240: 220.997 577.658 224 574 224 574 :C
2241: 224 574 225.496 575.658 228.5 579 :C
2242: 231.497 582.325 233 584 233 584 :C
2243: stroke
2244: 214 585 8 8 rC
2245: gS
2246: .581 .607 scale
2247: 374.199 969.821 :T
2248: -374.199 -969.821 :T
2249: 368.199 973.821 :M
2250: f3_10 sf
2251: (the)S
2252: gR
2253: gR
2254: gS 62 507 229 88 rC
2255: gR
2256: gS 29 30 538 781 rC
2257: 247 567 :M
2258: 252.996 560.325 256 557 256 557 :C
2259: 256 557 257.496 558.658 260.5 562 :C
2260: 263.496 565.325 265 567 265 567 :C
2261: stroke
2262: 62 507 229 88 rC
2263: gR
2264: gS 29 30 538 781 rC
2265: 258 585 :M
2266: 263.996 577.658 267 574 267 574 :C
2267: 267 574 268.329 575.824 271 579.5 :C
2268: 273.663 583.158 275 585 275 585 :C
2269: stroke
2270: 256 585 9 9 rC
2271: gS
2272: .581 .607 scale
2273: 448.183 970.821 :T
2274: -448.183 -970.821 :T
2275: 442.183 973.821 :M
2276: f3_10 sf
2277: (the)S
2278: gR
2279: gR
2280: gS 201 585 14 9 rC
2281: gS
2282: .581 .607 scale
2283: 356.832 970.821 :T
2284: -356.832 -970.821 :T
2285: 345.832 973.821 :M
2286: f3_10 sf
2287: (saw)S
2288: gR
2289: gR
2290: gS 243 585 14 9 rC
2291: gS
2292: .581 .607 scale
2293: 429.095 970.821 :T
2294: -429.095 -970.821 :T
2295: 418.095 973.821 :M
2296: f3_10 sf
2297: (with)S
2298: gR
2299: gR
2300: gS 268 585 22 9 rC
2301: gS
2302: .581 .607 scale
2303: 479.109 970.821 :T
2304: -479.109 -970.821 :T
2305: 461.109 973.821 :M
2306: f3_10 sf
2307: (telescope)S
2308: gR
2309: gR
2310: gS 227 585 15 8 rC
2311: gS
2312: .581 .607 scale
2313: 403.287 969.821 :T
2314: -403.287 -969.821 :T
2315: 392.287 973.821 :M
2316: f3_10 sf
2317: (dress)S
2318: gR
2319: gR
2320: gS 62 507 229 88 rC
2321: -1 -1 79 568 1 1 86 557 @b
2322: 74 567 8 9 rC
2323: gS
2324: .581 .607 scale
2325: 133.321 941.161 :T
2326: -133.321 -941.161 :T
2327: 127.321 944.161 :M
2328: f3_10 sf
2329: ( V)S
2330: gR
2331: gR
2332: gS 62 507 229 88 rC
2333: 86 558 -1 1 96 566 1 86 557 @a
2334: 93 566 11 8 rC
2335: gS
2336: .581 .607 scale
2337: 168.012 940.161 :T
2338: -168.012 -940.161 :T
2339: 160.012 944.161 :M
2340: f3_10 sf
2341: (NP)S
2342: gR
2343: gR
2344: gS 62 507 229 88 rC
2345: -1 -1 78 586 1 1 77 575 @b
2346: 73 585 15 8 rC
2347: gS
2348: .581 .607 scale
2349: 138.321 969.821 :T
2350: -138.321 -969.821 :T
2351: 127.321 973.821 :M
2352: f3_10 sf
2353: (saw)S
2354: gR
2355: gR
2356: gS 29 30 538 781 rC
2357: 66 609 :M
2358: f3_9 sf
2359: .714 .071(Figure 3. Different derivation yielding the same parse tree)J
2360: 103 621 :M
2361: -.247(for )A
2362: 116 621 :M
2363: f4_9 sf
2364: .266 .027(She saw the dress with the telescope)J
2365: 62 639 :M
2366: f3_10 sf
2367: 3.961 .396(Note also that, given this example corpus, the)J
2368: 62 652 :M
2369: 1.621 .162(sentence we considered is ambiguous; by combining)J
2370: -4096 -4095 -1 1 -4094 -4095 1 -4096 -4096 @a
2371: 62 666.24 -.24 .24 289.24 666 .24 62 666 @a
2372: 62 681 :M
2373: f3_9 sf
2374: 3.502 .35(\(Hoogweg 2000\), Tree-Adjoining Grammar )J
2375: 251 681 :M
2376: .145(\(Neumann)A
2377: 62 693 :M
2378: 1.395 .14(1998\), Lexical-Functional Grammar \(Bod & )J
2379: 238 693 :M
2380: 1.553 .155(Kaplan 1998;)J
2381: 62 705 :M
2382: 3.018 .302(Way 1999; Bod 2000a\), Head-driven )J
2383: 224 705 :M
2384: 2.711 .271(Phrase Structure)J
2385: 62 717 :M
2386: 1.678 .168(Grammar \(Neumann )J
2387: 146 717 :M
2388: 2.434 .243(& Flickinger 1999\), and Montague)J
2389: 62 729 :M
2390: 1.715 .171(Grammar \(van den Berg et al. 1994; Bod 1998\). For the)J
2391: 62 741 :M
2392: 1.757 .176(relation between DOP )J
2393: 154 741 :M
2394: 2.382 .238(and Memory-Based Learning, see)J
2395: 62 753 :M
2396: .974 .097(Daelemans \(1999\).)J
2397: 304 95 :M
2398: f3_10 sf
2399: 3.017 .302(other subtrees, a different parse may be derived,)J
2400: 304 108 :M
2401: 1.206 .121(which is analogous to the first rather than the second)J
2402: 304 121 :M
2403: 1.458 .146(corpus sentence:)J
2404: 304 129 234 107 rC
2405: -1 -1 309 148 1 1 320 138 @b
2406: 320 139 -1 1 333 147 1 320 138 @a
2407: 317 131 7 8 rC
2408: gS
2409: .581 .601 scale
2410: 550.947 223.925 :T
2411: -550.947 -223.925 :T
2412: 545.947 227.925 :M
2413: ( S)S
2414: gR
2415: gR
2416: gS 305 148 11 7 rC
2417: gS
2418: .581 .601 scale
2419: 535.002 252.208 :T
2420: -535.002 -252.208 :T
2421: 527.002 256.208 :M
2422: f3_10 sf
2423: (NP)S
2424: gR
2425: gR
2426: gS 330 148 11 7 rC
2427: gS
2428: .581 .601 scale
2429: 578.058 252.208 :T
2430: -578.058 -252.208 :T
2431: 570.058 256.208 :M
2432: f3_10 sf
2433: (VP)S
2434: gR
2435: gR
2436: gS 304 129 234 107 rC
2437: -1 -1 309 168 1 1 308 156 @b
2438: 315 167 9 9 rC
2439: gS
2440: .581 .601 scale
2441: 550.225 283.818 :T
2442: -550.225 -283.818 :T
2443: 544.225 287.818 :M
2444: f3_10 sf
2445: ( V)S
2446: gR
2447: gR
2448: gS 345 167 11 8 rC
2449: gS
2450: .581 .601 scale
2451: 602.169 283.818 :T
2452: -602.169 -283.818 :T
2453: 594.169 287.818 :M
2454: f3_10 sf
2455: (NP)S
2456: gR
2457: gR
2458: gS 331 193 10 8 rC
2459: gS
2460: .581 .601 scale
2461: 577.058 327.074 :T
2462: -577.058 -327.074 :T
2463: 570.058 331.074 :M
2464: f3_10 sf
2465: (NP)S
2466: gR
2467: gR
2468: gS 359 192 9 8 rC
2469: gS
2470: .581 .601 scale
2471: 625.28 325.41 :T
2472: -625.28 -325.41 :T
2473: 618.28 329.41 :M
2474: f3_10 sf
2475: (PP)S
2476: gR
2477: gR
2478: gS 304 129 234 107 rC
2479: gR
2480: gS 29 30 538 781 rC
2481: 319 167 :M
2482: 328.328 158.998 333 155 333 155 :C
2483: 333 155 334.995 156.831 339 160.5 :C
2484: 342.995 164.164 345 166 345 166 :C
2485: stroke
2486: 304 129 234 107 rC
2487: gR
2488: gS 29 30 538 781 rC
2489: 335 193 :M
2490: 343.661 180.997 348 175 348 175 :C
2491: 348 175 349.995 177.497 354 182.5 :C
2492: 357.995 187.497 360 190 360 190 :C
2493: stroke
2494: 305 167 11 8 rC
2495: gS
2496: .581 .601 scale
2497: 533.28 283.818 :T
2498: -533.28 -283.818 :T
2499: 525.28 287.818 :M
2500: f3_10 sf
2501: (she)S
2502: gR
2503: gR
2504: gS 304 129 234 107 rC
2505: gR
2506: gS 29 30 538 781 rC
2507: 324 214 :M
2508: 329.328 205.33 332 201 332 201 :C
2509: 332 201 333.661 202.997 337 207 :C
2510: 340.328 210.997 342 213 342 213 :C
2511: stroke
2512: 320 214 9 8 rC
2513: gS
2514: .581 .601 scale
2515: 558.836 362.011 :T
2516: -558.836 -362.011 :T
2517: 552.836 366.011 :M
2518: f3_10 sf
2519: (the)S
2520: gR
2521: gR
2522: gS 336 214 14 8 rC
2523: gS
2524: .581 .601 scale
2525: 591.391 362.011 :T
2526: -591.391 -362.011 :T
2527: 580.391 366.011 :M
2528: f3_10 sf
2529: (dress)S
2530: gR
2531: gR
2532: gS 361 131 9 9 rC
2533: gS
2534: .581 .601 scale
2535: 627.725 223.925 :T
2536: -627.725 -223.925 :T
2537: 621.725 227.925 :M
2538: f3_10 sf
2539: ( V)S
2540: gR
2541: gR
2542: gS 304 129 234 107 rC
2543: -1 -1 366 150 1 1 365 139 @b
2544: 361 149 14 8 rC
2545: gS
2546: .581 .601 scale
2547: 632.725 253.872 :T
2548: -632.725 -253.872 :T
2549: 621.725 257.872 :M
2550: f3_10 sf
2551: (saw)S
2552: gR
2553: gR
2554: gS 397 131 10 8 rC
2555: gS
2556: .581 .601 scale
2557: 692.447 223.925 :T
2558: -692.447 -223.925 :T
2559: 685.447 227.925 :M
2560: f3_10 sf
2561: (PP)S
2562: gR
2563: gR
2564: gS 389 148 7 7 rC
2565: gS
2566: .581 .601 scale
2567: 674.947 252.208 :T
2568: -674.947 -252.208 :T
2569: 669.947 256.208 :M
2570: f3_10 sf
2571: ( P)S
2572: gR
2573: gR
2574: gS 407 147 10 9 rC
2575: gS
2576: .581 .601 scale
2577: 709.67 251.544 :T
2578: -709.67 -251.544 :T
2579: 702.67 254.544 :M
2580: f3_10 sf
2581: (NP)S
2582: gR
2583: gR
2584: gS 304 129 234 107 rC
2585: -1 -1 393 167 1 1 392 155 @b
2586: gR
2587: gS 29 30 538 781 rC
2588: 392 148 :M
2589: 397.327 141.331 400 138 400 138 :C
2590: 400 138 401.66 139.664 405 143 :C
2591: 408.327 146.331 410 148 410 148 :C
2592: stroke
2593: 304 129 234 107 rC
2594: gR
2595: gS 29 30 538 781 rC
2596: 403 165 :M
2597: 408.327 157.664 411 154 411 154 :C
2598: 411 154 412.494 155.831 415.5 159.5 :C
2599: 418.494 163.164 420 165 420 165 :C
2600: stroke
2601: 400 166 9 9 rC
2602: gS
2603: .581 .601 scale
2604: 696.614 283.154 :T
2605: -696.614 -283.154 :T
2606: 690.614 286.154 :M
2607: f3_10 sf
2608: (the)S
2609: gR
2610: gR
2611: gS 387 166 15 7 rC
2612: gS
2613: .581 .601 scale
2614: 679.225 282.154 :T
2615: -679.225 -282.154 :T
2616: 668.225 286.154 :M
2617: f3_10 sf
2618: (with)S
2619: gR
2620: gR
2621: gS 412 166 23 7 rC
2622: gS
2623: .581 .601 scale
2624: 729.281 282.154 :T
2625: -729.281 -282.154 :T
2626: 711.281 286.154 :M
2627: f3_10 sf
2628: (telescope)S
2629: gR
2630: gR
2631: gS 426 130 5 8 rC
2632: gS
2633: .581 .601 scale
2634: 738.392 222.262 :T
2635: -738.392 -222.262 :T
2636: 735.392 226.262 :M
2637: f7_9 sf
2638: (=)S
2639: gR
2640: gR
2641: gS 304 129 234 107 rC
2642: -1 -1 451 147 1 1 462 137 @b
2643: 462 138 -1 1 475 146 1 462 137 @a
2644: 458 130 7 7 rC
2645: gS
2646: .581 .601 scale
2647: 794.503 222.262 :T
2648: -794.503 -222.262 :T
2649: 790.503 226.262 :M
2650: f3_10 sf
2651: ( S)S
2652: gR
2653: gR
2654: gS 447 146 10 8 rC
2655: gS
2656: .581 .601 scale
2657: 776.837 248.881 :T
2658: -776.837 -248.881 :T
2659: 769.837 252.881 :M
2660: f3_10 sf
2661: (NP)S
2662: gR
2663: gR
2664: gS 472 146 10 8 rC
2665: gS
2666: .581 .601 scale
2667: 820.892 248.881 :T
2668: -820.892 -248.881 :T
2669: 812.892 252.881 :M
2670: f3_10 sf
2671: (VP)S
2672: gR
2673: gR
2674: gS 304 129 234 107 rC
2675: -1 -1 451 167 1 1 450 155 @b
2676: 457 166 8 9 rC
2677: gS
2678: .581 .601 scale
2679: 793.059 283.154 :T
2680: -793.059 -283.154 :T
2681: 787.059 286.154 :M
2682: f3_10 sf
2683: ( V)S
2684: gR
2685: gR
2686: gS 486 166 11 7 rC
2687: gS
2688: .581 .601 scale
2689: 845.726 282.154 :T
2690: -845.726 -282.154 :T
2691: 838.726 286.154 :M
2692: f3_10 sf
2693: (NP)S
2694: gR
2695: gR
2696: gS 472 192 10 8 rC
2697: gS
2698: .581 .601 scale
2699: 820.615 325.41 :T
2700: -820.615 -325.41 :T
2701: 814.615 329.41 :M
2702: f3_10 sf
2703: (NP)S
2704: gR
2705: gR
2706: gS 500 191 10 8 rC
2707: gS
2708: .581 .601 scale
2709: 868.837 323.747 :T
2710: -868.837 -323.747 :T
2711: 862.837 327.747 :M
2712: f3_10 sf
2713: (PP)S
2714: gR
2715: gR
2716: gS 304 129 234 107 rC
2717: gR
2718: gS 29 30 538 781 rC
2719: 461 166 :M
2720: 470.326 157.998 475 154 475 154 :C
2721: 475 154 476.993 155.831 481 159.5 :C
2722: 484.993 163.164 487 165 487 165 :C
2723: stroke
2724: 304 129 234 107 rC
2725: gR
2726: gS 29 30 538 781 rC
2727: 477 192 :M
2728: 485.659 179.997 490 174 490 174 :C
2729: 490 174 491.992 176.497 496 181.5 :C
2730: 499.992 186.497 502 189 502 189 :C
2731: stroke
2732: 446 166 11 7 rC
2733: gS
2734: .581 .601 scale
2735: 777.837 282.154 :T
2736: -777.837 -282.154 :T
2737: 769.837 286.154 :M
2738: f3_10 sf
2739: (she)S
2740: gR
2741: gR
2742: gS 304 129 234 107 rC
2743: gR
2744: gS 29 30 538 781 rC
2745: 465 213 :M
2746: 470.993 204.33 474 200 474 200 :C
2747: 474 200 475.493 201.997 478.5 206 :C
2748: 481.493 209.997 483 212 483 212 :C
2749: stroke
2750: 462 213 8 7 rC
2751: gS
2752: .581 .601 scale
2753: 802.392 360.348 :T
2754: -802.392 -360.348 :T
2755: 797.392 364.348 :M
2756: f3_10 sf
2757: (the)S
2758: gR
2759: gR
2760: gS 478 213 14 7 rC
2761: gS
2762: .581 .601 scale
2763: 834.226 360.348 :T
2764: -834.226 -360.348 :T
2765: 823.226 364.348 :M
2766: f3_10 sf
2767: (dress)S
2768: gR
2769: gR
2770: gS 304 129 234 107 rC
2771: -1 -1 462 185 1 1 461 174 @b
2772: 456 184 15 8 rC
2773: gS
2774: .581 .601 scale
2775: 798.059 312.101 :T
2776: -798.059 -312.101 :T
2777: 787.059 316.101 :M
2778: f3_10 sf
2779: (saw)S
2780: gR
2781: gR
2782: gS 492 208 7 8 rC
2783: gS
2784: .581 .601 scale
2785: 852.337 352.029 :T
2786: -852.337 -352.029 :T
2787: 847.337 356.029 :M
2788: f3_10 sf
2789: ( P)S
2790: gR
2791: gR
2792: gS 510 208 10 9 rC
2793: gS
2794: .581 .601 scale
2795: 887.059 353.029 :T
2796: -887.059 -353.029 :T
2797: 880.059 356.029 :M
2798: f3_10 sf
2799: (NP)S
2800: gR
2801: gR
2802: gS 304 129 234 107 rC
2803: -1 -1 495 227 1 1 494 216 @b
2804: gR
2805: gS 29 30 538 781 rC
2806: 494 208 :M
2807: 499.992 201.997 503 199 503 199 :C
2808: 503 199 504.492 200.497 507.5 203.5 :C
2809: 510.492 206.497 512 208 512 208 :C
2810: stroke
2811: 304 129 234 107 rC
2812: gR
2813: gS 29 30 538 781 rC
2814: 505 226 :M
2815: 510.992 218.663 514 215 514 215 :C
2816: 514 215 515.325 216.83 518 220.5 :C
2817: 520.659 224.163 522 226 522 226 :C
2818: stroke
2819: 503 226 9 9 rC
2820: gS
2821: .581 .601 scale
2822: 874.004 382.976 :T
2823: -874.004 -382.976 :T
2824: 868.004 385.976 :M
2825: f3_10 sf
2826: (the)S
2827: gR
2828: gR
2829: gS 490 226 14 8 rC
2830: gS
2831: .581 .601 scale
2832: 854.892 381.976 :T
2833: -854.892 -381.976 :T
2834: 843.892 385.976 :M
2835: f3_10 sf
2836: (with)S
2837: gR
2838: gR
2839: gS 515 226 22 8 rC
2840: gS
2841: .581 .601 scale
2842: 904.948 381.976 :T
2843: -904.948 -381.976 :T
2844: 886.948 385.976 :M
2845: f3_10 sf
2846: (telescope)S
2847: gR
2848: gR
2849: gS 342 130 5 12 rC
2850: gS
2851: .581 .601 scale
2852: 592.003 226.253 :T
2853: -592.003 -226.253 :T
2854: 589.003 231.253 :M
2855: f1_14 sf
2856: <B0>S
2857: gR
2858: gR
2859: gS 382 130 5 12 rC
2860: gS
2861: .581 .601 scale
2862: 660.892 226.253 :T
2863: -660.892 -226.253 :T
2864: 657.892 231.253 :M
2865: f1_14 sf
2866: <B0>S
2867: gR
2868: gR
2869: gS 29 30 538 781 rC
2870: 306 250 :M
2871: f3_9 sf
2872: .593 .059(Figure 4. Different derivation yielding a different parse tree)J
2873: 345 262 :M
2874: -.247(for )A
2875: 358 262 :M
2876: f4_9 sf
2877: .266 .027(She saw the dress with the telescope)J
2878: 304 280 :M
2879: f3_10 sf
2880: 1.852 .185(The probabilistic and non-probabilistic DOP models)J
2881: 304 293 :M
2882: 1.367 .137(differ in the way they define the best parse tree of a)J
2883: 304 306 :M
2884: 1.078 .108(sentence. We now discuss these models separately.)J
2885: 304 328 :M
2886: f0_10 sf
2887: 1.127 .113(2.1  The probabilistic)J
2888: f3_10 sf
2889: .147 .015( )J
2890: f0_10 sf
2891: 1.787 .179(DOP model)J
2892: 304 341 :M
2893: f3_10 sf
2894: 3.391 .339(The probabilistic DOP model introduced in Bod)J
2895: 304 354 :M
2896: .883 .088(\(1992, 93\) computes the most probable parse tree of a)J
2897: 304 367 :M
2898: 1.747 .175(sentence from the normalized subtree frequencies in)J
2899: 304 380 :M
2900: 1.036 .104(the corpus. The probability of a subtree )J
2901: f4_10 sf
2902: .2(t)A
2903: f3_10 sf
2904: 1.124 .112( is estimated)J
2905: 304 393 :M
2906: 1.203 .12(as the number of occurrences of )J
2907: f4_10 sf
2908: .236(t)A
2909: f3_10 sf
2910: 1.105 .111( seen in the corpus,)J
2911: 304 406 :M
2912: .992 .099(divided by the total number of occurrences of corpus-)J
2913: 304 419 :M
2914: 1.978 .198(subtrees that have the same root label as )J
2915: f4_10 sf
2916: .411(t)A
2917: f3_10 sf
2918: 1.337 .134(. Let |\312)J
2919: f4_10 sf
2920: .39<74CA>A
2921: f3_10 sf
2922: (|)S
2923: 304 432 :M
2924: 1.877 .188(return the number of occurrences of )J
2925: f4_10 sf
2926: .343(t)A
2927: f3_10 sf
2928: 1.554 .155( in the corpus)J
2929: 304 445 :M
2930: 4.193 .419(and let )J
2931: f4_10 sf
2932: 1.551(r)A
2933: f3_10 sf
2934: 1.327<28>A
2935: f4_10 sf
2936: 1.108(t)A
2937: f3_10 sf
2938: 4.506 .451(\) return the root label of )J
2939: f4_10 sf
2940: 1.108(t)A
2941: f3_10 sf
2942: 5.982 .598( then:)J
2943: 304 461 :M
2944: f4_10 sf
2945: .331(P)A
2946: f3_10 sf
2947: .18<28>A
2948: f4_10 sf
2949: .15(t)A
2950: f3_10 sf
2951: .167<29CA3DCA7CCA>A
2952: f4_10 sf
2953: .143<74CA>A
2954: f3_10 sf
2955: .132<7CCA2FCA>A
2956: f1_12 sf
2957: .384(S)A
2958: f4_9 sf
2959: 0 2 rm
2960: .12(t')A
2961: 0 -2 rm
2962: f3_9 sf
2963: 0 2 rm
2964: (:)S
2965: 0 -2 rm
2966: 368 463 :M
2967: f4_9 sf
2968: (r)S
2969: 372 463 :M
2970: f3_9 sf
2971: -.17<28>A
2972: f4_9 sf
2973: -.251(t')A
2974: 380 463 :M
2975: f3_9 sf
2976: -.071(\)=)A
2977: 389 463 :M
2978: f4_9 sf
2979: -.267(r)A
2980: f3_9 sf
2981: <28>S
2982: 396 463 :M
2983: f4_9 sf
2984: (t)S
2985: 399 463 :M
2986: f3_9 sf
2987: .627<29CA>A
2988: f3_10 sf
2989: 0 -2 rm
2990: .537<7CCA>A
2991: 0 2 rm
2992: f4_10 sf
2993: 0 -2 rm
2994: .591<7427CA>A
2995: 0 2 rm
2996: f3_10 sf
2997: 0 -2 rm
2998: .537(|.)A
2999: 0 2 rm
3000: f3_7 sf
3001: 0 -5 rm
3002: .836(2)A
3003: 0 5 rm
3004: f3_10 sf
3005: 0 -2 rm
3006: 3.273 .327( The probability of a)J
3007: 0 2 rm
3008: 304 475 :M
3009: 4.622 .462(derivation is computed as the product of the)J
3010: 304 488 :M
3011: 3.46 .346(probabilities of the subtrees involved in it. The)J
3012: 304 501 :M
3013: 1.226 .123(probability of a parse tree is computed as the sum of)J
3014: 304 514 :M
3015: 4.001 .4(the probabilities of all distinct derivations that)J
3016: 304 527 :M
3017: 2.544 .254(produce that tree. The parse tree with the highest)J
3018: -4096 -4095 -1 1 -4094 -4095 1 -4096 -4096 @a
3019: 304 546.24 -.24 .24 447.24 546 .24 304 546 @a
3020: 304 558 :M
3021: f3_7 sf
3022: .236(2)A
3023: f3_9 sf
3024: 0 3 rm
3025: .84 .084( It should be stressed that there may be several other ways)J
3026: 0 -3 rm
3027: 304 573 :M
3028: 2.505 .25(to estimate subtree probabilities in DOP. For example,)J
3029: 304 585 :M
3030: 1.086 .109(Bonnema et al. \(1999\) estimate the probability of a subtree)J
3031: 304 597 :M
3032: .811 .081(as the probability that it has been involved in the derivation)J
3033: 304 609 :M
3034: .736 .074(of a corpus tree. It is not yet known whether this alternative)J
3035: 304 621 :M
3036: 1.512 .151(probability model outperforms the )J
3037: 442 621 :M
3038: 2.349 .235(model in Bod \(1993\).)J
3039: 304 633 :M
3040: 1.84 .184(Johnson \(1998\) pointed out that the subtree )J
3041: 484 633 :M
3042: 2.03 .203(estimator in)J
3043: 304 645 :M
3044: 1.169 .117(Bod \(1993\) )J
3045: 352 645 :M
3046: 1.931 .193(yields a statistically inconsistent model. This)J
3047: 304 657 :M
3048: 5.286 .529(means that as the training corpus increases the)J
3049: 304 669 :M
3050: 1.059 .106(corresponding sequences of probability distributions )J
3051: 507 669 :M
3052: 1.5 .15(do not)J
3053: 304 681 :M
3054: .491 .049(converge to )J
3055: 351 681 :M
3056: 1.256 .126(the true distribution that generated the training)J
3057: 304 693 :M
3058: 1.763 .176(data. Experiments with a )J
3059: 408 693 :M
3060: f4_9 sf
3061: .112(consistent)A
3062: 446 693 :M
3063: f3_9 sf
3064: 2.238 .224( maximum likelihood)J
3065: 304 705 :M
3066: .905 .09(estimator \(based on the inside-outside algorithm in Lari and)J
3067: 304 717 :M
3068: 1.957 .196(Young 1990\), leads however to a significant decrease )J
3069: 526 717 :M
3070: (in)S
3071: 304 729 :M
3072: 3.074 .307(parse accuracy on the ATIS and OVIS corpora. This)J
3073: 304 741 :M
3074: 2.061 .206(indicates that statistical consistency does not )J
3075: 490 741 :M
3076: .202(necessarily)A
3077: 304 753 :M
3078: .725 .072(lead to better performance.)J
3079: endp
3080: %%Page: 3 3
3081: %%BeginPageSetup
3082: initializepage
3083: (rens; page: 3 of 7)setjob
3084: %%EndPageSetup
3085: -29 -30 :T
3086: gS 29 30 538 781 rC
3087: 62 95 :M
3088: f3_10 sf
3089: 2.84 .284(probability is defined as the best parse tree of a)J
3090: 62 108 :M
3091: .26(sentence.)A
3092: 97 121 :M
3093: 1.708 .171(The probabilistic DOP model thus considers)J
3094: 62 134 :M
3095: 3.582 .358(counts of subtrees of a wide range of sizes in)J
3096: 62 147 :M
3097: 1.563 .156(computing the probability of a tree: everything from)J
3098: 62 160 :M
3099: .757 .076(counts of single-level rules to counts of entire trees.)J
3100: 62 182 :M
3101: f0_10 sf
3102: 1.186 .119(2.2  The non-probabilistic)J
3103: f3_10 sf
3104: .137 .014( )J
3105: f0_10 sf
3106: 1.667 .167(DOP model)J
3107: 62 195 :M
3108: f3_10 sf
3109: 3.767 .377(The non-probabilistic DOP model uses a rather)J
3110: 62 208 :M
3111: 1.662 .166(different definition of the best parse tree. Instead of)J
3112: 62 221 :M
3113: 1.711 .171(computing the most probable parse of a sentence, it)J
3114: 62 234 :M
3115: 1.935 .194(computes the parse tree which can be generated by)J
3116: 62 247 :M
3117: 3.835 .383(the fewest corpus-subtrees, i.e., by the shortest)J
3118: 62 260 :M
3119: 2.395 .239(derivation independent of the subtree probabilities.)J
3120: 62 273 :M
3121: 1.142 .114(Since subtrees are allowed to be of arbitrary size, the)J
3122: 62 286 :M
3123: 1.502 .15(shortest derivation typically corresponds to the parse)J
3124: 62 299 :M
3125: 3.304 .33(tree which consists of )J
3126: f4_10 sf
3127: .924(largest)A
3128: f3_10 sf
3129: 4.382 .438( possible corpus-)J
3130: 62 312 :M
3131: 3.551 .355(subtrees, thus maximizing syntactic context. For)J
3132: 62 325 :M
3133: 1.214 .121(example, given the corpus in Figure 1, the best parse)J
3134: 62 338 :M
3135: .908 .091(tree for )J
3136: f4_10 sf
3137: 1.256 .126(She saw the dress with the telescope)J
3138: f3_10 sf
3139: 1.007 .101( is given)J
3140: 62 351 :M
3141: 1.165 .117(in Figure 3, since that parse tree can be generated by)J
3142: 62 364 :M
3143: 2.062 .206(a derivation of only two corpus-subtrees, while the)J
3144: 62 377 :M
3145: 2.305 .23(parse tree in Figure 4 needs at least three corpus-)J
3146: 62 390 :M
3147: 1.16 .116(subtrees to be generated. \(Interestingly, the parse tree)J
3148: 62 403 :M
3149: 2.127 .213(with the shortest derivation in Figure 3 is also the)J
3150: 62 416 :M
3151: 2.301 .23(most probable parse tree according to probabilistic)J
3152: 62 429 :M
3153: 1.343 .134(DOP for this corpus, but this need not always be so.)J
3154: 62 442 :M
3155: 3.525 .352(As mentioned, the probabilistic DOP model has)J
3156: 62 455 :M
3157: 1.568 .157(already a bias to assign higher probabilities to parse)J
3158: 62 468 :M
3159: .875 .087(trees that can be generated by shorter derivations. The)J
3160: 62 481 :M
3161: 5.011 .501(non-probabilistic DOP model makes this bias)J
3162: 62 494 :M
3163: .205(absolute.\))A
3164: 97 507 :M
3165: 1.169 .117(The shortest derivation may not be unique: it)J
3166: 62 520 :M
3167: 2.18 .218(may happen that different parses of a sentence are)J
3168: 62 533 :M
3169: 2.301 .23(generated by the same minimal number of corpus-)J
3170: 62 546 :M
3171: 3.372 .337(subtrees. In that case the model backs off to a)J
3172: 62 559 :M
3173: 3.706 .371(frequency ordering of the subtrees. That is, all)J
3174: 62 572 :M
3175: 3.509 .351(subtrees of each root label are assigned a rank)J
3176: 62 585 :M
3177: 1.635 .163(according to their frequency in the corpus: the most)J
3178: 62 598 :M
3179: 1.793 .179(frequent subtree \(or subtrees\) of each root label get)J
3180: 62 611 :M
3181: 1.215 .121(rank 1, the second most frequent subtree gets rank 2,)J
3182: 62 624 :M
3183: 2.314 .231(etc. Next, the rank of each \(shortest\) derivation is)J
3184: 62 637 :M
3185: 2.434 .243(computed as the sum of the ranks of the subtrees)J
3186: 62 650 :M
3187: 2.241 .224(involved. The derivation with the smallest sum, or)J
3188: 62 663 :M
3189: .955 .095(highest rank, is taken as the best derivation producing)J
3190: 62 676 :M
3191: .939 .094(the best parse tree.)J
3192: 97 689 :M
3193: .936 .094(The way we compute the rank of a derivation)J
3194: 62 702 :M
3195: 1.906 .191(by summing up the ranks of its subtrees may seem)J
3196: 62 715 :M
3197: 1.696 .17(rather ad hoc. However, it is possible to provide an)J
3198: 62 728 :M
3199: 3.185 .318(information-theoretical motivation for this model.)J
3200: 62 741 :M
3201: 1.288 .129(According to Zipf's law, rank is roughly proportional)J
3202: 304 95 :M
3203: .922 .092(to the negative logarithm of frequency \(Zipf 1935\). In)J
3204: 304 108 :M
3205: 2.249 .225(Shannon's Information Theory \(Shannon 1948\), the)J
3206: 304 121 :M
3207: .889 .089(negative logarithm \(of base 2\) of the probability of an)J
3208: 304 134 :M
3209: .708 .071(event is better known as the )J
3210: f4_10 sf
3211: .238(information)A
3212: f3_10 sf
3213: .691 .069( of that event.)J
3214: 304 147 :M
3215: 1.147 .115(Thus, the rank of a subtree is roughly proportional to)J
3216: 304 160 :M
3217: 1.085 .109(its information. It follows that minimizing the sum of)J
3218: 304 173 :M
3219: 3.326 .333(the subtree ranks in a derivation corresponds to)J
3220: 304 186 :M
3221: .883 .088(minimizing the \(self-\)information of a derivation.)J
3222: 304 210 :M
3223: f0_12 sf
3224: .253 .025(3.  Computational Aspects)J
3225: 304 228 :M
3226: f0_10 sf
3227: 1.097 .11(3.1  Computing the most probable parse)J
3228: 304 241 :M
3229: f3_10 sf
3230: 3.75 .375(Bod \(1993\) showed how standard chart parsing)J
3231: 304 254 :M
3232: 1.218 .122(techniques can be applied to probabilistic DOP. Each)J
3233: 304 267 :M
3234: 1.43 .143(corpus-subtree )J
3235: f4_10 sf
3236: .162(t)A
3237: f3_10 sf
3238: .788 .079( is converted into a context-free rule )J
3239: f4_10 sf
3240: (r)S
3241: 304 280 :M
3242: f3_10 sf
3243: 1.577 .158(where the lefthand side of )J
3244: f4_10 sf
3245: .444(r)A
3246: f3_10 sf
3247: 1.711 .171( corresponds to the root)J
3248: 304 293 :M
3249: 1.272 .127(label of )J
3250: f4_10 sf
3251: .313(t)A
3252: f3_10 sf
3253: 1.351 .135( and the righthand side of )J
3254: f4_10 sf
3255: .438(r)A
3256: f3_10 sf
3257: 2.023 .202( corresponds to)J
3258: 304 306 :M
3259: 1.848 .185(the frontier labels of )J
3260: f4_10 sf
3261: .386(t)A
3262: f3_10 sf
3263: 1.752 .175(. Indices link the rules to the)J
3264: 304 319 :M
3265: 3.379 .338(original subtrees so as to maintain the subtree's)J
3266: 304 332 :M
3267: .826 .083(internal structure and probability. These rules are used)J
3268: 304 345 :M
3269: 1.863 .186(to create a derivation forest for a sentence, and the)J
3270: 304 358 :M
3271: 3.313 .331(most probable parse is computed by sampling a)J
3272: 304 371 :M
3273: 1.28 .128(sufficiently large number of random derivations from)J
3274: 304 384 :M
3275: 2.28 .228(the forest \("Monte Carlo disambiguation", see Bod)J
3276: 304 397 :M
3277: 4.192 .419(1998; Chappelier & Rajman 2000\). While this)J
3278: 304 410 :M
3279: 2.423 .242(technique has been successfully applied to parsing)J
3280: 304 423 :M
3281: 1.007 .101(the ATIS portion in the Penn Treebank \(Marcus et al.)J
3282: 304 436 :M
3283: 1.03 .103(1993\), it is extremely time consuming. This is mainly)J
3284: 304 449 :M
3285: .92 .092(because the number of random derivations that should)J
3286: 304 462 :M
3287: 2.496 .25(be sampled to reliably estimate the most probable)J
3288: 304 475 :M
3289: 4.055 .405(parse increases exponentially with the sentence)J
3290: 304 488 :M
3291: 1.494 .149(length \(see Goodman 1998\). It is therefore question-)J
3292: 304 501 :M
3293: 1.288 .129(able whether Bod's sampling technique can be scaled)J
3294: 304 514 :M
3295: 2.677 .268(to larger corpora such as the OVIS and the WSJ)J
3296: 304 527 :M
3297: (corpora.)S
3298: 339 540 :M
3299: 2.02 .202(Goodman \(1998\) showed how the probabil-)J
3300: 304 553 :M
3301: 3.713 .371(istic DOP model can be reduced to a compact)J
3302: 304 566 :M
3303: 4.589 .459(stochastic context-free grammar \(SCFG\) which)J
3304: 304 579 :M
3305: 1.874 .187(contains exactly eight SCFG rules for each node in)J
3306: 304 592 :M
3307: 1.49 .149(the training set trees. Although Goodman's reduction)J
3308: 304 605 :M
3309: 5.113 .511(method does still not allow for an efficient)J
3310: 304 618 :M
3311: 1.802 .18(computation of the most probable parse in DOP \(in)J
3312: 304 631 :M
3313: 2.541 .254(fact, the problem of computing the most probable)J
3314: 304 644 :M
3315: 2.161 .216(parse is NP-hard -- see Sima'an 1996\), his method)J
3316: 304 657 :M
3317: 3.915 .392(does allow for an efficient computation of the)J
3318: 304 670 :M
3319: 1.002 .1("maximum constituents parse", i.e., the parse tree that)J
3320: 304 683 :M
3321: 1.735 .174(is most likely to have the largest number of correct)J
3322: 304 696 :M
3323: 2.085 .208(constituents \(also called the "labeled recall parse"\).)J
3324: 304 709 :M
3325: 2.486 .249(Goodman has shown on the ATIS corpus that the)J
3326: 304 722 :M
3327: 1.15 .115(maximum constituents parse performs at least as well)J
3328: 304 735 :M
3329: 1.953 .195(as the most probable parse if all subtrees are used.)J
3330: 304 748 :M
3331: 1.691 .169(Unfortunately, Goodman's reduction method remains)J
3332: endp
3333: %%Page: 4 4
3334: %%BeginPageSetup
3335: initializepage
3336: (rens; page: 4 of 7)setjob
3337: %%EndPageSetup
3338: -29 -30 :T
3339: gS 29 30 538 781 rC
3340: 62 95 :M
3341: f3_10 sf
3342: 2.348 .235(beneficial only if indeed )J
3343: f4_10 sf
3344: .545(all)A
3345: f3_10 sf
3346: 2.641 .264( treebank subtrees are)J
3347: 62 108 :M
3348: 1.425 .143(used \(see Sima'an 1999: 108\), while maximum parse)J
3349: 62 121 :M
3350: 3.11 .311(accuracy is typically obtained with a subtree set)J
3351: 62 134 :M
3352: 1.202 .12(which is smaller than the total set of subtrees \(this is)J
3353: 62 147 :M
3354: 4.476 .448(probably due to data-sparseness effects -- see)J
3355: 62 160 :M
3356: .824 .082(Bonnema et al. 1997; Bod 1998; Sima'an 1999\).)J
3357: 97 173 :M
3358: .756 .076(In this paper we will use Bod's subtree-to-rule)J
3359: 62 186 :M
3360: 3.572 .357(conversion method for studying the behavior of)J
3361: 62 199 :M
3362: 4.619 .462(probabilistic against non-probabilistic DOP for)J
3363: 62 212 :M
3364: 1.933 .193(different maximum subtree sizes. However, we will)J
3365: 62 225 :M
3366: 1.589 .159(not use Bod's Monte Carlo sampling technique from)J
3367: 62 238 :M
3368: 2.206 .221(complete derivation forests, as this turns out to be)J
3369: 62 251 :M
3370: 2.422 .242(computationally impractical for our larger corpora.)J
3371: 62 264 :M
3372: 1.368 .137(Instead, we use a Viterbi )J
3373: f4_10 sf
3374: .512(n)A
3375: f3_10 sf
3376: 1.899 .19(-best search and estimate)J
3377: 62 277 :M
3378: .892 .089(the most probable parse from the 1,000 most probable)J
3379: 62 290 :M
3380: 1.538 .154(derivations, summing up the probabilities of derivat-)J
3381: 62 303 :M
3382: 2.012 .201(ions that generate the same tree. The algorithm for)J
3383: 62 316 :M
3384: 4.653 .465(computing )J
3385: f4_10 sf
3386: 1.028(n)A
3387: f3_10 sf
3388: 3.91 .391( most probable derivations follows)J
3389: 62 329 :M
3390: 1.28 .128(straightforwardly from the algorithm which computes)J
3391: 62 342 :M
3392: 2.66 .266(the most probable derivation by means of Viterbi)J
3393: 62 355 :M
3394: 1.2 .12(optimization \(see Sima'an 1995, 1999\).)J
3395: 62 377 :M
3396: f0_10 sf
3397: .988 .099(3.2  Computing the shortest derivation)J
3398: 62 390 :M
3399: f3_10 sf
3400: .885 .088(As with the probabilistic DOP model, we first convert)J
3401: 62 403 :M
3402: 3.527 .353(the corpus-subtrees into rewrite rules. Next, the)J
3403: 62 416 :M
3404: 1.263 .126(shortest derivation can be computed in the same way)J
3405: 62 429 :M
3406: 2.306 .231(as the most probable derivation \(by Viterbi\) if we)J
3407: 62 442 :M
3408: 1.938 .194(give all rules equal probabilities, in which case the)J
3409: 62 455 :M
3410: 3.06 .306(shortest derivation is equal to the most probable)J
3411: 62 468 :M
3412: 1.587 .159(derivation. This can be seen as follows: if each rule)J
3413: 62 481 :M
3414: .817 .082(has a probability )J
3415: f4_10 sf
3416: .284(p)A
3417: f3_10 sf
3418: .837 .084( then the probability of a derivation)J
3419: 62 495 :M
3420: 1.819 .182(involving )J
3421: f4_10 sf
3422: .445(n)A
3423: f3_10 sf
3424: .878 .088( rules is equal to )J
3425: f4_10 sf
3426: .445(p)A
3427: f4_9 sf
3428: 0 -3 rm
3429: .401(n)A
3430: 0 3 rm
3431: f3_10 sf
3432: 1.126 .113(, and since 0<)J
3433: f4_10 sf
3434: .445(p)A
3435: f3_10 sf
3436: 1.506 .151(<1 the)J
3437: 62 508 :M
3438: 3.204 .32(derivation with the fewest rules has the greatest)J
3439: 62 521 :M
3440: 1.509 .151(probability. In our experiments, we gave each rule a)J
3441: 62 534 :M
3442: 1.22 .122(probability mass equal to 1/)J
3443: f4_10 sf
3444: .456(R)A
3445: f3_10 sf
3446: .851 .085(, where )J
3447: f4_10 sf
3448: .456(R)A
3449: f3_10 sf
3450: .989 .099( is the number)J
3451: 62 547 :M
3452: .689 .069(of distinct rules derived by Bod's method.)J
3453: 97 560 :M
3454: 1.842 .184(As mentioned above, the shortest derivation)J
3455: 62 573 :M
3456: 2.781 .278(may not be unique. In that case we compute )J
3457: f4_10 sf
3458: 1.095(all)A
3459: 62 586 :M
3460: f3_10 sf
3461: 1.37 .137(shortest derivations of a sentence and then apply our)J
3462: 62 599 :M
3463: 2.153 .215(ranking scheme to these derivations. Note that this)J
3464: 62 612 :M
3465: 1.399 .14(ranking scheme )J
3466: f4_10 sf
3467: .342(does)A
3468: f3_10 sf
3469: 1.35 .135( distinguish between subtrees or)J
3470: 62 625 :M
3471: 2.191 .219(different root labels, as it ranks the subtrees given)J
3472: 62 638 :M
3473: 1.37 .137(their root label. The ranks of the shortest derivations)J
3474: 62 651 :M
3475: 3.666 .367(are computed by summing up the ranks of the)J
3476: 62 664 :M
3477: .874 .087(subtrees they involve. The shortest derivation with the)J
3478: 62 677 :M
3479: 1.175 .118(smallest sum of subtree ranks is taken to produce the)J
3480: 62 691 :M
3481: .728 .073(best parse tree.)J
3482: f3_9 sf
3483: 0 -3 rm
3484: (3)S
3485: 0 3 rm
3486: -4096 -4095 -1 1 -4094 -4095 1 -4096 -4096 @a
3487: 62 702.24 -.24 .24 205.24 702 .24 62 702 @a
3488: 62 714 :M
3489: f3_7 sf
3490: -.457(3)A
3491: f3_9 sf
3492: 0 3 rm
3493: ( )S
3494: 0 -3 rm
3495: 68 717 :M
3496: 1.208 .121(It may happen that different shortest derivations generate)J
3497: 62 729 :M
3498: .968 .097(the same tree. We will not distinguish between these cases,)J
3499: 62 741 :M
3500: 1.045 .104(however, and compute only the shortest derivation with the)J
3501: 62 753 :M
3502: .234 .023(highest rank.)J
3503: 304 97 :M
3504: f0_12 sf
3505: -.012(4.  Experimental Comparison)A
3506: 304 115 :M
3507: f0_10 sf
3508: 1.167 .117(4.1  Experiments on the ATIS corpus)J
3509: 304 128 :M
3510: f3_10 sf
3511: 1.439 .144(For our first comparison, we used 10 splits from the)J
3512: 304 141 :M
3513: 1.454 .145(Penn ATIS corpus \(Marcus et al. 1993\) into training)J
3514: 304 154 :M
3515: 1.873 .187(sets of 675 sentences and test sets of 75 sentences.)J
3516: 304 167 :M
3517: 1.762 .176(These splits were random except for one constraint:)J
3518: 304 180 :M
3519: 1.427 .143(that all words in the test set actually occurred in the)J
3520: 304 193 :M
3521: 2.713 .271(training set. As in Bod \(1998\), we eliminated all)J
3522: 304 206 :M
3523: 1.365 .137(epsilon productions and all "pseudo-attachments". As)J
3524: 304 219 :M
3525: 1.62 .162(accuracy metric we used the exact match defined as)J
3526: 304 232 :M
3527: 3.863 .386(the percentage of the best parse trees that are)J
3528: 304 245 :M
3529: 1.546 .155(identical to the test set parses. Since the Penn ATIS)J
3530: 304 258 :M
3531: 1.571 .157(portion is relatively small, we were able to compute)J
3532: 304 271 :M
3533: 2.6 .26(the most probable parse both by means of Monte)J
3534: 304 284 :M
3535: .889 .089(Carlo sampling and by means of Viterbi )J
3536: f4_10 sf
3537: .297(n)A
3538: f3_10 sf
3539: 1.336 .134(-best. Table)J
3540: 304 297 :M
3541: 1.313 .131(1 shows the means of the exact match accuracies for)J
3542: 304 310 :M
3543: .902 .09(increasing maximum subtree depths \(up to depth 6\).)J
3544: 313 328 33 11 rC
3545: gS
3546: .751 .722 scale
3547: 439.19 461.848 :T
3548: -439.19 -461.848 :T
3549: 418.19 466.848 :M
3550: f3_12 sf
3551: (Depth of)S
3552: gR
3553: gR
3554: gS 367 328 67 12 rC
3555: gS
3556: .751 .722 scale
3557: 534.108 462.848 :T
3558: -534.108 -462.848 :T
3559: 490.108 466.848 :M
3560: f3_12 sf
3561: (Probabilistic DOP)S
3562: gR
3563: gR
3564: gS 440 328 85 21 rC
3565: gS
3566: .751 .722 scale
3567: 640.999 467.462 :T
3568: -640.999 -467.462 :T
3569: 585.999 465.462 :M
3570: f3_12 sf
3571: (    Non-probabilistic )S
3572: gR
3573: gS
3574: .751 .722 scale
3575: 640.999 467.462 :T
3576: -640.999 -467.462 :T
3577: 585.999 480.701 :M
3578: f3_12 sf
3579: (              DOP)S
3580: gR
3581: gR
3582: gS 313 339 31 11 rC
3583: gS
3584: .751 .722 scale
3585: 437.19 477.086 :T
3586: -437.19 -477.086 :T
3587: 418.19 482.086 :M
3588: f3_12 sf
3589: (subtrees)S
3590: gR
3591: gR
3592: gS 323 361 6 11 rC
3593: gS
3594: .751 .722 scale
3595: 434.508 507.563 :T
3596: -434.508 -507.563 :T
3597: 431.508 512.563 :M
3598: f3_12 sf
3599: (1)S
3600: gR
3601: gR
3602: gS 365 363 18 9 rC
3603: gS
3604: .751 .722 scale
3605: 497.445 509.563 :T
3606: -497.445 -509.563 :T
3607: 487.445 512.563 :M
3608: f3_12 sf
3609: (46.7)S
3610: gR
3611: gR
3612: gS 418 363 17 9 rC
3613: gS
3614: .751 .722 scale
3615: 568.031 509.563 :T
3616: -568.031 -509.563 :T
3617: 558.031 512.563 :M
3618: f3_12 sf
3619: (46.7)S
3620: gR
3621: gR
3622: gS 463 363 26 9 rC
3623: gS
3624: .751 .722 scale
3625: 633.963 509.563 :T
3626: -633.963 -509.563 :T
3627: 617.963 512.563 :M
3628: f3_12 sf
3629: (    24.8)S
3630: gR
3631: gR
3632: gS 321 372 11 11 rC
3633: gS
3634: .751 .722 scale
3635: 434.845 521.416 :T
3636: -434.845 -521.416 :T
3637: 428.845 526.416 :M
3638: f3_12 sf
3639: cF
3640: f1_12 sf
3641: <A3>S
3642: sf
3643: (2)S
3644: gR
3645: gR
3646: gS 365 374 18 9 rC
3647: gS
3648: .751 .722 scale
3649: 497.445 522.416 :T
3650: -497.445 -522.416 :T
3651: 487.445 526.416 :M
3652: f3_12 sf
3653: (67.5)S
3654: gR
3655: gR
3656: gS 418 374 17 9 rC
3657: gS
3658: .751 .722 scale
3659: 568.031 522.416 :T
3660: -568.031 -522.416 :T
3661: 558.031 526.416 :M
3662: f3_12 sf
3663: (67.5)S
3664: gR
3665: gR
3666: gS 463 375 26 8 rC
3667: gS
3668: .751 .722 scale
3669: 633.963 524.801 :T
3670: -633.963 -524.801 :T
3671: 617.963 527.801 :M
3672: f3_12 sf
3673: (    40.3)S
3674: gR
3675: gR
3676: gS 321 383 11 10 rC
3677: gS
3678: .751 .722 scale
3679: 434.845 536.654 :T
3680: -434.845 -536.654 :T
3681: 428.845 541.654 :M
3682: f3_12 sf
3683: cF
3684: f1_12 sf
3685: <A3>S
3686: sf
3687: (3)S
3688: gR
3689: gR
3690: gS 365 385 18 8 rC
3691: gS
3692: .751 .722 scale
3693: 497.445 538.654 :T
3694: -497.445 -538.654 :T
3695: 487.445 541.654 :M
3696: f3_12 sf
3697: (78.1)S
3698: gR
3699: gR
3700: gS 418 385 17 8 rC
3701: gS
3702: .751 .722 scale
3703: 568.031 538.654 :T
3704: -568.031 -538.654 :T
3705: 558.031 541.654 :M
3706: f3_12 sf
3707: (78.2)S
3708: gR
3709: gR
3710: gS 463 386 26 8 rC
3711: gS
3712: .751 .722 scale
3713: 633.963 539.039 :T
3714: -633.963 -539.039 :T
3715: 617.963 543.039 :M
3716: f3_12 sf
3717: (    57.1)S
3718: gR
3719: gR
3720: gS 321 393 11 11 rC
3721: gS
3722: .751 .722 scale
3723: 434.845 551.893 :T
3724: -434.845 -551.893 :T
3725: 428.845 556.893 :M
3726: f3_12 sf
3727: cF
3728: f1_12 sf
3729: <A3>S
3730: sf
3731: (4)S
3732: gR
3733: gR
3734: gS 365 396 18 8 rC
3735: gS
3736: .751 .722 scale
3737: 497.445 552.893 :T
3738: -497.445 -552.893 :T
3739: 487.445 556.893 :M
3740: f3_12 sf
3741: (83.6)S
3742: gR
3743: gR
3744: gS 418 396 17 8 rC
3745: gS
3746: .751 .722 scale
3747: 568.031 552.893 :T
3748: -568.031 -552.893 :T
3749: 558.031 556.893 :M
3750: f3_12 sf
3751: (83.0)S
3752: gR
3753: gR
3754: gS 463 396 26 8 rC
3755: gS
3756: .751 .722 scale
3757: 633.963 552.893 :T
3758: -633.963 -552.893 :T
3759: 617.963 556.893 :M
3760: f3_12 sf
3761: (    81.5)S
3762: gR
3763: gR
3764: gS 321 404 11 11 rC
3765: gS
3766: .751 .722 scale
3767: 434.845 567.131 :T
3768: -434.845 -567.131 :T
3769: 428.845 572.131 :M
3770: f3_12 sf
3771: cF
3772: f1_12 sf
3773: <A3>S
3774: sf
3775: (5)S
3776: gR
3777: gR
3778: gS 365 406 18 9 rC
3779: gS
3780: .751 .722 scale
3781: 497.445 569.131 :T
3782: -497.445 -569.131 :T
3783: 487.445 572.131 :M
3784: f3_12 sf
3785: (83.9)S
3786: gR
3787: gR
3788: gS 418 406 17 9 rC
3789: gS
3790: .751 .722 scale
3791: 568.031 569.131 :T
3792: -568.031 -569.131 :T
3793: 558.031 572.131 :M
3794: f3_12 sf
3795: (83.4)S
3796: gR
3797: gR
3798: gS 463 406 26 9 rC
3799: gS
3800: .751 .722 scale
3801: 633.963 569.131 :T
3802: -633.963 -569.131 :T
3803: 617.963 572.131 :M
3804: f3_12 sf
3805: (    83.6)S
3806: gR
3807: gR
3808: gS 321 415 11 11 rC
3809: gS
3810: .751 .722 scale
3811: 434.845 582.369 :T
3812: -434.845 -582.369 :T
3813: 428.845 587.369 :M
3814: f3_12 sf
3815: cF
3816: f1_12 sf
3817: <A3>S
3818: sf
3819: (6)S
3820: gR
3821: gR
3822: gS 365 417 18 9 rC
3823: gS
3824: .751 .722 scale
3825: 497.445 583.369 :T
3826: -497.445 -583.369 :T
3827: 487.445 587.369 :M
3828: f3_12 sf
3829: (84.1)S
3830: gR
3831: gR
3832: gS 418 417 17 9 rC
3833: gS
3834: .751 .722 scale
3835: 568.031 583.369 :T
3836: -568.031 -583.369 :T
3837: 558.031 587.369 :M
3838: f3_12 sf
3839: (84.0)S
3840: gR
3841: gR
3842: gS 463 417 26 9 rC
3843: gS
3844: .751 .722 scale
3845: 633.963 583.369 :T
3846: -633.963 -583.369 :T
3847: 617.963 587.369 :M
3848: f3_12 sf
3849: (    85.6)S
3850: gR
3851: gR
3852: gS 309 322 217 109 rC
3853: 311 324 -1 1 513 323 1 311 323 @a
3854: 311 358 -1 1 513 357 1 311 357 @a
3855: 311 431 -1 1 513 430 1 311 430 @a
3856: 355 342 46 9 rC
3857: gS
3858: .751 .722 scale
3859: 504.126 479.471 :T
3860: -504.126 -479.471 :T
3861: 474.126 483.471 :M
3862: f3_10 sf
3863: (Monte Carlo)S
3864: gR
3865: gR
3866: gS 406 341 51 10 rC
3867: gS
3868: .751 .722 scale
3869: 575.049 479.471 :T
3870: -575.049 -479.471 :T
3871: 542.049 483.471 :M
3872: f3_10 sf
3873: (Viterbi )S
3874: gR
3875: gS
3876: .751 .722 scale
3877: 575.049 479.471 :T
3878: -575.049 -479.471 :T
3879: 572.681 483.471 :M
3880: f4_10 sf
3881: (n)S
3882: gR
3883: gS
3884: .751 .722 scale
3885: 575.049 479.471 :T
3886: -575.049 -479.471 :T
3887: 578.008 483.471 :M
3888: f3_10 sf
3889: (-best)S
3890: gR
3891: gR
3892: gS 29 30 538 781 rC
3893: 304 449 :M
3894: f3_9 sf
3895: .65 .065(   Table 1. Exact match accuracies for the ATIS corpus)J
3896: 304 471 :M
3897: f3_10 sf
3898: .773 .077(The table shows that the two methods for probabilistic)J
3899: 304 484 :M
3900: .968 .097(DOP score roughly the same: at depth )J
3901: cF
3902: f1_10 sf
3903: .097<A3>A
3904: sf
3905: .968 .097( 6, the Monte)J
3906: 304 497 :M
3907: 1.261 .126(Carlo method obtains 84.1% while the Viterbi )J
3908: f4_10 sf
3909: .387(n)A
3910: f3_10 sf
3911: .376(-best)A
3912: 304 510 :M
3913: 3.069 .307(method obtains 84.0%. These differences are not)J
3914: 304 523 :M
3915: 1.401 .14(statistically significant. The table also shows that for)J
3916: 304 536 :M
3917: .976 .098(small subtree depths the non-probabilistic DOP model)J
3918: 304 549 :M
3919: 2.528 .253(performs considerably worse than the probabilistic)J
3920: 304 562 :M
3921: 2.3 .23(model. This may not be surprising since for small)J
3922: 304 575 :M
3923: 2.552 .255(subtrees the shortest derivation corresponds to the)J
3924: 304 588 :M
3925: 3.129 .313(smallest parse tree which is known to be a bad)J
3926: 304 601 :M
3927: 3.302 .33(prediction of the correct parse tree. Only if the)J
3928: 304 614 :M
3929: 1.19 .119(subtrees are larger than depth 4, the non-probabilistic)J
3930: 304 627 :M
3931: 5.402 .54(DOP model scores roughly the same as its)J
3932: 304 640 :M
3933: 2.165 .216(probabilistic counterpart. At subtree depth )J
3934: cF
3935: f1_10 sf
3936: .216<A3>A
3937: sf
3938: 2.165 .216( 6, the)J
3939: 304 653 :M
3940: 1.264 .126(non-probabilistic DOP model scores 1.5% better than)J
3941: 304 666 :M
3942: 1.231 .123(the best score of the probabilistic DOP model, which)J
3943: 304 679 :M
3944: .963 .096(is statistically significant according to paired )J
3945: f4_10 sf
3946: .161(t)A
3947: f3_10 sf
3948: .227(-tests.)A
3949: 304 701 :M
3950: f0_10 sf
3951: 1.155 .116(4.2  Experiments on the OVIS corpus)J
3952: 304 714 :M
3953: f3_10 sf
3954: 1.021 .102(For our comparison on the OVIS corpus \(Bonnema et)J
3955: 304 727 :M
3956: 1.418 .142(al. 1997; Bod 1998\) we again used 10 random splits)J
3957: 304 740 :M
3958: 2.758 .276(under the condition that all words in the test set)J
3959: 304 753 :M
3960: 3.422 .342(occurred in the training set \(9000 sentences for)J
3961: endp
3962: %%Page: 5 5
3963: %%BeginPageSetup
3964: initializepage
3965: (rens; page: 5 of 7)setjob
3966: %%EndPageSetup
3967: -29 -30 :T
3968: gS 29 30 538 781 rC
3969: 62 95 :M
3970: f3_10 sf
3971: 1.369 .137(training, 1000 sentences for testing\). The OVIS trees)J
3972: 62 108 :M
3973: 1.702 .17(contain both syntactic and semantic annotations, but)J
3974: 62 121 :M
3975: 1.209 .121(no epsilon productions. As in Bod \(1998\), we treated)J
3976: 62 134 :M
3977: 1.715 .172(the syntactic and semantic annotations of each node)J
3978: 62 147 :M
3979: 3.689 .369(as one label. Consequently, the labels are very)J
3980: 62 160 :M
3981: 3.697 .37(restrictive and collecting statistics over them is)J
3982: 62 173 :M
3983: 1.749 .175(difficult. Bonnema et al. \(1997\) and Sima'an \(1999\))J
3984: 62 186 :M
3985: 2.149 .215(report that \(probabilistic\) DOP suffers considerably)J
3986: 62 199 :M
3987: 1.129 .113(from data-sparseness on OVIS, yielding a decrease in)J
3988: 62 212 :M
3989: 2.525 .252(parse accuracy if subtrees larger than depth 4 are)J
3990: 62 225 :M
3991: .74 .074(included. Thus it is interesting to investigate how non-)J
3992: 62 238 :M
3993: 2.284 .228(probabilistic DOP behaves on this corpus. Table 2)J
3994: 62 251 :M
3995: 2.022 .202(shows the means of the exact match accuracies for)J
3996: 62 264 :M
3997: .985 .098(increasing subtree depths.)J
3998: 98 282 32 11 rC
3999: gS
4000: .729 .717 scale
4001: 155.822 400.809 :T
4002: -155.822 -400.809 :T
4003: 135.822 405.809 :M
4004: f3_12 sf
4005: (Depth of)S
4006: gR
4007: gR
4008: gS 98 293 30 11 rC
4009: gS
4010: .729 .717 scale
4011: 154.822 416.148 :T
4012: -154.822 -416.148 :T
4013: 135.822 421.148 :M
4014: f3_12 sf
4015: (subtrees)S
4016: gR
4017: gR
4018: gS 107 315 6 10 rC
4019: gS
4020: .729 .717 scale
4021: 150.169 445.434 :T
4022: -150.169 -445.434 :T
4023: 148.169 450.434 :M
4024: f3_12 sf
4025: (1)S
4026: gR
4027: gR
4028: gS 158 315 17 10 rC
4029: gS
4030: .729 .717 scale
4031: 228.138 445.434 :T
4032: -228.138 -445.434 :T
4033: 218.138 450.434 :M
4034: f3_12 sf
4035: (83.1)S
4036: gR
4037: gR
4038: gS 216 315 17 11 rC
4039: gS
4040: .729 .717 scale
4041: 307.71 446.828 :T
4042: -307.71 -446.828 :T
4043: 297.71 451.828 :M
4044: f3_12 sf
4045: (70.4)S
4046: gR
4047: gR
4048: gS 105 325 11 11 rC
4049: gS
4050: .729 .717 scale
4051: 151.425 460.773 :T
4052: -151.425 -460.773 :T
4053: 145.425 465.773 :M
4054: f3_12 sf
4055: cF
4056: f1_12 sf
4057: <A3>S
4058: sf
4059: (2)S
4060: gR
4061: gR
4062: gS 158 325 17 11 rC
4063: gS
4064: .729 .717 scale
4065: 228.138 460.773 :T
4066: -228.138 -460.773 :T
4067: 218.138 465.773 :M
4068: f3_12 sf
4069: (87.6)S
4070: gR
4071: gR
4072: gS 216 326 17 11 rC
4073: gS
4074: .729 .717 scale
4075: 307.71 462.168 :T
4076: -307.71 -462.168 :T
4077: 297.71 467.168 :M
4078: f3_12 sf
4079: (85.1)S
4080: gR
4081: gR
4082: gS 105 336 11 11 rC
4083: gS
4084: .729 .717 scale
4085: 151.425 476.113 :T
4086: -151.425 -476.113 :T
4087: 145.425 481.113 :M
4088: f3_12 sf
4089: cF
4090: f1_12 sf
4091: <A3>S
4092: sf
4093: (3)S
4094: gR
4095: gR
4096: gS 158 336 17 11 rC
4097: gS
4098: .729 .717 scale
4099: 228.138 476.113 :T
4100: -228.138 -476.113 :T
4101: 218.138 481.113 :M
4102: f3_12 sf
4103: (89.6)S
4104: gR
4105: gR
4106: gS 216 337 17 11 rC
4107: gS
4108: .729 .717 scale
4109: 307.71 477.508 :T
4110: -307.71 -477.508 :T
4111: 297.71 482.508 :M
4112: f3_12 sf
4113: (89.5)S
4114: gR
4115: gR
4116: gS 105 347 11 11 rC
4117: gS
4118: .729 .717 scale
4119: 151.425 491.453 :T
4120: -151.425 -491.453 :T
4121: 145.425 496.453 :M
4122: f3_12 sf
4123: cF
4124: f1_12 sf
4125: <A3>S
4126: sf
4127: (4)S
4128: gR
4129: gR
4130: gS 158 347 17 11 rC
4131: gS
4132: .729 .717 scale
4133: 228.138 491.453 :T
4134: -228.138 -491.453 :T
4135: 218.138 496.453 :M
4136: f3_12 sf
4137: (90.0)S
4138: gR
4139: gR
4140: gS 216 348 17 10 rC
4141: gS
4142: .729 .717 scale
4143: 307.71 491.453 :T
4144: -307.71 -491.453 :T
4145: 297.71 496.453 :M
4146: f3_12 sf
4147: (90.9)S
4148: gR
4149: gR
4150: gS 105 358 11 11 rC
4151: gS
4152: .729 .717 scale
4153: 151.425 505.398 :T
4154: -151.425 -505.398 :T
4155: 145.425 510.398 :M
4156: f3_12 sf
4157: cF
4158: f1_12 sf
4159: <A3>S
4160: sf
4161: (5)S
4162: gR
4163: gR
4164: gS 158 358 17 11 rC
4165: gS
4166: .729 .717 scale
4167: 228.138 505.398 :T
4168: -228.138 -505.398 :T
4169: 218.138 510.398 :M
4170: f3_12 sf
4171: (89.7)S
4172: gR
4173: gR
4174: gS 216 358 17 11 rC
4175: gS
4176: .729 .717 scale
4177: 307.71 506.793 :T
4178: -307.71 -506.793 :T
4179: 297.71 511.793 :M
4180: f3_12 sf
4181: (91.5)S
4182: gR
4183: gR
4184: gS 105 369 11 10 rC
4185: gS
4186: .729 .717 scale
4187: 151.425 520.738 :T
4188: -151.425 -520.738 :T
4189: 145.425 525.738 :M
4190: f3_12 sf
4191: cF
4192: f1_12 sf
4193: <A3>S
4194: sf
4195: (6)S
4196: gR
4197: gR
4198: gS 158 369 17 10 rC
4199: gS
4200: .729 .717 scale
4201: 228.138 520.738 :T
4202: -228.138 -520.738 :T
4203: 218.138 525.738 :M
4204: f3_12 sf
4205: (88.8)S
4206: gR
4207: gR
4208: gS 216 369 17 11 rC
4209: gS
4210: .729 .717 scale
4211: 307.71 522.133 :T
4212: -307.71 -522.133 :T
4213: 297.71 527.133 :M
4214: f3_12 sf
4215: (92.2)S
4216: gR
4217: gR
4218: gS 94 276 164 109 rC
4219: 95 278 -1 1 258 277 1 95 277 @a
4220: 95 311 -1 1 258 310 1 95 310 @a
4221: 95 385 -1 1 258 384 1 95 384 @a
4222: 144 285 66 18 rC
4223: gS
4224: .729 .717 scale
4225: 242.931 410.203 :T
4226: -242.931 -410.203 :T
4227: 198.931 407.203 :M
4228: f3_12 sf
4229: (Probabilistic )S
4230: gR
4231: gS
4232: .729 .717 scale
4233: 242.931 410.203 :T
4234: -242.931 -410.203 :T
4235: 198.931 418.359 :M
4236: f3_12 sf
4237: (      DOP)S
4238: gR
4239: gR
4240: gS 195 285 62 18 rC
4241: gS
4242: .729 .717 scale
4243: 308.527 410.203 :T
4244: -308.527 -410.203 :T
4245: 267.527 407.203 :M
4246: f3_12 sf
4247: (Non-probabilistic)S
4248: gR
4249: gS
4250: .729 .717 scale
4251: 308.527 410.203 :T
4252: -308.527 -410.203 :T
4253: 267.527 418.359 :M
4254: f3_12 sf
4255: (          DOP)S
4256: gR
4257: gR
4258: gS 29 30 538 781 rC
4259: 75 403 :M
4260: f3_9 sf
4261: .719 .072(Table 2. Exact match accuracies for the OVIS corpus)J
4262: 62 425 :M
4263: f3_10 sf
4264: 1.838 .184(We again see that the non-probabilistic DOP model)J
4265: 62 438 :M
4266: 3.088 .309(performs badly for small subtree depths while it)J
4267: 62 451 :M
4268: 3.678 .368(outperforms the probabilistic DOP model if the)J
4269: 62 464 :M
4270: 1.787 .179(subtrees get larger \(in this case for depth > 3\). But)J
4271: 62 477 :M
4272: 1.631 .163(while the accuracy of probabilistic DOP deteriorates)J
4273: 62 490 :M
4274: 1.496 .15(after depth 4, the accuracy of non-probabilistic DOP)J
4275: 62 503 :M
4276: 1.01 .101(continues to grow. Thus non-probabilistic DOP seems)J
4277: 62 516 :M
4278: 1.916 .192(relatively insensitive to the low frequency of larger)J
4279: 62 529 :M
4280: 1.046 .105(subtrees. This property may be especially useful if no)J
4281: 62 542 :M
4282: 5.796 .58(meaningful statistics can be collected while)J
4283: 62 555 :M
4284: 2.693 .269(sentences can still be parsed by large chunks. At)J
4285: 62 568 :M
4286: 1.534 .153(depth )J
4287: cF
4288: f1_10 sf
4289: .153<A3>A
4290: sf
4291: 1.534 .153( 6, non-probabilistic DOP scores 3.4% better)J
4292: 62 581 :M
4293: 1.559 .156(than probabilistic DOP, which is statistically signifi-)J
4294: 62 594 :M
4295: .731 .073(cant using paired )J
4296: f4_10 sf
4297: .138(t)A
4298: f3_10 sf
4299: .195(-tests.)A
4300: 62 616 :M
4301: f0_10 sf
4302: 1.155 .116(4.3  Experiments on the WSJ corpus)J
4303: 62 629 :M
4304: f3_10 sf
4305: 1.543 .154(Both the ATIS and OVIS corpus represent restricted)J
4306: 62 642 :M
4307: 1.966 .197(domains. In order to extend our results to a broad-)J
4308: 62 655 :M
4309: 1.737 .174(coverage domain, we tested the two models also on)J
4310: 62 668 :M
4311: 1.295 .13(the Wall Street Journal portion in the Penn Treebank)J
4312: 62 681 :M
4313: .886 .089(\(Marcus et al. 1993\).)J
4314: 97 694 :M
4315: .912 .091(To make our results comparable to others, we)J
4316: 62 707 :M
4317: 1.891 .189(did not test on different random splits but used the)J
4318: 62 720 :M
4319: 1.303 .13(now standard division of the WSJ with sections 2-21)J
4320: 62 733 :M
4321: .996 .1(for training \(approx. 40,000 sentences\) and section 23)J
4322: 62 746 :M
4323: 1.897 .19(for testing \(see Collins 1997, 1999; Charniak 1997,)J
4324: 304 95 :M
4325: .965 .097(2000; Ratnaparkhi 1999\); we only tested on sentences)J
4326: 304 108 :M
4327: cF
4328: f1_10 sf
4329: .132<A3>A
4330: sf
4331: 1.323 .132( 40 words \(2245 sentences\). All trees were stripped)J
4332: 304 121 :M
4333: 1.543 .154(off their semantic tags, co-reference information and)J
4334: 304 134 :M
4335: 1.348 .135(quotation marks. We used all training set subtrees of)J
4336: 304 147 :M
4337: 2.209 .221(depth 1, but due to memory limitations we used a)J
4338: 304 160 :M
4339: .665 .066(subset of the subtrees larger than depth 1 by taking for)J
4340: 304 173 :M
4341: 1.229 .123(each depth a random sample of 400,000 subtrees. No)J
4342: 304 186 :M
4343: .964 .096(subtrees larger than depth 14 were used. This resulted)J
4344: 304 199 :M
4345: 1.616 .162(in a set of 5,217,529 subtrees which were smoothed)J
4346: 304 212 :M
4347: .926 .093(by the technique described in Bod \(1996\). We did not)J
4348: 304 225 :M
4349: 3.169 .317(employ a separate part-of-speech tagger: the test)J
4350: 304 238 :M
4351: 2.665 .266(sentences were directly parsed by the training set)J
4352: 304 251 :M
4353: .885 .088(subtrees. For words that were unknown in the training)J
4354: 304 264 :M
4355: 2.846 .285(set, we guessed their categories by means of the)J
4356: 304 277 :M
4357: 1.66 .166(method described in Weischedel et al. \(1993\) which)J
4358: 304 290 :M
4359: 3.35 .335(uses statistics on word-endings, hyphenation and)J
4360: 304 303 :M
4361: 5.058 .506(capitalization. The guessed category for each)J
4362: 304 316 :M
4363: 1.555 .156(unknown word was converted into a depth-1 subtree)J
4364: 304 329 :M
4365: 2.687 .269(and assigned a probability \(or frequency for non-)J
4366: 304 342 :M
4367: .632 .063(probabilistic DOP\) by means of simple Good-Turing.)J
4368: 339 355 :M
4369: 3.036 .304(As accuracy metric we used the standard)J
4370: 304 368 :M
4371: f3_9 sf
4372: -.071(PARSEVAL)A
4373: 351 368 :M
4374: f3_10 sf
4375: 1.972 .197( scores \(Black et al. 1991\) to compare a)J
4376: 304 381 :M
4377: 3.164 .316(proposed parse )J
4378: f4_10 sf
4379: 1.078(P)A
4380: f3_10 sf
4381: 3.15 .315( with the corresponding correct)J
4382: 304 394 :M
4383: .898 .09(treebank parse )J
4384: f4_10 sf
4385: .289(T)A
4386: f3_10 sf
4387: .772 .077( as follows:)J
4388: 462 406 2 3 rC
4389: gS
4390: .759 .725 scale
4391: 610.33 562.145 :T
4392: -610.33 -562.145 :T
4393: 610.33 564.145 :M
4394: f4_4 sf
4395: ( )S
4396: gR
4397: gR
4398: gS 383 403 96 11 rC
4399: gS
4400: .759 .725 scale
4401: 568.192 561.904 :T
4402: -568.192 -561.904 :T
4403: 506.192 566.904 :M
4404: f3_12 sf
4405: (# correct constituents in )S
4406: gR
4407: gS
4408: .759 .725 scale
4409: 568.192 561.904 :T
4410: -568.192 -561.904 :T
4411: 623.512 566.904 :M
4412: f4_12 sf
4413: (P)S
4414: gR
4415: gR
4416: gS 396 419 73 11 rC
4417: gS
4418: .759 .725 scale
4419: 570.328 585.352 :T
4420: -570.328 -585.352 :T
4421: 523.328 590.352 :M
4422: f3_12 sf
4423: (# constituents in )S
4424: gR
4425: gS
4426: .759 .725 scale
4427: 570.328 585.352 :T
4428: -570.328 -585.352 :T
4429: 605.057 590.352 :M
4430: f4_12 sf
4431: (P  )S
4432: gR
4433: gR
4434: gS 304 402 176 29 rC
4435: 382 418 -1 1 477 417 1 382 417 @a
4436: 305 413 76 9 rC
4437: gS
4438: .759 .725 scale
4439: 452.371 574.938 :T
4440: -452.371 -574.938 :T
4441: 403.371 577.938 :M
4442: f3_12 sf
4443: (Labeled Precision =)S
4444: gR
4445: gR
4446: gS 305 447 63 9 rC
4447: gS
4448: .751 .725 scale
4449: 448.378 621.835 :T
4450: -448.378 -621.835 :T
4451: 407.378 624.835 :M
4452: f3_12 sf
4453: (Labeled Recall =)S
4454: gR
4455: gR
4456: gS 448 440 2 3 rC
4457: gS
4458: .751 .725 scale
4459: 597.755 609.042 :T
4460: -597.755 -609.042 :T
4461: 597.755 611.042 :M
4462: f4_4 sf
4463: ( )S
4464: gR
4465: gR
4466: gS 371 437 95 11 rC
4467: gS
4468: .751 .725 scale
4469: 557.244 608.801 :T
4470: -557.244 -608.801 :T
4471: 495.244 613.801 :M
4472: f3_12 sf
4473: (# correct constituents in )S
4474: gR
4475: gS
4476: .751 .725 scale
4477: 557.244 608.801 :T
4478: -557.244 -608.801 :T
4479: 612.399 613.801 :M
4480: f4_12 sf
4481: (P)S
4482: gR
4483: gR
4484: gS 384 453 70 11 rC
4485: gS
4486: .751 .725 scale
4487: 556.22 632.249 :T
4488: -556.22 -632.249 :T
4489: 511.22 637.249 :M
4490: f3_12 sf
4491: (# constituents in )S
4492: gR
4493: gS
4494: .751 .725 scale
4495: 556.22 632.249 :T
4496: -556.22 -632.249 :T
4497: 593.761 637.249 :M
4498: f4_12 sf
4499: (T )S
4500: gR
4501: gR
4502: gS 304 436 163 29 rC
4503: 371 452 -1 1 465 451 1 371 451 @a
4504: 365 445 4 11 rC
4505: gS
4506: .751 .725 scale
4507: 488.257 619.835 :T
4508: -488.257 -619.835 :T
4509: 487.257 624.835 :M
4510: f3_12 sf
4511: ( )S
4512: gR
4513: 366 451 2 4 rC
4514: gS
4515: .751 .725 scale
4516: 487.257 624.215 :T
4517: -487.257 -624.215 :T
4518: 487.257 626.215 :M
4519: f3_4 sf
4520: ( )S
4521: gR
4522: gR
4523: gS 29 30 538 781 rC
4524: 304 480 :M
4525: f3_10 sf
4526: 3.438 .344(A constituent in )J
4527: f4_10 sf
4528: 1.487(P)A
4529: f3_10 sf
4530: 2.999 .3( is "correct" if there exists a)J
4531: 304 493 :M
4532: 1.163 .116(constituent in )J
4533: f4_10 sf
4534: .4(T)A
4535: f3_10 sf
4536: .924 .092( of the same label that spans the same)J
4537: 304 506 :M
4538: 2.201 .22(words. As in other work, we collapsed ADVP and)J
4539: 304 519 :M
4540: 1.434 .143(PRT to the same label when calculating these scores)J
4541: 304 532 :M
4542: .91 .091(\(see Collins 1997; Ratnaparkhi 1999; Charniak 1997\).)J
4543: 339 545 :M
4544: 1.083 .108(Table 3 shows the labeled precision \(LP\) and)J
4545: 304 558 :M
4546: 1.689 .169(labeled recall \(LR\) scores for probabilistic and non-)J
4547: 304 571 :M
4548: 1.441 .144(probabilistic DOP for six different maximum subtree)J
4549: 304 584 :M
4550: .066(depths.)A
4551: 332 643 11 12 rC
4552: gS
4553: .74 .761 scale
4554: 456.229 851.766 :T
4555: -456.229 -851.766 :T
4556: 450.229 856.766 :M
4557: f3_12 sf
4558: cF
4559: f1_12 sf
4560: <A3>S
4561: sf
4562: (4)S
4563: gR
4564: gR
4565: gS 375 643 17 12 rC
4566: gS
4567: .74 .761 scale
4568: 517.014 851.766 :T
4569: -517.014 -851.766 :T
4570: 507.014 856.766 :M
4571: f3_12 sf
4572: (84.7)S
4573: gR
4574: gR
4575: gS 409 643 18 12 rC
4576: gS
4577: .74 .761 scale
4578: 564.336 851.766 :T
4579: -564.336 -851.766 :T
4580: 554.336 856.766 :M
4581: f3_12 sf
4582: (84.1)S
4583: gR
4584: gR
4585: gS 452 643 17 12 rC
4586: gS
4587: .74 .761 scale
4588: 622.473 851.766 :T
4589: -622.473 -851.766 :T
4590: 612.473 856.766 :M
4591: f3_12 sf
4592: (81.6)S
4593: gR
4594: gR
4595: gS 488 643 17 12 rC
4596: gS
4597: .74 .761 scale
4598: 671.147 851.766 :T
4599: -671.147 -851.766 :T
4600: 661.147 856.766 :M
4601: f3_12 sf
4602: (80.1)S
4603: gR
4604: gR
4605: gS 332 655 11 11 rC
4606: gS
4607: .74 .761 scale
4608: 456.229 867.534 :T
4609: -456.229 -867.534 :T
4610: 450.229 872.534 :M
4611: f3_12 sf
4612: cF
4613: f1_12 sf
4614: <A3>S
4615: sf
4616: (6)S
4617: gR
4618: gR
4619: gS 375 655 17 11 rC
4620: gS
4621: .74 .761 scale
4622: 517.014 867.534 :T
4623: -517.014 -867.534 :T
4624: 507.014 872.534 :M
4625: f3_12 sf
4626: (86.2)S
4627: gR
4628: gR
4629: gS 409 655 18 11 rC
4630: gS
4631: .74 .761 scale
4632: 564.336 867.534 :T
4633: -564.336 -867.534 :T
4634: 554.336 872.534 :M
4635: f3_12 sf
4636: (86.0)S
4637: gR
4638: gR
4639: gS 452 655 17 11 rC
4640: gS
4641: .74 .761 scale
4642: 622.473 867.534 :T
4643: -622.473 -867.534 :T
4644: 612.473 872.534 :M
4645: f3_12 sf
4646: (85.0)S
4647: gR
4648: gR
4649: gS 488 655 17 11 rC
4650: gS
4651: .74 .761 scale
4652: 671.147 867.534 :T
4653: -671.147 -867.534 :T
4654: 661.147 872.534 :M
4655: f3_12 sf
4656: (84.7)S
4657: gR
4658: gR
4659: gS 332 666 11 11 rC
4660: gS
4661: .74 .761 scale
4662: 456.229 881.989 :T
4663: -456.229 -881.989 :T
4664: 450.229 886.989 :M
4665: f3_12 sf
4666: cF
4667: f1_12 sf
4668: <A3>S
4669: sf
4670: (8)S
4671: gR
4672: gR
4673: gS 375 666 17 11 rC
4674: gS
4675: .74 .761 scale
4676: 517.014 881.989 :T
4677: -517.014 -881.989 :T
4678: 507.014 886.989 :M
4679: f3_12 sf
4680: (87.9)S
4681: gR
4682: gR
4683: gS 409 666 18 11 rC
4684: gS
4685: .74 .761 scale
4686: 564.336 881.989 :T
4687: -564.336 -881.989 :T
4688: 554.336 886.989 :M
4689: f3_12 sf
4690: (87.1)S
4691: gR
4692: gR
4693: gS 452 666 17 11 rC
4694: gS
4695: .74 .761 scale
4696: 622.473 881.989 :T
4697: -622.473 -881.989 :T
4698: 612.473 886.989 :M
4699: f3_12 sf
4700: (87.2)S
4701: gR
4702: gR
4703: gS 488 666 17 11 rC
4704: gS
4705: .74 .761 scale
4706: 671.147 881.989 :T
4707: -671.147 -881.989 :T
4708: 661.147 886.989 :M
4709: f3_12 sf
4710: (87.0)S
4711: gR
4712: gR
4713: gS 332 677 15 12 rC
4714: gS
4715: .74 .761 scale
4716: 459.229 897.758 :T
4717: -459.229 -897.758 :T
4718: 450.229 902.758 :M
4719: f3_12 sf
4720: cF
4721: f1_12 sf
4722: <A3>S
4723: sf
4724: (10)S
4725: gR
4726: gR
4727: gS 375 677 17 12 rC
4728: gS
4729: .74 .761 scale
4730: 517.014 897.758 :T
4731: -517.014 -897.758 :T
4732: 507.014 902.758 :M
4733: f3_12 sf
4734: (88.6)S
4735: gR
4736: gR
4737: gS 409 677 18 12 rC
4738: gS
4739: .74 .761 scale
4740: 564.336 897.758 :T
4741: -564.336 -897.758 :T
4742: 554.336 902.758 :M
4743: f3_12 sf
4744: (88.0)S
4745: gR
4746: gR
4747: gS 452 677 17 12 rC
4748: gS
4749: .74 .761 scale
4750: 622.473 897.758 :T
4751: -622.473 -897.758 :T
4752: 612.473 902.758 :M
4753: f3_12 sf
4754: (86.8)S
4755: gR
4756: gR
4757: gS 488 677 17 12 rC
4758: gS
4759: .74 .761 scale
4760: 671.147 897.758 :T
4761: -671.147 -897.758 :T
4762: 661.147 902.758 :M
4763: f3_12 sf
4764: (86.5)S
4765: gR
4766: gR
4767: gS 332 689 15 11 rC
4768: gS
4769: .74 .761 scale
4770: 459.229 912.212 :T
4771: -459.229 -912.212 :T
4772: 450.229 917.212 :M
4773: f3_12 sf
4774: cF
4775: f1_12 sf
4776: <A3>S
4777: sf
4778: (12)S
4779: gR
4780: gR
4781: gS 375 689 17 11 rC
4782: gS
4783: .74 .761 scale
4784: 517.014 912.212 :T
4785: -517.014 -912.212 :T
4786: 507.014 917.212 :M
4787: f3_12 sf
4788: (89.1)S
4789: gR
4790: gR
4791: gS 409 689 18 11 rC
4792: gS
4793: .74 .761 scale
4794: 564.336 912.212 :T
4795: -564.336 -912.212 :T
4796: 554.336 917.212 :M
4797: f3_12 sf
4798: (88.8)S
4799: gR
4800: gR
4801: gS 452 689 17 11 rC
4802: gS
4803: .74 .761 scale
4804: 622.473 912.212 :T
4805: -622.473 -912.212 :T
4806: 612.473 917.212 :M
4807: f3_12 sf
4808: (87.1)S
4809: gR
4810: gR
4811: gS 488 689 17 11 rC
4812: gS
4813: .74 .761 scale
4814: 671.147 912.212 :T
4815: -671.147 -912.212 :T
4816: 661.147 917.212 :M
4817: f3_12 sf
4818: (86.9)S
4819: gR
4820: gR
4821: gS 332 700 15 12 rC
4822: gS
4823: .74 .761 scale
4824: 459.229 926.667 :T
4825: -459.229 -926.667 :T
4826: 450.229 931.667 :M
4827: f3_12 sf
4828: cF
4829: f1_12 sf
4830: <A3>S
4831: sf
4832: (14)S
4833: gR
4834: gR
4835: gS 375 700 17 12 rC
4836: gS
4837: .74 .761 scale
4838: 517.014 926.667 :T
4839: -517.014 -926.667 :T
4840: 507.014 931.667 :M
4841: f3_12 sf
4842: (89.5)S
4843: gR
4844: gR
4845: gS 409 700 18 12 rC
4846: gS
4847: .74 .761 scale
4848: 564.336 926.667 :T
4849: -564.336 -926.667 :T
4850: 554.336 931.667 :M
4851: f3_12 sf
4852: (89.3)S
4853: gR
4854: gR
4855: gS 452 700 17 12 rC
4856: gS
4857: .74 .761 scale
4858: 622.473 926.667 :T
4859: -622.473 -926.667 :T
4860: 612.473 931.667 :M
4861: f3_12 sf
4862: (87.2)S
4863: gR
4864: gR
4865: gS 488 700 17 12 rC
4866: gS
4867: .74 .761 scale
4868: 671.147 926.667 :T
4869: -671.147 -926.667 :T
4870: 661.147 931.667 :M
4871: f3_12 sf
4872: (86.9)S
4873: gR
4874: gR
4875: gS 323 603 192 21 rC
4876: gS
4877: .74 .761 scale
4878: 566.06 806.203 :T
4879: -566.06 -806.203 :T
4880: 438.06 804.203 :M
4881: f3_12 sf
4882: (Depth of       Probabilistic DOP     Non-probabilistic)S
4883: gR
4884: gS
4885: .74 .761 scale
4886: 566.06 806.203 :T
4887: -566.06 -806.203 :T
4888: 438.06 818.658 :M
4889: f3_12 sf
4890: (\011\011\011\011\011                                                             DOP)S
4891: gR
4892: gR
4893: gS 324 617 30 12 rC
4894: gS
4895: .74 .761 scale
4896: 457.06 817.6 :T
4897: -457.06 -817.6 :T
4898: 438.06 822.6 :M
4899: f3_12 sf
4900: (subtrees)S
4901: gR
4902: gR
4903: gS 375 622 14 11 rC
4904: gS
4905: .74 .761 scale
4906: 515.014 824.17 :T
4907: -515.014 -824.17 :T
4908: 507.014 829.17 :M
4909: f3_12 sf
4910: ( LP)S
4911: gR
4912: gR
4913: gS 400 622 24 11 rC
4914: gS
4915: .74 .761 scale
4916: 556.167 824.17 :T
4917: -556.167 -824.17 :T
4918: 542.167 829.17 :M
4919: f3_12 sf
4920: (     LR)S
4921: gR
4922: gR
4923: gS 455 622 12 11 rC
4924: gS
4925: .74 .761 scale
4926: 623.529 824.17 :T
4927: -623.529 -824.17 :T
4928: 616.529 829.17 :M
4929: f3_12 sf
4930: (LP)S
4931: gR
4932: gR
4933: gS 478 622 24 11 rC
4934: gS
4935: .74 .761 scale
4936: 661.626 824.17 :T
4937: -661.626 -824.17 :T
4938: 647.626 829.17 :M
4939: f3_12 sf
4940: (     LR)S
4941: gR
4942: gR
4943: gS 320 596 196 121 rC
4944: 321 599 -1 1 510 598 1 321 598 @a
4945: 321 638 -1 1 510 637 1 321 637 @a
4946: 321 717 -1 1 510 716 1 321 716 @a
4947: gR
4948: gS 29 30 538 781 rC
4949: 307 735 :M
4950: f3_9 sf
4951: .703 .07(Table 3. Scores on the WSJ corpus \(sentences )J
4952: cF
4953: f1_9 sf
4954: .07<A3>A
4955: sf
4956: .703 .07( 40 words\))J
4957: endp
4958: %%Page: 6 6
4959: %%BeginPageSetup
4960: initializepage
4961: (rens; page: 6 of 7)setjob
4962: %%EndPageSetup
4963: -29 -30 :T
4964: gS 29 30 538 781 rC
4965: 62 95 :M
4966: f3_10 sf
4967: 1.759 .176(The table shows that probabilistic DOP outperforms)J
4968: 62 108 :M
4969: 1.147 .115(non-probabilistic DOP for maximum subtree depths 4)J
4970: 62 121 :M
4971: .787 .079(and 6, while the models yield rather similar results for)J
4972: 62 134 :M
4973: 1.148 .115(maximum subtree depth 8. Surprisingly, the scores of)J
4974: 62 147 :M
4975: 1.666 .167(non-probabilistic DOP deteriorate if the subtrees are)J
4976: 62 160 :M
4977: 2.832 .283(further enlarged, while the scores of probabilistic)J
4978: 62 173 :M
4979: 1.83 .183(DOP continue to grow, up to 89.5% LP and 89.3%)J
4980: 62 186 :M
4981: 2.434 .243(LR. These scores are higher than those of several)J
4982: 62 199 :M
4983: 1.412 .141(other parsers \(e.g. Collins 1997, 99; Charniak 1997\),)J
4984: 62 212 :M
4985: 1.091 .109(but remain behind the scores of Charniak \(2000\) who)J
4986: 62 225 :M
4987: 1.505 .151(obtains 90.1% LP and 90.1% LR for sentences )J
4988: cF
4989: f1_10 sf
4990: .151<A3>A
4991: sf
4992: 1.505 .151( 40)J
4993: 62 238 :M
4994: 1.51 .151(words. However, in Bod \(2000b\) we show that even)J
4995: 62 251 :M
4996: 1.195 .12(higher scores can be obtained with probabilistic DOP)J
4997: 62 264 :M
4998: 2.584 .258(by restricting the number of words in the subtree)J
4999: 62 277 :M
5000: 1.391 .139(frontiers to 12 and restricting the depth of unlexical-)J
5001: 62 290 :M
5002: 2.01 .201(ized subtrees to 6; with these restrictions an LP of)J
5003: 62 303 :M
5004: .676 .068(90.8% and an LR of 90.6% is achieved.)J
5005: 97 316 :M
5006: 1.402 .14(We may raise the question as to whether we)J
5007: 62 329 :M
5008: 1.133 .113(actually need these extremely large subtrees to obtain)J
5009: 62 342 :M
5010: 1.469 .147(our best results. One could argue that DOP's gain in)J
5011: 62 355 :M
5012: .943 .094(parse accuracy with increasing subtree depth is due to)J
5013: 62 368 :M
5014: 2.85 .285(the model becoming sensitive to the influence of)J
5015: 62 381 :M
5016: 2.267 .227(lexical heads higher in the tree, and that this gain)J
5017: 62 394 :M
5018: 1.992 .199(could also be achieved by a more compact depth-1)J
5019: 62 407 :M
5020: 3.343 .334(DOP model \(i.e. an SCFG\) which annotates the)J
5021: 62 420 :M
5022: 1.201 .12(nonterminals with headwords. However, such a head-)J
5023: 62 433 :M
5024: 3.766 .377(lexicalized stochastic grammar does not capture)J
5025: 62 446 :M
5026: 1.889 .189(dependencies between nonheadwords \(such as )J
5027: f4_10 sf
5028: .64(more)A
5029: 62 459 :M
5030: f3_10 sf
5031: 1.075 .108(and )J
5032: f4_10 sf
5033: .395(than)A
5034: f3_10 sf
5035: 1.195 .119( in the WSJ construction )J
5036: f4_10 sf
5037: 1.824 .182(carry more people)J
5038: 62 472 :M
5039: .746 .075(than cargo)J
5040: f3_10 sf
5041: .453 .045( where neither )J
5042: f4_10 sf
5043: .176(more)A
5044: f3_10 sf
5045: .251 .025( nor )J
5046: f4_10 sf
5047: .152(than)A
5048: f3_10 sf
5049: .627 .063( are headwords)J
5050: 62 485 :M
5051: 3.421 .342(of the NP-constituent )J
5052: f4_10 sf
5053: 3.724 .372(more people than cargo)J
5054: f3_10 sf
5055: 1.178(\),)A
5056: 62 498 :M
5057: 1.131 .113(whereas a frontier-lexicalized DOP model using large)J
5058: 62 511 :M
5059: 2.962 .296(subtrees does capture these dependencies since it)J
5060: 62 524 :M
5061: 1.12 .112(includes subtrees in which e.g. )J
5062: f4_10 sf
5063: .377(more)A
5064: f3_10 sf
5065: .57 .057( and )J
5066: f4_10 sf
5067: .991 .099(than )J
5068: f3_10 sf
5069: 1.234 .123(are the)J
5070: 62 537 :M
5071: .655 .066(only frontier words. In order to isolate the contribution)J
5072: 62 550 :M
5073: 3.424 .342(of nonheadword dependencies, we eliminated all)J
5074: 62 563 :M
5075: .736 .074(subtrees containing two or more nonheadwords \(where)J
5076: 62 576 :M
5077: 1.252 .125(a nonheadword of a subtree is a word which is not a)J
5078: 62 589 :M
5079: .805 .08(headword of the subtree's root nonterminal -- although)J
5080: 62 602 :M
5081: .804 .08(such a nonheadword may be a headword of one of the)J
5082: 62 615 :M
5083: 2.074 .207(subtree's internal nodes\). On the WSJ this led to a)J
5084: 62 628 :M
5085: 2.728 .273(decrease in LP/LR of 1.2%/1.0% for probabilistic)J
5086: 62 641 :M
5087: 1.691 .169(DOP. Thus nonheadword dependencies contribute to)J
5088: 62 654 :M
5089: 1.809 .181(higher parse accuracy, and should not be discarded.)J
5090: 62 667 :M
5091: 1.695 .169(This goes against common wisdom that the relevant)J
5092: 62 680 :M
5093: 1.585 .158(lexical dependencies can be restricted to the locality)J
5094: 62 693 :M
5095: 1.14 .114(of headwords of constituents \(as advocated in Collins)J
5096: 62 706 :M
5097: 1.044 .104(1999\). It also shows that DOP's frontier lexicalization)J
5098: 62 719 :M
5099: 2.656 .266(is a viable alternative to constituent lexicalization)J
5100: 62 732 :M
5101: 2.633 .263(\(as proposed in Charniak 1997; Collins 1997, 99;)J
5102: 62 745 :M
5103: 1.427 .143(Eisner 1997\). Moreover, DOP's use of large subtrees)J
5104: 304 95 :M
5105: 2.427 .243(makes the model not only more lexically but also)J
5106: 304 108 :M
5107: .942 .094(more structurally sensitive.)J
5108: 304 132 :M
5109: f0_12 sf
5110: .201 .02(5.  Conclusion)J
5111: 304 150 :M
5112: f3_10 sf
5113: 2.295 .23(Common wisdom has it that the bias of stochastic)J
5114: 304 163 :M
5115: 1.901 .19(grammars in favor of shorter derivations is harmful)J
5116: 304 176 :M
5117: 2.471 .247(and should be redressed. We have shown that the)J
5118: 304 189 :M
5119: 4.385 .439(common wisdom is wrong for stochastic tree-)J
5120: 304 202 :M
5121: 2.423 .242(substitution grammars that use elementary trees of)J
5122: 304 215 :M
5123: 1.515 .152(flexible size. For such grammars, a non-probabilistic)J
5124: 304 228 :M
5125: 1.232 .123(metric based on the shortest derivation outperforms a)J
5126: 304 241 :M
5127: 1.66 .166(probabilistic metric on the ATIS and OVIS corpora,)J
5128: 304 254 :M
5129: 3.324 .332(while it obtains competitive results on the Wall)J
5130: 304 267 :M
5131: 3.001 .3(Street Journal corpus. We have seen that a non-)J
5132: 304 280 :M
5133: 2.667 .267(probabilistic version of DOP performed especially)J
5134: 304 293 :M
5135: 1.123 .112(well on corpora for which collecting subtree statistics)J
5136: 304 306 :M
5137: 2.419 .242(is difficult, while sentences can still be parsed by)J
5138: 304 319 :M
5139: 3.475 .348(relatively large chunks. We have also seen that)J
5140: 304 332 :M
5141: 1.166 .117(probabilistic DOP obtains very competitive results on)J
5142: 304 345 :M
5143: 3.162 .316(the WSJ corpus. Finally, we conjecture that any)J
5144: 304 358 :M
5145: 3.688 .369(stochastic grammar which uses elementary trees)J
5146: 304 371 :M
5147: 1.976 .198(rather than context-free rules can be turned into an)J
5148: 304 384 :M
5149: 1.278 .128(accurate non-probabilistic version \(e.g. Tree-Insertion)J
5150: 304 397 :M
5151: .68 .068(Grammar and Tree-Adjoining Grammar\).)J
5152: 304 421 :M
5153: f0_12 sf
5154: .047(Acknowledgements)A
5155: 304 439 :M
5156: f3_10 sf
5157: 3.735 .374(Thanks to Khalil Sima'an and three anonymous)J
5158: 304 452 :M
5159: .547 .055(reviewers for useful suggestions.)J
5160: 304 476 :M
5161: f0_12 sf
5162: -.106(References)A
5163: 304 493 :M
5164: f3_9 sf
5165: 1.224 .122(Berg, M.\312van den, R. Bod and )J
5166: 426 493 :M
5167: 1.522 .152(R. Scha, 1994. "A Corpus)J
5168: 530 493 :M
5169: (-)S
5170: 326 505 :M
5171: 6.335 .634(Based Approach to )J
5172: 428 505 :M
5173: 4.226 .423(Semantic Interpretation",)J
5174: 326 517 :M
5175: f4_9 sf
5176: 7.032 .703(Proceedings )J
5177: 390 517 :M
5178: 6.763 .676(Ninth Amsterdam Colloquium)J
5179: f3_9 sf
5180: (,)S
5181: 326 529 :M
5182: .771 .077(Amsterdam, The Netherlands.)J
5183: 304 542 :M
5184: 2.637 .264(Black, E. et al. )J
5185: 375 542 :M
5186: 2.573 .257(1991., A Procedure for Quantitatively)J
5187: 326 554 :M
5188: 4.329 .433(Comparing the Syntactic Coverage of )J
5189: 502 554 :M
5190: .036(English,)A
5191: 326 566 :M
5192: f4_9 sf
5193: 1.721 .172(Proceedings DARPA Speech )J
5194: 442 566 :M
5195: 1.794 .179(and Natural Language)J
5196: 326 578 :M
5197: .073(Workshop)A
5198: 364 578 :M
5199: f3_9 sf
5200: .598 .06(, Pacific Grove, Morgan Kaufmann.)J
5201: 304 589 :M
5202: 1.032 .103(Bod,\312R. 1992. "Data-Oriented Parsing \(DOP\)", )J
5203: 486 589 :M
5204: f4_9 sf
5205: .102(Proceedings)A
5206: 326 601 :M
5207: -.124(COLING-92)A
5208: 371 601 :M
5209: f3_9 sf
5210: .911 .091(, Nantes, France.)J
5211: 304 613 :M
5212: 1.074 .107(Bod,\312R.\3121993. "Using an Annotated Corpus as a )J
5213: 492 613 :M
5214: .223(Stochastic)A
5215: 326 625 :M
5216: 2.29 .229(Grammar", )J
5217: 374 625 :M
5218: f4_9 sf
5219: 2.27 .227(Proceedings )J
5220: 427 625 :M
5221: 2.935 .294(European Chapter of the)J
5222: 326 637 :M
5223: -.086(ACL'93)A
5224: 354 637 :M
5225: f3_9 sf
5226: .723 .072(, Utrecht, The Netherlands.)J
5227: 304 650 :M
5228: 3.467 .347(Bod, R. 1996. "Two Questions about )J
5229: 473 650 :M
5230: 3.309 .331(Data Oriented)J
5231: 326 662 :M
5232: .439 .044(Parsing", )J
5233: 363 662 :M
5234: f4_9 sf
5235: .421 .042(Proceedings Fourth Workshop on Very Large)J
5236: 326 674 :M
5237: .12(Corpora)A
5238: f3_9 sf
5239: .594 .059(, Copenhagen, Denmark.)J
5240: 304 685 :M
5241: 2.906 .291(Bod, R. 1998. )J
5242: 370 685 :M
5243: f4_9 sf
5244: 2.352 .235(Beyond )J
5245: 404 685 :M
5246: 2.033 .203(Grammar: An Experience-Based)J
5247: 326 697 :M
5248: 1.354 .135(Theory of Language)J
5249: 405 697 :M
5250: f3_9 sf
5251: 1.994 .199(, CSLI Publications, Cambridge)J
5252: 326 709 :M
5253: .199 .02(University Press.)J
5254: 304 721 :M
5255: 1.647 .165(Bod, R. 2000a. "An Empirical )J
5256: 429 721 :M
5257: 1.691 .169(Evaluation of LFG-DOP",)J
5258: 326 733 :M
5259: f4_9 sf
5260: .654 .065(Proceedings COLING-2000)J
5261: f3_9 sf
5262: .455 .046(, Saarbr\237cken, Germany.)J
5263: endp
5264: %%Page: 7 7
5265: %%BeginPageSetup
5266: initializepage
5267: (rens; page: 7 of 7)setjob
5268: %%EndPageSetup
5269: -29 -30 :T
5270: gS 29 30 538 781 rC
5271: 62 94 :M
5272: f3_9 sf
5273: 1.126 .113(Bod, R. 2000b. "Redundancy and Minimality in )J
5274: 251 94 :M
5275: .251(Statistical)A
5276: 84 106 :M
5277: 4.405 .44(Parsing with the )J
5278: 166 106 :M
5279: 4.231 .423(DOP Model", submitted for)J
5280: 84 118 :M
5281: .114(publication.)A
5282: 62 130 :M
5283: 2.408 .241(Bod, R. and R. Kaplan, 1998. "A Probabilistic )J
5284: 262 130 :M
5285: -.165(Corpus-)A
5286: 84 142 :M
5287: 3.601 .36(Driven Model for Lexical )J
5288: 203 142 :M
5289: 2.947 .295(Functional Analysis",)J
5290: 84 154 :M
5291: f4_9 sf
5292: .883 .088(Proceedings COLING-ACL'98,)J
5293: f3_9 sf
5294: .525 .053( Montreal, Canada.)J
5295: 62 166 :M
5296: .509 .051(Bonnema, R., R. Bod and R. Scha, 1997. "A DOP Model for)J
5297: 84 178 :M
5298: 1.36 .136(Semantic Interpretation", )J
5299: 184 178 :M
5300: f4_9 sf
5301: .552 .055(Proceedings ACL/EACL-97)J
5302: 287 178 :M
5303: f3_9 sf
5304: (,)S
5305: 84 190 :M
5306: .548 .055(Madrid, Spain.)J
5307: 62 202 :M
5308: 2.467 .247(Bonnema, R., P. Buying and )J
5309: 188 202 :M
5310: 2.933 .293(R. Scha, 1999. "A New)J
5311: 84 214 :M
5312: 4.955 .495(Probability Model for )J
5313: 192 214 :M
5314: 3.553 .355(Data-Oriented Parsing",)J
5315: 84 226 :M
5316: f4_9 sf
5317: 3.182 .318(Proceedings of the )J
5318: 170 226 :M
5319: 2.502 .25(Amsterdam Colloquium 1999)J
5320: 288 226 :M
5321: f3_9 sf
5322: (,)S
5323: 84 238 :M
5324: .771 .077(Amsterdam, The Netherlands.)J
5325: 62 250 :M
5326: .998 .1(Briscoe, T.\312and J. Carroll, 1993. )J
5327: 189 250 :M
5328: 1.749 .175("Generalized Probabilistic)J
5329: 84 262 :M
5330: 2.955 .296(LR Parsing of Natural )J
5331: 185 262 :M
5332: 3.225 .322(Language \(Corpora\) with)J
5333: 84 274 :M
5334: 6.585 .658(Unification-Based )J
5335: 171 274 :M
5336: 8.425 .843(Grammars", )J
5337: 236 274 :M
5338: f4_9 sf
5339: (Computational)S
5340: 84 286 :M
5341: .122(Linguistics)A
5342: f3_9 sf
5343: .508 .051( 19\(1\), 25-59.)J
5344: 62 298 :M
5345: .66 .066(Caraballo, S. and E. Charniak, )J
5346: 180 298 :M
5347: .85 .085(1998. "New Figures of Merit)J
5348: 84 310 :M
5349: 7.892 .789(for )J
5350: 108 310 :M
5351: 6.139 .614(Best-First Probabilistic Chart Parsing",)J
5352: 84 322 :M
5353: f4_9 sf
5354: .222 .022(Computational Linguistics )J
5355: 184 322 :M
5356: f3_9 sf
5357: -.068(24, 275-298.)A
5358: 62 334 :M
5359: 3.714 .371(Chappelier, J. and M. Rajman, 2000. )J
5360: 233 334 :M
5361: 3.138 .314("Monte Carlo)J
5362: 84 346 :M
5363: 1.174 .117(Sampling for NP-hard Maximization Problems )J
5364: 267 346 :M
5365: 1.836 .184(in the)J
5366: 84 358 :M
5367: 4.866 .487(Framework of Weighted Parsing", in )J
5368: 262 358 :M
5369: f4_9 sf
5370: (Natural)S
5371: 84 370 :M
5372: 1.339 .134(Language Processing -- NLP 2000, Lecture Notes )J
5373: 283 370 :M
5374: (in)S
5375: 84 382 :M
5376: 1.053 .105(Artificial Intelligence )J
5377: 169 382 :M
5378: -.333(1835)A
5379: 187 382 :M
5380: f3_9 sf
5381: 1.656 .166(, D. Christodoulakis \(ed.\),)J
5382: 84 394 :M
5383: -.057(2000, 106-117.)A
5384: 62 406 :M
5385: 3.139 .314(Charniak,\312E.\3121996. "Tree-bank Grammars", )J
5386: 244 406 :M
5387: f4_9 sf
5388: .102(Proceedings)A
5389: 84 418 :M
5390: -.247(AAAI-96)A
5391: 115 418 :M
5392: f3_9 sf
5393: .764 .076(, Menlo Park, Ca.)J
5394: 62 430 :M
5395: 1.214 .121(Charniak, E.\3121997. "Statistical Parsing with a )J
5396: 241 430 :M
5397: .138(Context-Free)A
5398: 84 442 :M
5399: .331 .033(Grammar and )J
5400: 138 442 :M
5401: .69 .069(Word Statistics", )J
5402: f4_9 sf
5403: 1.163 .116(Proceedings AAAI-97)J
5404: 287 442 :M
5405: f3_9 sf
5406: (,)S
5407: 84 454 :M
5408: .592 .059(Menlo Park, Ca.)J
5409: 62 467 :M
5410: 1.322 .132(Charniak, )J
5411: 103 467 :M
5412: 1.008 .101(E. 2000. "A Maximum-Entropy-Inspired Parser",)J
5413: 84 479 :M
5414: f4_9 sf
5415: 1.077 .108(Proceedings ANLP-NAACL'2000)J
5416: f3_9 sf
5417: .641 .064(, Seattle, Washington.)J
5418: 62 490 :M
5419: 1.142 .114(Chitrao, M. and R. Grishman, 1990. "Statistical )J
5420: 250 490 :M
5421: 1.19 .119(Parsing of)J
5422: 84 502 :M
5423: .921 .092(Messages", )J
5424: 130 502 :M
5425: f4_9 sf
5426: .335 .034(Proceedings DARPA Speech and Language)J
5427: 84 514 :M
5428: .145 .014(Workshop )J
5429: 124 514 :M
5430: f3_9 sf
5431: -.312(1990.)A
5432: 62 527 :M
5433: 1.281 .128(Collins, M. 1997. "Three generative lexicalised models )J
5434: 280 527 :M
5435: -.745(for)A
5436: 84 539 :M
5437: 4.554 .455(statistical parsing", )J
5438: f4_9 sf
5439: 6.068 .607(Proceedings )J
5440: 235 539 :M
5441: .071(EACL/ACL'97)A
5442: f3_9 sf
5443: (,)S
5444: 84 551 :M
5445: .548 .055(Madrid, Spain.)J
5446: 62 562 :M
5447: .246 .025(Collins, M. 1999. )J
5448: f4_9 sf
5449: .351 .035(Head-Driven Statistical Models for Natural)J
5450: 84 574 :M
5451: 4.862 .486(Language )J
5452: 132 574 :M
5453: 1.054(Parsing)A
5454: f3_9 sf
5455: 4.447 .445(, PhD-thesis, University of)J
5456: 84 586 :M
5457: 1.062 .106(Pennsylvania, PA.)J
5458: 62 598 :M
5459: 3.989 .399(Daelemans, W. \(ed.\) )J
5460: 159 598 :M
5461: 3.215 .322(1999. Memory-Based Language)J
5462: 84 610 :M
5463: 1.196 .12(Processing, )J
5464: 131 610 :M
5465: f4_9 sf
5466: .891 .089(Journal for Experimental and Theoretical)J
5467: 84 622 :M
5468: .869 .087(Artificial Intelligence)J
5469: 165 622 :M
5470: f3_9 sf
5471: .447 .045(, 11\(3\).)J
5472: 62 635 :M
5473: 1.307 .131(Eisner, J. 1997. )J
5474: 127 635 :M
5475: 2.452 .245("Bilexical Grammars and a Cubic-Time)J
5476: 84 647 :M
5477: 1.573 .157(Probabilistic Parser", )J
5478: 170 647 :M
5479: f4_9 sf
5480: 1.105 .111(Proceedings Fifth International)J
5481: 84 659 :M
5482: .509 .051(Workshop on Parsing Technologies)J
5483: f3_9 sf
5484: .402 .04(, Boston, Mass.)J
5485: 62 670 :M
5486: .108 .011(Goodman, J. 1998. )J
5487: 134 670 :M
5488: f4_9 sf
5489: -.072(Parsing Inside-Out)A
5490: 203 670 :M
5491: f3_9 sf
5492: 1.015 .101(, Ph.D. thesis, Harvard)J
5493: 84 682 :M
5494: .487 .049(University, Mass.)J
5495: 62 694 :M
5496: 2.556 .256(Hoogweg, L. 2000. )J
5497: 147 694 :M
5498: f4_9 sf
5499: 2.226 .223(Extending DOP1 with the )J
5500: 257 694 :M
5501: .064(Insertion)A
5502: 84 706 :M
5503: (Operation)S
5504: 122 706 :M
5505: f3_9 sf
5506: 1.188 .119(, Master's thesis, University of Amsterdam,)J
5507: 84 718 :M
5508: .412 .041(The Netherlands.)J
5509: 62 730 :M
5510: 1.023 .102(Johnson, M. 1998. "The DOP Estimation Method is Biased)J
5511: 84 742 :M
5512: .533 .053(and Inconsistent", squib.)J
5513: 304 94 :M
5514: .341 .034(Lari, )J
5515: 325 94 :M
5516: 1.009 .101(K. and S. Young 1990. "The Estimation of Stochastic)J
5517: 326 106 :M
5518: 3.209 .321(Context-Free Grammars Using )J
5519: 459 106 :M
5520: 2.51 .251(the Inside-Outside)J
5521: 326 118 :M
5522: .159 .016(Algorithm", )J
5523: 373 118 :M
5524: f4_9 sf
5525: .201 .02(Computer Speech and Language)J
5526: f3_9 sf
5527: .121 .012(, 4, 35-56.)J
5528: 304 130 :M
5529: .783 .078(Magerman, D. and M. Marcus, 1991. "Pearl: A Probabilistic)J
5530: 326 142 :M
5531: 5.912 .591(Chart Parser", )J
5532: 400 142 :M
5533: f4_9 sf
5534: 5.709 .571(Proceedings EACL'91)J
5535: f3_9 sf
5536: 3.356 .336(, Berlin,)J
5537: 326 154 :M
5538: -.105(Germany.)A
5539: 304 166 :M
5540: 2.448 .245(Marcus, M., B. Santorini and M. )J
5541: 447 166 :M
5542: 2.166 .217(Marcinkiewicz, 1993.)J
5543: 326 178 :M
5544: 1.074 .107("Building a )J
5545: 374 178 :M
5546: 1.7 .17(Large Annotated Corpus of English: the)J
5547: 326 190 :M
5548: .744 .074(Penn Treebank", )J
5549: 392 190 :M
5550: f4_9 sf
5551: .367 .037(Computational Linguistics)J
5552: 490 190 :M
5553: f3_9 sf
5554: .631 .063( 19\(2\).)J
5555: 304 202 :M
5556: 2.111 .211(Neumann, G. 1998. "Automatic Extraction of Stochastic)J
5557: 326 214 :M
5558: 5.24 .524(Lexicalized Tree Grammars )J
5559: 459 214 :M
5560: 3.945 .394(from Treebanks",)J
5561: 326 226 :M
5562: f4_9 sf
5563: 1.307 .131(Proceedings of the )J
5564: 403 226 :M
5565: 1.741 .174(4th Workshop on Tree-Adjoining)J
5566: 326 238 :M
5567: -.007(Grammars and Related Frameworks)A
5568: 459 238 :M
5569: f3_9 sf
5570: .858 .086(, Philadelphia, PA.)J
5571: 304 250 :M
5572: .864 .086(Neumann, G. and D. Flickinger, 1999. "Learning Stochastic)J
5573: 326 262 :M
5574: 3.552 .355(Lexicalized Tree Grammars from HPSG", )J
5575: 512 262 :M
5576: -.331(DFKI)A
5577: 326 274 :M
5578: 1.041 .104(Technical Report, Saarbr\237cken, Germany.)J
5579: 304 286 :M
5580: .866 .087(Ratnaparkhi, A. 1999. "Learning to Parse Natural Language)J
5581: 326 298 :M
5582: .537 .054(with )J
5583: 346 298 :M
5584: 1.352 .135(Maximum Entropy Models", )J
5585: f4_9 sf
5586: 1.87 .187(Machine Learning)J
5587: 326 310 :M
5588: f3_9 sf
5589: -.068(34, 151-176.)A
5590: 304 323 :M
5591: .89 .089(Shannon, C. 1948. A )J
5592: 388 323 :M
5593: 1.882 .188(Mathematical Theory of Communic)J
5594: 530 323 :M
5595: (-)S
5596: 326 335 :M
5597: .279 .028(ation. )J
5598: f4_9 sf
5599: .399 .04(Bell System Technical Journal)J
5600: 462 335 :M
5601: f3_9 sf
5602: (. 27, )S
5603: 481 335 :M
5604: (379-423, 623-)S
5605: 326 347 :M
5606: -.25(656.)A
5607: 304 358 :M
5608: 2.868 .287(Sima'an, K. )J
5609: 359 358 :M
5610: 3.261 .326(1995. "An optimized algorithm for Data)J
5611: 326 370 :M
5612: 2.574 .257(Oriented Parsing", in: R. Mitkov and N. Nicolov)J
5613: 326 382 :M
5614: 4.862 .486(\(eds.\), )J
5615: 360 382 :M
5616: f4_9 sf
5617: 4.542 .454(Recent )J
5618: 395 382 :M
5619: 4.345 .434(Advances in Natural Language)J
5620: 326 394 :M
5621: 1.151 .115(Processing 1995)J
5622: 390 394 :M
5623: f3_9 sf
5624: 2.502 .25(, volume 136 of )J
5625: 464 394 :M
5626: f4_9 sf
5627: 1.768 .177(Current Issues )J
5628: 526 394 :M
5629: (in)S
5630: 326 406 :M
5631: .849 .085(Linguistic Theory)J
5632: f3_9 sf
5633: .7 .07(. John Benjamins, Amsterdam.)J
5634: 304 418 :M
5635: 5.448 .545(Sima'an, K. )J
5636: 367 418 :M
5637: 4.895 .49(1996. "Computational Complexity of)J
5638: 326 430 :M
5639: 2.722 .272(Probabilistic )J
5640: 381 430 :M
5641: 3.935 .393(Disambiguation by means of Tree)J
5642: 326 442 :M
5643: 1.926 .193(Grammars", )J
5644: 377 442 :M
5645: f4_9 sf
5646: 2.281 .228(Proceedings COLING-96)J
5647: f3_9 sf
5648: 1.803 .18(, Copenhagen,)J
5649: 326 454 :M
5650: -.105(Denmark.)A
5651: 304 466 :M
5652: .934 .093(Sima'an, K. 1999. )J
5653: 376 466 :M
5654: f4_9 sf
5655: .793 .079(Learning Efficient )J
5656: 448 466 :M
5657: (Disambiguation)S
5658: 507 466 :M
5659: f3_9 sf
5660: 1.005 .1(. ILLC)J
5661: 326 478 :M
5662: 2.571 .257(Dissertation Series )J
5663: 408 478 :M
5664: 2.85 .285(1999-02, Utrecht University /)J
5665: 326 490 :M
5666: 4.913 .491(University of Amsterdam, )J
5667: 450 490 :M
5668: 5.816 .582(March 1999, The)J
5669: 326 502 :M
5670: .025(Netherlands.)A
5671: 304 514 :M
5672: 1.331 .133(Weischedel, R., M. Meteer, R, Schwarz, L. Ramshaw and)J
5673: 326 526 :M
5674: 3.023 .302(J. Palmucci, 1993. )J
5675: 410 526 :M
5676: 3.106 .311("Coping with Ambiguity and)J
5677: 326 538 :M
5678: 3.091 .309(Unknown )J
5679: 370 538 :M
5680: 3.824 .382(Words through Probabilistic Models",)J
5681: 326 550 :M
5682: f4_9 sf
5683: .367 .037(Computational Linguistics)J
5684: 424 550 :M
5685: f3_9 sf
5686: .351 .035(, 19\(2\), 359-382.)J
5687: 304 562 :M
5688: .682 .068(Way, A. 1999. "A Hybrid Architecture for Robust MT using)J
5689: 326 574 :M
5690: .967 .097(LFG-DOP", )J
5691: 375 574 :M
5692: f4_9 sf
5693: .901 .09(Journal of Experimental )J
5694: 471 574 :M
5695: 1.567 .157(and Theoretical)J
5696: 326 586 :M
5697: 1.181 .118(Artificial Intelligence)J
5698: 408 586 :M
5699: f3_9 sf
5700: 1.661 .166( 11 \(Special Issue on Memory)J
5701: 530 586 :M
5702: (-)S
5703: 326 598 :M
5704: 1.042 .104(Based Language Processing\).)J
5705: 304 610 :M
5706: .57 .057(Zipf, G. 1935. )J
5707: 361 610 :M
5708: f4_9 sf
5709: 1.149 .115(The Psycho-Biology of Language)J
5710: 489 610 :M
5711: f3_9 sf
5712: 1.317 .132(, Houghton)J
5713: 326 622 :M
5714: -.177(Mifflin.)A
5715: endp
5716: %%Trailer
5717: end
5718: %%EOF
5719: