1: \documentclass[10pt, letterpaper, onecolumn, peerreview]{IEEEtran}
2: \usepackage{epsfig}
3: \usepackage{amssymb}
4: \usepackage[tbtags]{amsmath}
5: \usepackage{graphics,eepic,epic}
6: \usepackage{latexsym}
7: \usepackage{euscript}
8: %\usepackage{styles/preprint}
9: \usepackage{graphics,eepic,epic,psfrag}
10:
11:
12: % GWW DEFINITIONS AND ABBREVIATIONS
13:
14: % TeX Defs
15:
16: \usepackage[tbtags]{amsmath} % defines many math commands and
17: % subequations environment, etc
18: \usepackage{amssymb} % get, among others, blackboard bold fonts
19: % defines extra symbols like \gtreqless, etc
20: \usepackage{verbatim} % get comment environment, + new verbatim
21: % \usepackage{amsxtra} % get, eg, \accentedsymbol
22:
23: \DeclareMathOperator*{\argmax}{arg\,max} % starred form: sub/superscripts are set as limits under/over the operator in display style
24: \DeclareMathOperator*{\argmin}{arg\,min}
25: \DeclareMathOperator*{\argsup}{arg\,sup}
26: \DeclareMathOperator*{\arginf}{arg\,inf}
27: \DeclareMathOperator{\erfc}{erfc} % unstarred form: sub/superscripts stay beside the operator
28: \DeclareMathOperator{\diag}{diag}
29: \DeclareMathOperator{\cum}{cum}
30: \DeclareMathOperator{\sgn}{sgn}
31: \DeclareMathOperator{\tr}{tr}
32: \DeclareMathOperator{\spn}{span}
33: \DeclareMathOperator{\adj}{adj}
34: \DeclareMathOperator{\var}{var}
35: \DeclareMathOperator{\cov}{cov}
36: \DeclareMathOperator{\sech}{sech}
37: \DeclareMathOperator{\sinc}{sinc}
38: \DeclareMathOperator*{\lms}{l.i.m.\,} % presumably "limit in the mean"; the trailing \, is part of the operator text
39: \newcommand{\varop}[1]{\var\left[{#1}\right]} % var[#1] with auto-sized brackets
40: \newcommand{\covop}[2]{\cov\left({#1},{#2}\right)} % cov(#1,#2) with auto-sized parentheses
41:
42: \newcommand{\p}{\partial}
43:
44: % LIST ENVIRONMENTS
45:
46: \newcounter{actr}% item counter behind the (a), (b), ... labels of alist
47: \newenvironment{alist}{\begin{list}{(\alph{actr})}{\usecounter{actr}}}%
48: {\end{list}}
49:
50: \newcounter{ictr}% item counter behind the (i), (ii), ... labels of ilist
51: \newenvironment{ilist}{\begin{list}{(\roman{ictr})}{\usecounter{ictr}}}%
52: {\end{list}}
53:
54: \iffalse
55:
56: % SPACING ENVIRONMENTS
57:
58: \newenvironment{singlespace}%
59: {\begin{spacing}{1}}{\end{spacing}}
60:
61: \newenvironment{onehalfspace}% for 11pt font
62: {\begin{spacing}{1.21}}{\end{spacing}}
63:
64: \newenvironment{doublespace}% for 11pt font
65: {\begin{spacing}{1.62}}{\end{spacing}}
66:
67: \fi
68:
69: % THEOREM ENVIRONMENTS
70:
71: \newtheorem{thm}{Theorem} % none of these declarations passes a shared-counter argument, so each environment is numbered on its own
72: \newtheorem{lemma}{Lemma}
73: \newtheorem{claim}{Claim}
74: \newtheorem{corol}{Corollary}
75: \newtheorem{prop}{Proposition}
76: \newtheorem{conj}{Conjecture}
77: \newtheorem{defn}{Definition}
78: %\newenvironment{proof}%
79: %{\noindent{\em Proof: } \begin{singlespace} \small \noindent}%
80: %{\noindent\qed \end{singlespace}}
81: %\newenvironment{new-proof}[1]%
82: %{{\em Proof of #1: } \begin{singlespace} \small \noindent}%
83: %{\ \noindent\qed \end{singlespace}}
84:
85: \iffalse
86: \newenvironment{proof}%
87: {\noindent{\em Proof: } \small \noindent}%
88: {\noindent\qed }
89: \newenvironment{new-proof}[1]%
90: {{\em Proof of #1: } \small \noindent}%
91: {\ \noindent\qed }
92: \fi
93:
94: \newcommand{\abs}[1]{\left|#1\right|} % |#1| with auto-sized bars
95: %\newcommand{\comb}[2]{{#1\choose#2}}
96: \newcommand{\comb}[2]{\binom{#1}{#2}} % binomial coefficient via \binom
97: \newcommand{\ie}{i.e.} % Latin abbreviations: no trailing space is supplied and TeX skips blanks after a control word, so write "\ie\ " when a word follows
98: \newcommand{\eg}{e.g.}
99: \newcommand{\etc}{etc.}
100: \newcommand{\viz}{viz.}
101: \newcommand{\etal}{et al.}
102: \newcommand{\cf}{cf.}
103:
104: \newcommand{\vect}[3]{\begin{bmatrix} #1 & #2 & \cdots & #3 \end{bmatrix}^\T} % row [#1 #2 ... #3] with a transpose mark; relies on \T, which is defined further down
105:
106: \newcommand{\dsp}{.5\baselineskip} % double space amount
107: \newcommand{\down}{\vspace{\dsp}} % double space command
108: \newcommand{\ddown}{\vspace{\baselineskip}} % quadruple space command
109: \newcommand{\spec}{\hspace*{1pt}} % little bit of space
110: \newcommand{\ds}{\displaystyle} % abbreviation
111: \newcommand{\ts}{\textstyle} % abbreviation
112: \newcommand{\nin}{\noindent} % noindent abbreviation
113: \newcommand{\cvar}[1]{\mathrm{var_{#1}\,}}
114: \newcommand{\qed}{\rule[0.1ex]{1.4ex}{1.6ex}}
115: \newcommand{\mycap}[2]{\caption{\normalfont\slshape #2 \label{#1}}} % slanted caption text #2 carrying label #1 (label comes FIRST in the argument list); \normalfont\slshape replaces the obsolete \sl switch
116: \newcommand{\subcap}[1]{{\begin{center}\normalfont\slshape #1\end{center}}} % centred, slanted sub-caption line
117: \newcommand{\ditem}[1]{\item[#1 \hspace*{\fill}]}
118: \newcommand{\appfig}{\vspace*{1in}\begin{center} Figure appended to
119: end of manuscript. \end{center} \vspace*{1in}}
120: \newcommand{\psx}[1]{\centerline{\epsfxsize=6in \epsfbox{#1}}}
121: \newcommand{\psy}[1]{\centerline{\epsfysize=7in \epsfbox{#1}}}
122: \newcommand{\psxs}[2]{\centerline{\epsfxsize=#1in \epsfbox{#2}}}
123: \newcommand{\psxsbb}[3]{\centerline{\epsfxsize=#1in \epsfbox[#3]{#2}}}
124: \newcommand{\psys}[2]{\centerline{\epsfysize=#1in \epsfbox{#2}}}
125: \newcommand{\convsamp}[3]{\left.\left\{#1 \ast #2\right\}\right|_{#3}}
126: \newcommand{\gap}{\qquad}
127: \newcommand{\order}[1]{\mathcal{O}\left(#1\right)}
128: \newcommand{\arror}[3]{\begin{cases} #1 & #2 \\
129: #3 & \text{otherwise} \end{cases}}
130: \newcommand{\arrorc}[3]{\begin{cases} #1 & #2 \\
131: #3 & \text{otherwise,} \end{cases}}
132: \newcommand{\arrorp}[3]{\begin{cases} #1 & #2 \\
133: #3 & \text{otherwise.} \end{cases}}
134: \newcommand{\darror}[4]{\begin{cases} #1 & #2 \\ #3 & #4 \end{cases}}
135: % \newcommand{\defeq}{\stackrel{\triangle}{=}}
136: \newcommand{\defeq}{\stackrel{\Delta}{=}}
137: \newcommand{\msconv}{\stackrel{\mathrm{m.s.}}{\longrightarrow}}
138: \newcommand{\pwaeconv}{\stackrel{\mathrm{p.w.a.e.}}{\longrightarrow}}
139: \newcommand{\peq}{\stackrel{\mathcal{P}}{=}}
140: % \newcommand{\glt}{ \begin{array}{c} \Hh=H_1 \\
141: % \renewcommand{\arraystretch}{.3}
142: % \begin{array}{c} > \\ < \end{array}
143: % \renewcommand{\arraystretch}{1} \\ \Hh=H_0 \end{array}}
144:
145: \hyphenation{or-tho-nor-mal}
146: \hyphenation{wave-let wave-lets}
147:
148: \newcommand{\crb}{Cram\'{e}r-Rao} % obsolete
149: \newcommand{\CR}{Cram\'{e}r-Rao}
150: \newcommand{\KL}{Karhunen-Lo\`{e}ve}
151: \newcommand{\sE}{\sqrt{E_0}}
152: \newcommand{\pe}{\Pr(\eps)} % uses \eps, defined further down; \pe is itself overridden later by a \renewcommand to \Pr(e)
153: \newcommand{\jw}{j\w} % \w and \wh are defined further down in the Greek section; this is fine because macro bodies are only expanded at use
154: \newcommand{\ejw}{e^{j\w}}
155: \newcommand{\ejv}{e^{j\nu}}
156: \newcommand{\wo}{{\w_0}}
157: \newcommand{\woh}{{\wh_0}}
158: \newcommand{\sumi}[1]{\sum_{#1=-\infty}^{+\infty}}
159: \newcommand{\inti}{\int_{-\infty}^{+\infty}}
160: \newcommand{\intp}{\int_{-\pi}^{\pi}}
161: \newcommand{\nintp}{\frac{1}{2\pi}\int_{-\pi}^{\pi}}
162: \newcommand{\inth}{\int_{0}^{\infty}}
163: \newcommand{\E}[1]{E\left[{#1}\right]}
164: \newcommand{\bigE}[1]{E\bigl[{#1}\bigr]}
165: \newcommand{\BigE}[1]{E\Bigl[{#1}\Bigr]}
166: \newcommand{\biggE}[1]{E\biggl[{#1}\biggr]}
167: \newcommand{\BiggE}[1]{E\Biggl[{#1}\Biggr]}
168: \newcommand{\Prob}[1]{\Pr\left[{#1}\right]}
169: \newcommand{\Pu}[1]{\Pr\left[{#1}\right]} % obsolete; same as \Prob now
170: \newcommand{\Pc}[2]{\Pr\left[{#1}\mid{#2}\right]} % obsolete
171: \newcommand{\Pcb}[2]{\Pr\left[{#1}\Bigm|{#2}\right]} % obsolete
172: \newcommand{\Q}[1]{\mathcal{Q}\left({#1}\right)}
173: \newcommand{\FT}[1]{\mathcal{F}\left\{{#1}\right\}}
174: \newcommand{\LT}[1]{\mathcal{L}\left\{{#1}\right\}}
175: \newcommand{\ZT}[1]{\mathcal{Z}\left\{{#1}\right\}}
176: %\newcommand{\reals}{\mathbf{R}}
177: \newcommand{\reals}{\mathbb{R}}
178: %\newcommand{\ints}{\mathbf{Z}}
179: \newcommand{\ints}{\mathbb{Z}}
180: \newcommand{\compls}{\mathbb{C}}
181: \newcommand{\nats}{\mathbb{N}}
182: \newcommand{\rats}{\mathbb{Q}}
183: \newcommand{\ltwor}{L^2(\reals)}
184: \newcommand{\ltwoz}{\ell^2(\ints)}
185: \newcommand{\ltwow}{L^2(\Omega)}
186: % \newcommand{\ltwo}{\mathbf{L}^2}
187: % \newcommand{\ltwor}{\mathbf{L}^2 (\reals)}
188: % \newcommand{\ltwoz}{\mathbf{l}^2 (\ints)}
189: \newcommand{\sys}[1]{\mathcal{S}\left\{#1\right\}}
190: \newcommand{\nn}{\nonumber}
191:
192: \newcommand{\ip}[2]{\left\langle{#1},{#2}\right\rangle} % angle-bracket pair <#1,#2> with auto-sized delimiters
193: \newcommand{\di}[2]{d\left({#1},{#2}\right)} % d(#1,#2) with auto-sized parentheses
194: \newcommand{\ceil}[1]{\lceil{#1}\rceil} % fixed-size delimiters (no \left/\right)
195: \newcommand{\floor}[1]{\lfloor{#1}\rfloor} % fixed-size delimiters (no \left/\right)
196: \newcommand{\phase}{\measuredangle}
197:
198: \newcommand{\Ht}{\mathrm{H}} % upright H (presumably the Hermitian-transpose mark)
199: \newcommand{\T}{{\mathrm{T}}} % upright T; \vect uses it as the transpose superscript
200: % \newcommand{\R}{\Re\mathit{e}}
201: % \newcommand{\I}{\Im\mathit{m}}
202: \DeclareMathOperator{\R}{Re} % upright "Re" operator, replacing the commented-out \Re-based definition above
203: \DeclareMathOperator{\I}{Im} % upright "Im" operator, replacing the commented-out \Im-based definition above
204:
205:
206:
207:
208: % ABBREVIATIONS FOR CHARACTERS IN VARIOUS FONTS
209:
210: % STANDARD CHARACTERS
211:
212: \newcommand{\ba}{{\mathbf{a}}}
213: \newcommand{\bah}{{\hat{\ba}}}
214: \newcommand{\ah}{{\hat{a}}}
215: \newcommand{\Ah}{{\hat{A}}}
216: \newcommand{\cA}{{\mathcal{A}}}
217: \newcommand{\at}{{\tilde{a}}}
218: \newcommand{\bat}{{\tilde{\ba}}}
219: \newcommand{\At}{{\tilde{A}}}
220: \newcommand{\bA}{{\mathbf{A}}}
221: \newcommand{\ac}{a^{\ast}}
222:
223: \newcommand{\bb}{{\mathbf{b}}}
224: \newcommand{\bbt}{{\tilde{\bb}}}
225: \newcommand{\cB}{{\mathcal{B}}}
226: \newcommand{\tb}{{\tilde{b}}}
227: \newcommand{\tB}{{\tilde{B}}}
228: \newcommand{\hb}{{\hat{b}}}
229: \newcommand{\hB}{{\hat{B}}}
230: \newcommand{\bB}{{\mathbf{B}}}
231:
232: \newcommand{\bc}{{\mathbf{c}}}
233: \newcommand{\bch}{{\hat{\mathbf{c}}}}
234: \newcommand{\bC}{{\mathbf{C}}}
235: \newcommand{\cC}{{\mathcal{C}}}
236: \newcommand{\ct}{{\tilde{c}}}
237: \newcommand{\Ct}{{\tilde{C}}}
238: \newcommand{\ctc}{\ct^{\ast}}
239:
240: \newcommand{\bd}{{\mathbf{d}}}
241: \newcommand{\bD}{{\mathbf{D}}}
242: \newcommand{\cD}{{\mathcal{D}}}
243: \newcommand{\hd}{{\hat{d}}} % old: \dh
244: \newcommand{\dt}{{\tilde{d}}}
245: \newcommand{\bdt}{{\tilde{\bd}}}
246: \newcommand{\Dt}{{\tilde{D}}}
247: \newcommand{\dtc}{\dt^{\ast}}
248:
249: \newcommand{\et}{{\tilde{e}}}
250: \newcommand{\bfe}{{\mathbf{e}}}
251: \newcommand{\bE}{{\mathbf{E}}}
252: \newcommand{\cE}{{\mathcal{E}}}
253: \newcommand{\cEt}{{\tilde{\cE}}}
254: \newcommand{\cEb}{{\bar{\cE}}}
255: \newcommand{\bcE}{{\boldsymbol{\cE}}} % bold calligraphic E; \mathbf leaves \mathcal glyphs unbolded, \boldsymbol (amsmath) switches to the bold math version
256:
257: \newcommand{\bff}{{{\mathbf{f}}}}
258: \newcommand{\bF}{{\mathbf{F}}}
259: \newcommand{\cF}{{\mathcal{F}}}
260: \newcommand{\ft}{{\tilde{f}}}
261: \newcommand{\Ft}{{\tilde{F}}}
262: \newcommand{\Fh}{{\hat{F}}}
263: \newcommand{\ftc}{\ft^{\ast}}
264: \newcommand{\bft}{{\tilde{\bff}}}
265: \newcommand{\bFt}{{\tilde{\bF}}}
266: \newcommand{\fh}{{\hat{f}}}
267:
268: \newcommand{\bg}{{\mathbf{g}}}
269: \newcommand{\gt}{{\tilde{g}}}
270: \newcommand{\bgt}{{\tilde{\bg}}}
271: \newcommand{\bG}{{\mathbf{G}}}
272: \newcommand{\cG}{{\mathcal{G}}}
273: \newcommand{\Gt}{{\tilde{\bG}}}
274:
275: \newcommand{\hti}{{\tilde{h}}}
276: \newcommand{\Hti}{{\tilde{H}}}
277: \newcommand{\bh}{{\mathbf{h}}}
278: \newcommand{\bht}{{\tilde{\bh}}}
279: \newcommand{\Hh}{{\hat{H}}}
280: \newcommand{\bH}{{\mathbf{H}}}
281: \newcommand{\bHh}{{\hat{\mathbf{H}}}}
282:
283: \newcommand{\ih}{{\hat{\imath}}}
284: \newcommand{\bI}{{\mathbf{I}}}
285: \newcommand{\cI}{{\mathcal{I}}}
286:
287: \newcommand{\jh}{{\hat{\jmath}}}
288: \newcommand{\bJ}{{\mathbf{J}}}
289: \newcommand{\cJ}{{\mathcal{J}}}
290: \newcommand{\Jt}{{\tilde{J}}}
291:
292: \newcommand{\bk}{{\mathbf{k}}}
293: \newcommand{\bK}{{\mathbf{K}}}
294: \newcommand{\Kt}{{\tilde{K}}}
295: \newcommand{\Kh}{{\hat{K}}}
296: \newcommand{\cK}{{\mathcal{K}}}
297:
298: \newcommand{\cl}{\ell}
299: \newcommand{\bL}{{\mathbf{L}}}
300: \newcommand{\cL}{{\mathcal{L}}}
301:
302: \newcommand{\mb}{{\mathbf{m}}}
303: \newcommand{\mh}{{\hat{m}}}
304: \newcommand{\bM}{{\mathbf{M}}}
305: \newcommand{\cM}{{\mathcal{M}}}
306:
307:
308: \newcommand{\cN}{{\mathcal{N}}}
309: \newcommand{\Nt}{{\tilde{N}}}
310: \newcommand{\tN}{{\tilde{N}}} % backward compatibility
311:
312: \newcommand{\bo}{{\mathbf{o}}}
313: \newcommand{\cO}{{\mathcal{O}}}
314:
315: \newcommand{\bp}{{\mathbf{p}}}
316: \newcommand{\bP}{{\mathbf{P}}}
317: \newcommand{\cP}{{\mathcal{P}}}
318: \newcommand{\ph}{{\hat{p}}}
319: \newcommand{\Ph}{{\hat{P}}}
320:
321: \newcommand{\bq}{{\mathbf{q}}}
322: \newcommand{\cQ}{{\mathcal{Q}}}
323: \newcommand{\bQ}{{\mathbf{Q}}}
324:
325: \newcommand{\br}{{\mathbf{r}}}
326: \newcommand{\bR}{{\mathbf{R}}}
327: \newcommand{\cR}{{\mathcal{R}}}
328: \newcommand{\Rt}{{\tilde{R}}}
329:
330: \newcommand{\sh}{{\hat{s}}}
331: \newcommand{\sck}{{\check{s}}}
332: \newcommand{\shh}{{\Hat{\Hat{s}}}}
333: \newcommand{\bs}{{\mathbf{s}}}
334: \newcommand{\bsh}{{\hat{\mathbf{s}}}}
335: \newcommand{\bsc}{{\check{\mathbf{s}}}}
336: \newcommand{\bshh}{{\Hat{\Hat{\mathbf{s}}}}}
337: \newcommand{\bS}{{\mathbf{S}}}
338: \newcommand{\cS}{{\mathcal{S}}}
339: \newcommand{\st}{{\tilde{s}}}
340:
341: \newcommand{\bT}{{\mathbf{T}}}
342: \newcommand{\cT}{{\mathcal{T}}}
343:
344: \newcommand{\bu}{{\mathbf{u}}}
345: \newcommand{\bU}{{\mathbf{U}}}
346: \newcommand{\bUt}{{\tilde{\bU}}}
347: \newcommand{\ut}{{\tilde{u}}}
348: \newcommand{\cU}{{\mathcal{U}}}
349:
350: \newcommand{\vh}{{\hat{v}}}
351: \newcommand{\bv}{{\mathbf{v}}}
352: \newcommand{\bV}{{\mathbf{V}}}
353: \newcommand{\cV}{{\mathcal{V}}}
354:
355: \newcommand{\bw}{{\mathbf{w}}}
356: \newcommand{\bW}{{\mathbf{W}}}
357: \newcommand{\cW}{{\mathcal{W}}}
358:
359: \newcommand{\bx}{{\mathbf{x}}}
360: \newcommand{\bxt}{{\tilde{\bx}}}
361: \newcommand{\xt}{{\tilde{x}}}
362: \newcommand{\Xt}{{\tilde{X}}}
363: \newcommand{\bX}{{\mathbf{X}}}
364: \newcommand{\cX}{{\mathcal{X}}}
365: \newcommand{\bXt}{{\tilde{\bX}}}
366: \newcommand{\xh}{{\hat{x}}}
367: \newcommand{\xc}{{\check{x}}}
368: \newcommand{\xhh}{{\Hat{\Hat{x}}}}
369: \newcommand{\bxh}{{\hat{\bx}}}
370: \newcommand{\bxc}{{\check{\bx}}}
371: \newcommand{\bxhh}{{\Hat{\Hat{\bx}}}} % double hat on bold x; both accents use \Hat, matching \xhh, \shh and \bshh
372:
373: \newcommand{\cY}{{\mathcal{Y}}}
374: \newcommand{\by}{{\mathbf{y}}}
375: \newcommand{\byt}{{\tilde{\by}}}
376: \newcommand{\bY}{{\mathbf{Y}}}
377: \newcommand{\Yt}{{\tilde{Y}}}
378: \newcommand{\yt}{{\tilde{y}}}
379: \newcommand{\yh}{{\hat{y}}}
380:
381: \newcommand{\zt}{{\tilde{z}}}
382: \newcommand{\zh}{{\hat{z}}}
383: \newcommand{\bz}{{\mathbf{z}}}
384: \newcommand{\bZ}{{\mathbf{Z}}}
385: \newcommand{\cZ}{{\mathcal{Z}}}
386:
387: % GREEK CHARACTERS
388:
389: \newcommand{\al}{\alpha}
390: \newcommand{\bal}{{\boldsymbol{\al}}}
391: \newcommand{\balh}{{\hat{\boldsymbol{\al}}}}
392: \newcommand{\alh}{{\hat{\al}}}
393:
394: \newcommand{\bt}{\beta}
395: \newcommand{\btt}{{\tilde{\bt}}}
396: \newcommand{\btht}{{\hat{\bt}}}
397:
398: \newcommand{\g}{\gamma}
399: \newcommand{\G}{\Gamma}
400: \newcommand{\bGa}{{\boldsymbol{\Gamma}}}
401: \newcommand{\gh}{{\hat{\g}}}
402:
403: \newcommand{\de}{\delta}
404: \newcommand{\De}{\Delta}
405: \newcommand{\Deh}{{\hat{\Delta}}}
406: \newcommand{\bde}{{\boldsymbol{\de}}}
407: \newcommand{\bDe}{{\boldsymbol{\De}}}
408:
409: \newcommand{\e}{\epsilon}
410: \newcommand{\eps}{\varepsilon}
411:
412: \newcommand{\etah}{{\hat{\eta}}}
413: \newcommand{\bpi}{{\boldsymbol{\pi}}}
414:
415: \newcommand{\pht}{{\tilde{\phi}}}
416: \newcommand{\Pht}{{\tilde{\Phi}}}
417:
418: \newcommand{\pst}{{\tilde{\psi}}}
419: \newcommand{\Pst}{{\tilde{\Psi}}}
420:
421: \newcommand{\s}{\sigma}
422: \newcommand{\sih}{\hat{\sigma}}
423:
424: \newcommand{\z}{\zeta}
425: \newcommand{\ztt}{{\tilde{\z}}}
426: \newcommand{\ztb}{{\bar{\z}}}
427:
428: % \newcommand{\th}{\theta} % symbol name used by other latex package
429: \newcommand{\thh}{{\hat{\theta}}}
430: \newcommand{\Thh}{{\hat{\Theta}}}
431: \newcommand{\Th}{\Theta}
432: \newcommand{\bth}{{\boldsymbol{\theta}}}
433: \newcommand{\bTh}{{\boldsymbol{\Theta}}}
434: \newcommand{\bThh}{{\hat{\bTh}}}
435: \newcommand{\Tht}{{\tilde{\Theta}}}
436:
437: \newcommand{\la}{\lambda}
438: %\newcommand{\La}{\Lambda}
439: \newcommand{\lam}{\lambda} % backward compatibility
440: \newcommand{\Lam}{\Lambda} % backward compatibility
441: \newcommand{\bLa}{{\boldsymbol{\Lambda}}} % bold capital Lambda; written with \Lambda because \La is commented out above and would be undefined here
442: \newcommand{\lah}{{\hat{\lam}}}
443:
444: \newcommand{\bmu}{{\boldsymbol{\mu}}}
445:
446: \newcommand{\bXi}{{\boldsymbol{\Xi}}}
447:
448: \newcommand{\bPi}{{\boldsymbol{\Pi}}}
449:
450: \newcommand{\rht}{{\tilde{\rho}}}
451: \newcommand{\rhc}{{\check{\rho}}}
452:
453: \newcommand{\bSi}{{\boldsymbol{\Sigma}}}
454:
455: \newcommand{\ups}{\upsilon}
456: \newcommand{\Ups}{\Upsilon}
457: \newcommand{\bUp}{{\boldsymbol{\Ups}}}
458:
459: \newcommand{\bPs}{{\boldsymbol{\Psi}}}
460:
461: \newcommand{\w}{\omega}
462: \newcommand{\wh}{{\hat{\omega}}}
463: \newcommand{\W}{\Omega}
464:
465:
466: \newcounter{psctr} % presumably the problem-set number; printed before the dot in each heading
467: \newcounter{probctr}[psctr] % problem number; reset whenever psctr is stepped
468: %\renewcommand{\theprobctr}{\arabic{psctr}.\arabic{probctr}}
469: \newcommand{\problem}[1]{% advance probctr, add .15in of vertical space, print a bold "Problem <psctr>.<probctr>" heading, then set #1 as an unindented paragraph
470: \addtocounter{probctr}{1}
471: \vspace{.15in}
472:
473: \noindent\textbf{Problem \thepsctr.\theprobctr}\nopagebreak
474:
475: \noindent{#1}
476:
477: }
478: \newcommand{\extraproblem}[1]{% same as \problem, but the heading ends with "(practice)"
479: \addtocounter{probctr}{1}
480: \vspace{.15in}
481:
482: \noindent\textbf{Problem \thepsctr.\theprobctr\ (practice)}\nopagebreak
483:
484: \noindent{#1}
485:
486: }
487:
488: \DeclareMathAlphabet{\mathbsf}{OT1}{cmss}{bx}{n}% bold sans serif
489: \DeclareMathAlphabet{\mathssf}{OT1}{cmss}{m}{sl}% slanted sans serif
490:
491: % define some useful uppercase Greek letters in regular and bold sf
492: \DeclareSymbolFont{bsfletters}{OT1}{cmss}{bx}{n} % same font as \mathbsf
493: \DeclareSymbolFont{ssfletters}{OT1}{cmss}{m}{n} % upright (shape n), whereas \mathssf is slanted (shape sl)
494: \DeclareMathSymbol{\bsfGamma}{0}{bsfletters}{'000} % octal slots '000-'012 of these fonts supply the eleven capital Greek letters named below
495: \DeclareMathSymbol{\ssfGamma}{0}{ssfletters}{'000}
496: \DeclareMathSymbol{\bsfDelta}{0}{bsfletters}{'001}
497: \DeclareMathSymbol{\ssfDelta}{0}{ssfletters}{'001}
498: \DeclareMathSymbol{\bsfTheta}{0}{bsfletters}{'002}
499: \DeclareMathSymbol{\ssfTheta}{0}{ssfletters}{'002}
500: \DeclareMathSymbol{\bsfLambda}{0}{bsfletters}{'003}
501: \DeclareMathSymbol{\ssfLambda}{0}{ssfletters}{'003}
502: \DeclareMathSymbol{\bsfXi}{0}{bsfletters}{'004}
503: \DeclareMathSymbol{\ssfXi}{0}{ssfletters}{'004}
504: \DeclareMathSymbol{\bsfPi}{0}{bsfletters}{'005}
505: \DeclareMathSymbol{\ssfPi}{0}{ssfletters}{'005}
506: \DeclareMathSymbol{\bsfSigma}{0}{bsfletters}{'006}
507: \DeclareMathSymbol{\ssfSigma}{0}{ssfletters}{'006}
508: \DeclareMathSymbol{\bsfUpsilon}{0}{bsfletters}{'007}
509: \DeclareMathSymbol{\ssfUpsilon}{0}{ssfletters}{'007}
510: \DeclareMathSymbol{\bsfPhi}{0}{bsfletters}{'010}
511: \DeclareMathSymbol{\ssfPhi}{0}{ssfletters}{'010}
512: \DeclareMathSymbol{\bsfPsi}{0}{bsfletters}{'011}
513: \DeclareMathSymbol{\ssfPsi}{0}{ssfletters}{'011}
514: \DeclareMathSymbol{\bsfOmega}{0}{bsfletters}{'012}
515: \DeclareMathSymbol{\ssfOmega}{0}{ssfletters}{'012}
516:
517: \newcommand{\fxfm}{\stackrel{\mathcal{F}}{\longleftrightarrow}}
518: \newcommand{\lxfm}{\stackrel{\mathcal{L}}{\longleftrightarrow}}
519: \newcommand{\zxfm}{\stackrel{\mathcal{Z}}{\longleftrightarrow}}
520:
521: \DeclareMathOperator*{\gltop}{\gtreqless} % \gtreqless as a limits-style operator, so the two decisions can be attached as super/subscript limits
522: \newcommand{\glt}{\;\gltop^{\Hh=\svH_1}_{\Hh=\svH_0}\;} % upper limit: \Hh=\svH_1, lower limit: \Hh=\svH_0
523: \newcommand{\glty}{\;\gltop^{\Hh(\svy)=\svH_1}_{\Hh(\svy)=\svH_0}\;} % \svH, \svy and \svby are defined further down
524: \newcommand{\gltby}{\;\gltop^{\Hh(\svby)=\svH_1}_{\Hh(\svby)=\svH_0}\;}
525: \DeclareMathOperator*{\geltop}{\genfrac{}{}{0pt}{}{\ge}{<}} % \ge stacked over < with no fraction rule
526: \newcommand{\gelty}{\;\geltop^{\Hh(\svy)=\svH_1}_{\Hh(\svy)=\svH_0}\;}
527: \newcommand{\geltby}{\;\geltop^{\Hh(\svby)=\svH_1}_{\Hh(\svby)=\svH_0}\;}
528: \renewcommand{\pe}{\Pr(e)} % overrides the earlier \Pr(\eps) definition
529: \renewcommand{\defeq}{\triangleq} % overrides the earlier \stackrel{\Delta}{=} definition
530: \newcommand{\like}{\svlike} % \svlike is defined a few lines below
531: \newcommand{\rvlike}{\mathssf{L}}
532: \newcommand{\sst}{\cl}
533: \newcommand{\svlike}{L}
534: \newcommand{\llike}{\rvllike} % \rvllike is defined on the next line
535: \newcommand{\rvllike}{\cl}
536: \newcommand{\svllike}{l}
537: \newcommand{\bllike}{\rvbllike} % \rvbllike is defined on the next line
538: \newcommand{\rvbllike}{\boldsymbol{\cl}}
539: \newcommand{\svbllike}{\mathbf{l}}
540: \newcommand{\Qb}{\overline{Q}}
541: \renewcommand{\comb}[2]{\binom{#1}{#2}} % same body as the earlier \comb definition
542:
543:
544: %% Random/sample variable/vector declarations. Please add in alphabetical
545: %% order. First section is for capitals. Second for lower case.
546: % Capitals
547: \newcommand{\rvA}{{\mathssf{A}}} % A
548: \newcommand{\svA}{A}
549: \newcommand{\rvbA}{{\mathbsf{A}}}
550: \newcommand{\svbA}{{\mathbf{A}}}
551: \newcommand{\rvC}{{\mathssf{C}}} % C
552: \newcommand{\svC}{C}
553: \newcommand{\rvbC}{{\mathbsf{C}}}
554: \newcommand{\svbC}{{\mathbf{C}}}
555: \newcommand{\rvD}{{\mathssf{D}}} % D
556: \newcommand{\svD}{D}
557: \newcommand{\rvbD}{{\mathbsf{D}}}
558: \newcommand{\svbD}{{\mathbf{D}}}
559: \newcommand{\rvFh}{{\hat{\mathssf{F}}}} % F
560: \newcommand{\rvF}{{\mathssf{F}}}
561: \newcommand{\rvHh}{{\hat{\mathssf{H}}}} % H
562: \newcommand{\rvH}{{\mathssf{H}}}
563: \newcommand{\svH}{H}
564: \newcommand{\rvI}{{\mathssf{I}}} % I
565: \newcommand{\svHh}{{\hat{\svH}}}
566: \newcommand{\rvL}{{\mathssf{L}}} % L
567: \newcommand{\svL}{L}
568: \newcommand{\rvN}{{\mathssf{N}}} % N
569: \newcommand{\rvP}{{\mathssf{P}}} % P
570: \newcommand{\rvR}{{\mathssf{R}}} % R
571: \newcommand{\rvRh}{{\hat{\rvR}}}
572: \newcommand{\rvS}{{\mathssf{S}}} % S
573: \newcommand{\rvSh}{{\hat{\rvS}}}
574: \newcommand{\rvW}{{\mathssf{W}}} % W
575: \newcommand{\rvX}{{\mathssf{X}}} % X, random variable
576: \newcommand{\svX}{X}
577: \newcommand{\rvXt}{{\tilde{\rvX}}}
578: \newcommand{\rvY}{{\mathssf{Y}}} % Y
579: \newcommand{\rvZ}{{\mathssf{Z}}} % Z
580:
581: \newcommand{\rva}{{\mathssf{a}}} % a
582: \newcommand{\rvah}{{\hat{\rva}}}
583: \newcommand{\sva}{a}
584: \newcommand{\svah}{{\hat{\sva}}}
585: \newcommand{\rvba}{{\mathbsf{a}}}
586: \newcommand{\svba}{{\mathbf{a}}}
587: \newcommand{\rvb}{{\mathssf{b}}} % b
588: \newcommand{\rvc}{{\mathssf{c}}} % c
589: \newcommand{\rvch}{{\hat{\rvc}}}
590: \newcommand{\svc}{c}
591: \newcommand{\svch}{{\hat{\svc}}}
592: \newcommand{\rvbc}{{\mathbsf{c}}}
593: \newcommand{\svbc}{{\mathbf{c}}}
594:
595: \newcommand{\rvd}{{\mathssf{d}}} % d
596: \newcommand{\rvdh}{{\hat{\rvd}}}
597: \newcommand{\svd}{d}
598: \newcommand{\svdh}{{\hat{\svd}}}
599: \newcommand{\rvbd}{{\mathbsf{d}}}
600: \newcommand{\svbd}{{\mathbf{d}}}
601:
602:
603:
604: \newcommand{\rve}{{\mathssf{e}}} % e
605: \newcommand{\sve}{e}
606: \newcommand{\rvbe}{{\mathbsf{e}}}
607: \newcommand{\svbe}{{\mathbf{e}}}
608: \newcommand{\rvf}{{\mathssf{f}}} % f
609: \newcommand{\svf}{f}
610: \newcommand{\rvbf}{{\mathbsf{f}}}
611: \newcommand{\svbf}{{\mathbf{f}}}
612: \newcommand{\rvg}{{\mathssf{g}}} % g
613: \newcommand{\svg}{g}
614: \newcommand{\rvbg}{{\mathbsf{g}}}
615: \newcommand{\svbg}{{\mathbf{g}}}
616: \newcommand{\rvh}{{\mathssf{h}}} % h
617: \newcommand{\svh}{h}
618: \newcommand{\rvbh}{{\mathbsf{h}}}
619: \newcommand{\svbh}{{\mathbf{h}}}
620: \newcommand{\rvk}{{\mathssf{k}}} % k
621: \newcommand{\svk}{k}
622: \newcommand{\rvm}{{\mathssf{m}}} % m
623: \newcommand{\svm}{m}
624: \newcommand{\rvbm}{{\mathbsf{m}}}
625: \newcommand{\svbm}{{\mathbf{m}}}
626: \newcommand{\rvn}{{\mathssf{n}}} % n
627: \newcommand{\svn}{n}
628: \newcommand{\rvbn}{{\mathbsf{n}}}
629: \newcommand{\svbn}{{\mathbf{n}}}
630: \newcommand{\rvp}{{\mathssf{p}}} % p
631: \newcommand{\svp}{p}
632: \newcommand{\rvq}{{\mathssf{q}}} % q
633: \newcommand{\svq}{q}
634: \newcommand{\rvr}{{\mathssf{r}}} % r
635: \newcommand{\rvbr}{{\mathbsf{r}}}
636: \newcommand{\svr}{r}
637: \newcommand{\rvs}{{\mathssf{s}}} % s
638: \newcommand{\rvbs}{{\mathbsf{s}}}
639: \newcommand{\svs}{s}
640: \newcommand{\svbs}{{\mathbf{s}}}
641: \newcommand{\rvt}{{\mathssf{t}}} % t
642: \newcommand{\rvbt}{{\mathbsf{t}}}
643: \newcommand{\svt}{t}
644: \newcommand{\svbt}{{\mathbf{t}}}
645: \newcommand{\rvu}{{\mathssf{u}}} % u
646: \newcommand{\svu}{u}
647: \newcommand{\svuh}{{\hat{\svu}}}
648: \newcommand{\rvbu}{{\mathbsf{u}}}
649: \newcommand{\svbu}{{\mathbf{u}}}
650: \newcommand{\rvv}{{\mathssf{v}}} % v
651: \newcommand{\svv}{v}
652: \newcommand{\svvh}{{\hat{\svv}}}
653: \newcommand{\rvbv}{{\mathbsf{v}}}
654: \newcommand{\svbv}{{\mathbf{v}}}
655: \newcommand{\rvvh}{{\hat{\rvv}}}
656: \newcommand{\rvw}{{\mathssf{w}}} % w
657: \newcommand{\svw}{w}
658: \newcommand{\rvwh}{{\hat{\rvw}}}
659: \newcommand{\svwh}{{\hat{\svw}}}
660: \newcommand{\rvbw}{{\mathbsf{w}}}
661: \newcommand{\svbw}{{\mathbf{w}}}
662: \newcommand{\rvx}{{\mathssf{x}}} % x, random variable
663: \newcommand{\rvxh}{{\hat{\rvx}}}
664: \newcommand{\rvxt}{{\tilde{\rvx}}}
665: \newcommand{\svx}{x} % sample value
666: \newcommand{\svxh}{{\hat{\svx}}}
667: \newcommand{\svxt}{{\tilde{\svx}}}
668: \newcommand{\rvbx}{{\mathbsf{x}}}
669: \newcommand{\rvbxh}{{\hat{\rvbx}}}
670: \newcommand{\rvbxt}{{\tilde{\rvbx}}}
671: \newcommand{\svbx}{{\mathbf{\svx}}}
672: \newcommand{\svbxt}{{\tilde{\svbx}}}
673: \newcommand{\svbxh}{{\hat{\mathbf{x}}}}
674: \newcommand{\rvy}{{\mathssf{y}}} % y
675: \newcommand{\rvyh}{{\hat{\mathssf{y}}}}
676: \newcommand{\svy}{y}
677: \newcommand{\rvyt}{{\tilde{\rvy}}}
678: \newcommand{\svyt}{{\tilde{\svy}}}
679: \newcommand{\svyh}{{\hat{\svy}}}
680: \newcommand{\rvby}{{\mathbsf{y}}}
681: \newcommand{\rvbyt}{{\tilde{\rvby}}}
682: \newcommand{\svby}{{\mathbf{y}}}
683: \newcommand{\svbyt}{{\tilde{\svby}}}
684: \newcommand{\rvz}{{\mathssf{z}}} % z
685: \newcommand{\rvzh}{{\hat{\rvz}}}
686: \newcommand{\rvzt}{{\tilde{\rvz}}}
687: \newcommand{\svz}{z}
688: \newcommand{\svzh}{{\hat{\svz}}}
689: \newcommand{\rvbz}{{\mathbsf{z}}}
690: \newcommand{\svbz}{{\mathbf{z}}}
691:
692: % Handle uppercase Greek differently
693: \newcommand{\rvTh}{\ssfTheta} % glyph from the ssfletters symbol font (medium, upright sans serif)
694: \newcommand{\svTh}{\Theta}
695: \newcommand{\rvbTh}{\bsfTheta} % glyph from the bsfletters symbol font (bold sans serif)
696: \newcommand{\svbTh}{\boldsymbol{\Theta}}
697: \newcommand{\rvPh}{\ssfPhi}
698: \newcommand{\svPh}{\Phi}
699: \newcommand{\rvbPh}{\bsfPhi}
700: \newcommand{\svbPh}{\boldsymbol{\Phi}}
701:
702: \newcommand{\ddx}{\frac{\p}{\p \svx}} % partial-derivative operator with respect to \svx, built on \p (\partial)
703: \newcommand{\ddbx}{\frac{\p}{\p\svbx}} % same, with respect to the bold \svbx
704:
705: \usepackage{fullpage}
706:
707:
708: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
709: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
710: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
711: %
712: % NEW COMMANDS
713: %
714: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
715: %
716: % RANDOM VARIABLES AND VECTORS
717: %
718: % Random variable: \rvx
719: % Random vector (assume length-n): \rvbx
720: % Sample variable: \svx
721: % Sample vector: \svbx
722: %
723: % Tilde (e.g., \svbxtil), Bar, and BBar
724: % used to denote other sequences, \svbx and \svby
725: % reserved for actual source sequences
726: %
727: % Hat used for estimate
728: %
729:
730: \newcommand{\rvxtil}{\tilde{\rvx}}
731: \newcommand{\svxtil}{\tilde{\svx}}
732: \newcommand{\rvxhat}{\hat{\rvx}}
733: \newcommand{\svxhat}{\hat{\svx}}
734: \newcommand{\rvxBar}{\bar{\rvx}}
735: \newcommand{\svxBar}{\bar{\svx}}
736: \newcommand{\rvxBBar}{\bar{\bar{\rvx}}}
737: \newcommand{\svxBBar}{\bar{\bar{\svx}}}
738:
739: \newcommand{\rvbxtil}{\tilde{\rvbx}}
740: \newcommand{\svbxtil}{\tilde{\svbx}}
741: \newcommand{\rvbxhat}{\hat{\rvbx}}
742: \newcommand{\svbxhat}{\hat{\svbx}}
743: \newcommand{\rvbxBar}{\bar{\rvbx}}
744: \newcommand{\svbxBar}{\bar{\svbx}}
745: \newcommand{\rvbxBBar}{\bar{\bar{\rvbx}}}
746: \newcommand{\svbxBBar}{\bar{\bar{\svbx}}}
747:
748: \newcommand{\rvytil}{\tilde{\rvy}}
749: \newcommand{\svytil}{\tilde{\svy}}
750: \newcommand{\rvyhat}{\hat{\rvy}}
751: \newcommand{\svyhat}{\hat{\svy}}
752: \newcommand{\rvyBar}{\bar{\rvy}}
753: \newcommand{\svyBar}{\bar{\svy}}
754: \newcommand{\rvyBBar}{\bar{\bar{\rvy}}}
755: \newcommand{\svyBBar}{\bar{\bar{\svy}}}
756:
757: \newcommand{\rvbytil}{\tilde{\rvby}}
758: \newcommand{\svbytil}{\tilde{\svby}}
759: \newcommand{\rvbyhat}{\hat{\rvby}}
760: \newcommand{\svbyhat}{\hat{\svby}}
761: \newcommand{\rvbyBar}{\bar{\rvby}}
762: \newcommand{\svbyBar}{\bar{\svby}}
763: \newcommand{\rvbyBBar}{\bar{\bar{\rvby}}}
764: \newcommand{\svbyBBar}{\bar{\bar{\svby}}}
765:
766: % The sample variable that occurs if the
767: % suffixes are not atypical
768: \newcommand{\sveBar}{\bar{\sve}}
769:
770: % A random constant
771: \newcommand{\rvK}{{\mathssf{K}}}
772: \newcommand{\svK}{{K}}
773:
774:
775: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
776: %
777: % ACTUAL SOURCE DISTRIBUTIONS (NOT NECESSARILY TYPES)
778: %
779: \newcommand{\PxyRV}{p_{\rvx \rvy}}
780: \newcommand{\PxCondyRV}{p_{\rvx|\rvy}}
781: \newcommand{\PyCondxRV}{p_{\rvy|\rvx}}
782: \newcommand{\PxRV}{p_{\rvx}}
783: \newcommand{\PyRV}{p_{\rvy}}
784:
785: \newcommand{\pxySV}{p_{\svx \svy}}
786: \newcommand{\pxCondySV}{p_{\svx | \svy}}
787:
788: \newcommand{\qxySV}{q_{\svx \svy}}
789: \newcommand{\qxyCondSV}{q_{\svx | \svy}}
790: \newcommand{\qyxCondSV}{q_{\svy | \svx}}
791: \newcommand{\qySV}{q_{\svy}}
792:
793: \newcommand{\qxyBarSV}{\bar{q}_{\svx \svy}}
794: \newcommand{\qxyBarCondSV}{\bar{q}_{\svx | \svy}}
795: \newcommand{\qyxBarCondSV}{\bar{q}_{\svy | \svx}}
796: \newcommand{\qyBarSV}{\bar{q}_{\svy}}
797: \newcommand{\qxSV}{q_{\svx}}
798: \newcommand{\qxBarSV}{\bar{q}_{\svx}}
799:
800: \newcommand{\PSVast}{p^{\ast}}
801: \newcommand{\PxSVast}{p_{\svx}^{\ast}}
802: \newcommand{\PySVast}{p_{\svy}^{\ast}}
803: \newcommand{\PxySVast}{p_{\svx \svy}^{\ast}}
804: \newcommand{\PxySVastast}{p_{\svx \svy}^{\ast \ast}}
805: \newcommand{\PxCondySVast}{p_{\svx|\svy}^{\ast}}
806: \newcommand{\PyCondxSVastast}{p_{\svy|\svx}^{\ast \ast}}
807:
808: % a conditional probability that comes up often
809: \newcommand{\srcCond}{\; p_{\rvbx|\rve_1}(\svbx|\sveBar)}
810: \newcommand{\srcProb}{\; p_{\rvbx}(\svbx)}
811:
812: \newcommand{\src}[2]{\; p_{\rvx_{#1}^{#2}}(\svx_{#1}^{#2})}
813:
814:
815: \newcommand{\srcL}{\; p_{\rvx^l}(\svx^l)}
816: \newcommand{\srcKL}{\; p_{\rvx_{l+1}^k}(\svx_{l+1}^k)}
817: \newcommand{\srcNK}{\; p_{\rvx_{k+1}^n}(\svx_{k+1}^n)}
818: \newcommand{\srcNL}{\; p_{\rvx_{l+1}^n}(\svx_{l+1}^n)}
819:
820: \newcommand{\srcYKL}{\; p_{\rvy_{l+1}^k}(\svy_{l+1}^k)}
821: \newcommand{\srcYNK}{\; p_{\rvy_{k+1}^n}(\svy_{k+1}^n)}
822:
823: % a conditional probability that comes up often
824: \newcommand{\jointSrcCond}{\; p_{\rvbx,\rvby|\rve_1}(\svbx,\svby|\sveBar)}
825: \newcommand{\jointSrcProb}{\; p_{\rvbx,\rvby}(\svbx,\svby)}
826: \newcommand{\jointSrcL}{\; p_{\rvx^l,\rvy^l}(\svx^l,\svy^l)}
827: \newcommand{\jointSrcKL}{\; p_{\rvx_{l+1}^k,\rvy_{l+1}^k}
828: (\svx_{l+1}^k,\svy_{l+1}^k)}
829: \newcommand{\jointSrcNK}{\; p_{\rvx_{k+1}^n,\rvy_{k+1}^n}
830: (\svx_{k+1}^n,\svy_{k+1}^n)}
831:
832: \newcommand{\condSrcNL}{\; p_{\rvx_{l+1}^n | \rvy_{l+1}^n}
833: (\svx_{l+1}^n|\svy_{l+1}^n)}
834:
835: \newcommand{\jointSource}[4]{\; p_{\rvx_{#1}^{#2}, \rvy_{#3}^{#4}}
836: (\svx_{#1}^{#2}, \svy_{#3}^{#4})}
837: \newcommand{\jointsource}[4]{\; p_{\rvx_{#1}^{#2}, \rvy_{#3}^{#4}}}
838:
839: %%%%%%%%%%%%%%%%%%%%%%%%%%%%
840: %
841: % TYPE DEFINITIONS
842: %
843: % Type class
844: \newcommand{\tclass}{\mathcal{T}}
845:
846: %
847: % Always use capitals to denote types
848: %
849: % Use these to denote types of length-n
850: %
851: \newcommand{\PxySV}{P_{\svx \svy}}
852: \newcommand{\PxCondySV}{P_{\svx|\svy}}
853: \newcommand{\PyCondxSV}{P_{\svy|\svx}}
854: \newcommand{\PxSV}{P_{\svx}}
855: \newcommand{\PySV}{P_{\svy}}
856:
857: \newcommand{\Px}{\PxSV}
858: %
859: % Use these to indicate the (joint, conditional) type
860: % of some subsequence, which one should be understood
861: % from the context
862: %
863: \newcommand{\PxySVNL}{P_{\svx \svy}^{n-l}}
864: \newcommand{\PxCondySVNL}{P_{\svx|\svy}^{n-l}}
865: \newcommand{\PyCondxSVNL}{P_{\svy|\svx}^{n-l}}
866: \newcommand{\PxSVNL}{P_{\svx}^{n-l}}
867: \newcommand{\PySVNL}{P_{\svy}^{n-l}}
868:
869: \newcommand{\Ptil}{\tilde{P}}
870: \newcommand{\Wtil}{\tilde{W}}
871:
872: \newcommand{\PL}{P^{l}}
873: \newcommand{\PKL}{P^{k-l}}
874: \newcommand{\PNL}{P^{n-l}}
875: \newcommand{\PNK}{P^{n-k}}
876: \newcommand{\VL}{V^{l}}
877: \newcommand{\VKL}{V^{k-l}}
878: \newcommand{\VNL}{V^{n-l}}
879: \newcommand{\VNK}{V^{n-k}}
880:
881: %
882: % Type definitions for the suffixes of possibly
883: % misleading sequences
884: %
885: \newcommand{\PtilL}{\tilde{P}^{l}}
886: \newcommand{\PtilKL}{\tilde{P}^{k-l}}
887: \newcommand{\PtilNL}{\tilde{P}^{n-l}}
888: \newcommand{\PtilNK}{\tilde{P}^{n-k}}
889: \newcommand{\VtilL}{\tilde{V}^{l}}
890: \newcommand{\VtilNL}{\tilde{V}^{n-l}}
891: \newcommand{\VtilNK}{\tilde{V}^{n-k}}
892: \newcommand{\VtilKL}{\tilde{V}^{k-l}}
893:
894: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
895: %
896: % ASSORTED OTHER USEFUL MACROS
897: %
898: %
899: % Symbol used for the decoding delay
900: \newcommand*{\delay}{\Delta}
901:
902: % Error-exponent symbols (ML / UN subscripts, with an SI variant of each)
903: \newcommand*{\expML}{E_{ML}}
904: \newcommand*{\expUniv}{E_{UN}}
905: \newcommand*{\expMLSI}{E_{ML, SI}}
906: \newcommand*{\expUnivSI}{E_{UN, SI}}
907:
908: % Symbol for the prefix function
909: \newcommand*{\prefix}{\pi}
910:
911: % Symbols for the binning functions (generic, x-indexed, y-indexed)
912: \newcommand*{\bin}{\cB}
913: \newcommand*{\binX}{\cB_x}
914: \newcommand*{\binY}{\cB_y}
915:
916: % Rate symbols; \Rent simply expands to \Rx
917: \newcommand*{\Rx}{R_x}
918: \newcommand*{\Ry}{R_y}
919: \newcommand*{\Rent}{\Rx}
920:
921: % Shorthand for a weighted-averaging function, declared as a math operator
922: \DeclareMathOperator{\avg}{avg} % like \var, \cov above: upright, operator spacing, shrinks in scripts (was \mbox{avg})
923:
924: % Set of suffix-typical sequences: \cA subscripted by n and \Delta
925: \newcommand{\typSet}{\cA_{n, \Delta}}
926:
927: % Scoring functions: \score is the bare symbol, the rest are fixed argument lists
928: \newcommand{\score}{S}
929: \newcommand{\scoreXYpre}{\score(\svbx, \svby| \svbxtil, \svbytil)} % S(x, y | x-tilde, y-tilde)
930: \newcommand{\scoreXYtilpre}{\score(\svbxtil, \svbytil|\svbx, \svby)} % S(x-tilde, y-tilde | x, y)
931: \newcommand{\scorePV}{\score(\PNL, \PNK, \VNL)}
932: \newcommand{\scorePVtil}{\score(\PtilNL, \PtilNK, \VtilNL)}
933:
934: \newcommand{\minEnt}{\score(\PNK, \PKL, \VNK, \VKL)}
935: \newcommand{\minEntTil}{\score(\PtilNK, \PKL, \VtilNK, \VtilKL)} % NOTE(review): 2nd argument is \PKL, not \PtilKL -- confirm this is intended
936:
937: % Symbol for the indicator function (an emphasised digit 1)
938: \newcommand*{\ind}{\emph{1}}
939:
940: % Miscellaneous shorthands
941: \newcommand*{\kast}{k^{\ast}(l)} % k-star, written as a function of l
942: \newcommand*{\BL}{N} %% block-length for block coding
943: \newcommand*{\pf}{{\em Proof: }} % emphasised run-in "Proof: " label
944:
945: % \status: centred one-line header naming where the paper was submitted
946: %
947: %\newcommand{\status}{\centerline{Submitted to {\em IEEE Int.\ Symp\
948: %Inform.\ Theory}}}
949: %
950: \newcommand*{\status}{\centerline{Submitted to IT Transactions}}
951:
952: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
953: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
954: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
955:
956:
957:
958:
959: \begin{document}
960:
961: % paper title
962: \title{Lossless coding for distributed streaming sources\footnote{This
963: material was presented in part at the IEEE Int Symp Inform Theory,
964: Adelaide, Australia, Sept 2005.}}
965:
966: \author{Cheng Chang\footnote{Department of Electrical Engineering and
967: Computer Science, University of California Berkeley, Berkeley, CA
968: 94720}, Stark C.~Draper\footnote{Mitsubishi Electric Research Labs
969: in Cambridge, MA. This work was performed while he was a postdoc
970: at
971: Wireless Foundations in the University of California Berkeley.}, and Anant Sahai\footnote{Wireless Foundations, Department of Electrical Engineering and Computer Science, University of California Berkeley, Berkeley, CA 94720} \\
972: {\small \texttt{cchang@eecs.berkeley.edu, sdraper@eecs.berkeley.edu,
973: sahai@eecs.berkeley.edu}} }
974:
975: \maketitle
976:
977: \begin{abstract} Distributed source coding is traditionally viewed in
978: the block coding context --- all the source symbols are known in
979: advance at the encoders. This paper instead considers a streaming
980: setting in which iid source symbol pairs are revealed to the
981: separate encoders in real time and need to be reconstructed at the
982: decoder with some tolerable end-to-end delay using finite rate
983: noiseless channels. A sequential random binning argument is used to
984: derive a lower bound on the error exponent with delay and show that
985: both ML decoding and universal decoding achieve the same positive
986: error exponents inside the traditional Slepian-Wolf rate region. The
987: error events are different from the block-coding error events and
988: give rise to slightly different exponents. Because the sequential
989: random binning scheme is also universal over delays, the resulting
990: code eventually reconstructs every source symbol correctly with
991: probability $1$.
992: \end{abstract}
993:
994: \IEEEpeerreviewmaketitle
995:
996:
997: \section{Introduction}
998:
999: Traditionally, ``lossless'' coding is considered using two distinct
1000: paradigms: fixed block coding and variable-length
1001: coding\footnote{There are actually four different traditional cases:
1002: fixed to fixed, fixed to variable, variable to fixed, and variable
1003: to variable. However, the last three all achieve a probability of
1004: error of zero and so we consider them together.}. As classically
1005: understood, both consider that the source-symbols are known in advance
1006: at the encoder and that they must be mapped into a string of bits
1007: decoded by the receiver. Fixed-block coding accepts a small
1008: probability of error and constrains the length of the bit-string,
1009: while variable-length encoding constrains only the {\em expected}
1010: length of the bit-string in exchange for keeping the probability of
1011: error at zero. In the point-to-point setting, both paradigms apply
1012: generically. In contrast, distributed source coding has traditionally
1013: been explored within the fixed block context. In
1014: \cite{slepianWolf:73}, Slepian and Wolf even asked:
1015: \begin{quotation}
1016: What is the theory of variable-length encodings for
1017: correlated sources?
1018: \end{quotation}
1019:
1020: In the classical context of source realizations known entirely in
1021: advance, the answer is simple: there is no nontrivial sense of
1022: variable-length encoding that applies generically while still being
1023: interesting.\footnote{At least at sum rates close to the joint source
1024: entropy rate. If the rates of communication are high enough, e.g.,
1025: equaling the log of the cardinalities of the source alphabets,
1026: zero-error communication is possible.} This is easiest to see by
1027: example (illustrated in Figure~\ref{fig.SWcoding} and revisited as
1028: Example 2 in Section~\ref{sec.numerical}). Suppose that the first
1029: encoder observes the random vector $\rvbx$, which consists of a
1030: sequence of $\BL$ iid uniform binary random variables. Suppose
1031: further that the second encoder observes $\rvby$ which is related to
1032: $\rvbx$ via a memoryless binary symmetric channel with crossover
1033: probability $\rho < 0.5$. The Slepian-Wolf sum-rate bound is $H(\rvx,
1034: \rvy) = 1 + H(\rho) < 2 = H(\rvx) + H(\rvy)$. But since the
1035: individual encoders only see uniformly distributed binary sources,
1036: they do not know when the sources are behaving jointly atypically.
1037: Therefore, they have no basis on which to adjust their encoding rates
1038: to combat joint atypicality. Since all pairs are possible when finite
1039: blocklengths are considered, the individual encoders must use distinct
1040: bit-strings for each of them. Since the expected length depends only
1041: on the uniform marginal distributions, this means that the expected
1042: length must be at least $N$. Thus, variable-length approaches do not,
1043: in general\footnote{One should note that, in analogy to zero-error
1044: channel coding, there are special (non-generic) cases where
1045: zero-error Slepian-Wolf coding is possible~\cite{koulgiEtAl:03}
1046: since certain symbol pairs cannot occur.}, lead to zero-error
1047: Slepian-Wolf codes for interesting rate-points.
1048:
1049: \setlength{\unitlength}{1mm}
1050: \begin{figure}
1051: \begin{picture}(140,40)
1052: \put(50,0){\line(1,0){20}} \put(50,10){\line(1,0){20}}
1053: \put(50,0){\line(0,1){10}} \put(70, 0){\line(0,1){10}}
1054: \put(52,4){Encoder $\svy$}
1055: \put(50,30){\line(1,0){20}} \put(50,40){\line(1,0){20}}
1056: \put(50,30){\line(0,1){10}} \put(70, 30){\line(0,1){10}}
1057: \put(52,34){Encoder $\svx$}
1058: \put(90,15){\line(1,0){20}} \put(90,25){\line(1,0){20}}
1059: \put(90,15){\line(0,1){10}} \put(110, 15){\line(0,1){10}}
1060: \put(94,19){Decoder }
1061: \put(70, 5){\vector(3,2){20}}
1062: \put(77, 5){$R_y$}
1063: \put(70, 35){\vector(3,-2){20}}
1064: \put(77, 35){$R_x$}
1065: \put(40, 5){\vector(1,0){10}} \put(40, 35){\vector(1,0){10}}
1066: \put(110, 20){\vector(1,0){10}}
1067: \put(125,24){ $\hat{\rvx}_1,\hat{\rvx}_2, \ldots, \hat{\rvx}_{\BL}$}
1068: \put(125,16){$\hat{\rvy}_1,\hat{\rvy}_2, \ldots, \hat{\rvy}_{\BL}$}
1069: \put(18, 34){ ${\rvx}_1,{\rvx}_2, \ldots, \rvx_{\BL}$}
1070: \put(18,4){ ${\rvy}_1,{\rvy}_2, \ldots, \rvy_{\BL}$}
1071: \put(15, 20){ $(\rvx_i,\rvy_i)\sim p_{\rvx\rvy}$}
1072: \put(27,24){\vector(0,1){8}} \put(27,16){\vector(0,-1){8}}
1073: \end{picture}
1074: \caption[]{Slepian-Wolf distributed encoding and joint decoding of a pair of correlated sources.}
1075: \label{fig.SWcoding}
1076: \end{figure}
1077:
1078: Another view of variable-length coding is as a tool that enables us to
1079: achieve meaningful compression despite not knowing the underlying
1080: probability distribution\footnote{In the point-to-point case, this is
1081: very closely related to achieving a zero-error probability. The same
1082: string can be an atypical realization of one source model while
1083: being a typical realization of another source. Encoding all the
1084: typical sequences correctly without knowing the underlying model
1085: requires getting all the possible sequences correctly for any
1086: specific model.} and allowing the rate used to adapt to the source.
1087: If there is a low-rate, but reliable\footnote{It is clear that our
1088: techniques from \cite{sahaiSimsek:04, draperSahai:06} can also be
1089: adapted to make the system of \cite{draperAllerton:04} work using
1090: only noisy feedback channels.}, feedback link available from the
1091: decoder to the two separate encoders, then this sense of
1092: variable-length Slepian-Wolf coding is possible.
1093: \cite{draperAllerton:04} gives a fixed-to-variable scheme in which the
1094: stopping-time is chosen at the decoder and communicated back to the
1095: encoders over a low-rate feedback link. The goal of
1096: \cite{draperAllerton:04} is not achieving a truly zero probability of
1097: error --- rather it is willing to accept a very small probability of
1098: error in exchange for using a rate that is as small as possible.
1099:
1100: To answer the question posed by Slepian and Wolf in the more classical
1101: sense, we instead want to aim for a probability of error that goes to
1102: zero for every source symbol, but at the cost of a variable delay. To
1103: do this, we propose stepping back and eliminating the modeling
1104: assumption of encoders having access to the entire source realization
1105: in advance. We argue that a ``streaming setting'' is required to
1106: discern the system-level analog to variable-length source coding in
1107: the distributed context. The streaming setting abstracts sources that
1108: are embedded in time as well as the fact that all physically
1109: realizable encoders/decoders must obey some form of causality. Thus
1110: ``rate'' is not just measured in bits per source symbol but in both
1111: source symbols per second and bits per second. The source-rate
1112: (symbols per second) is specified as a part of the problem while the
1113: bit-rate (bits per second) is something that we get to choose. From an
1114: engineering perspective, three desirable qualities\footnote{Of course,
1115: ``implementation complexity'' forms a fourth and very important
1116: consideration, but we will be ignoring that aspect of the problem.}
1117: are:
1118: \begin{itemize}
1119: \item Using a low rate bit-pipe(s)
1120: \item Low end-to-end latency
1121: \item Low probability of error
1122: \end{itemize}
1123: The theory of source-coding should tell us the tradeoffs between these
1124: three desiderata. In addition, we will be interested in to what extent
1125: a streaming code can be made ``universal'' over a class of probability
1126: distributions.
1127:
1128: In the point-to-point streaming setting, regardless of whether block
1129: or variable-length compression is used, the traditional initial step
1130: is the same: group symbols into source blocks. To compress the data
1131: blocks, either use a fixed-rate block code, or a variable-length code.
1132: The resulting encoding is then enqueued for transmission across the
1133: bit-pipe. As long as the source entropy rate is below the data-rate,
1134: the queue will remain stable. When block coding is used for
1135: compression, there is a constant delay through the system, and
1136: atypical source blocks are received in error. The probability of error
1137: is fixed at the system's design-time and so is the end-to-end delay.
1138:
1139: In contrast, variable-length coding induces a variable system delay.
1140: The more unlikely the source blocks, the longer the delay experienced
1141: at run-time. Thus, while {\em asymptotically} there are no errors when
1142: variable-length source codes are used (assuming an infinite buffer
1143: size), the delay till a given symbol can be decoded depends on the
1144: random source realization. Because atypical source realizations are
1145: large deviation events, the probability that some source symbol cannot
1146: be reconstructed $\delay$ samples after it enters the encoder decays
1147: exponentially\footnote{In \cite{Chang:06}, we show that variable
1148: length codes used in this manner actually achieve the best possible
1149: error exponent with delay. This is also related to the analysis of
1150: \cite{jelinek:68}.} in $\delay$. The choice of acceptable end-to-end
1151: delay is left to the receiver/application.
1152:
1153: We show that this type of reliability {\em can} be achieved in a
1154: generic distributed coding context --- the probability of error goes
1155: to zero with end-to-end delay and the choice of the acceptable delay
1156: is entirely up to the decoder. Essentially, every source symbol is
1157: recovered correctly eventually with probability\footnote{The secret
1158: here is that we are considering a probability measure over infinite
1159: sequences. While all pairs of finite strings may be possible, most
1160: pairs of infinite strings collectively have probability zero.} $1$.
1161: The only difference is that unlike the point-to-point case, the
1162: decoder does not necessarily know when the estimate for the symbol has
1163: converged to its final value. Furthermore, just as in the
1164: point-to-point setting\footnote{Sliding-window Lempel-Ziv compression
1165: is one example where data is naturally encoded sequentially. It is
1166: also universal over sources.}, both the encoding and decoding can be
1167: made universal.
1168:
1169: In this paper, we formally define a streaming Slepian-Wolf code, and
1170: develop coding strategies both for situations when source statistics
1171: are known and when they are not. The new tool is a sequential binning
1172: argument that parallels the tree-coding arguments used to study
1173: convolutional codes. We characterize the performance of
1174: the streaming schemes through an error exponent analysis and
1175: demonstrate that the exponents are equal regardless of whether the
1176: system is informed of the source statistics (in which case we use
1177: maximum likelihood decoding) or not (in which case we use universal
1178: decoding). The universal decoder we design for the streaming problem
1179: is somewhat different from those familiar from the block coding
1180: literature, as is the nature of the error exponents.
1181:
1182: \subsection{Potential applications and practical motivation}
1183:
1184: In addition to our core interest in answering some basic questions
1185: about Slepian-Wolf coding, our formulation is also motivated by the
1186: diverse emerging application areas for distributed source coding.
1187: Media (e.g., video-conference) sources naturally have a streaming
1188: character. Consequently, we are motivated to explore what sort of
1189: streaming Slepian-Wolf technique matches naturally to such
1190: situations.\footnote{A secondary aspect in some multimedia settings is
1191: a natural multi-scale nature to the source --- the high order bits
1192: are more important than the low order bits. To the extent that the
1193: high order bits can be made ``early'' and the low-order bits can be
1194: made ``late'', our constructions also naturally give more protection
1195: to the early bits as compared to the later ones. While this
1196: interpretation might eventually be important in practice, it is a
1197: bit questionable within the simplified model this paper considers.}
1198:
1199: \subsection{Outline}
1200:
1201: Section~\ref{sec.notation} summarizes the notation used in the paper.
1202: Section~\ref{sec.mainresults} reviews the classical block-coding error
1203: exponent results for Slepian-Wolf source coding and then states the
1204: main results of this paper: sequential error exponents for
1205: Slepian-Wolf source coding. Section~\ref{sec.numerical} presents a
1206: numeric study of two example sources. We observe that the sequential
1207: error exponent is often the same as the block coding error exponent.
1208: Sections \ref{sec.entropy}, \ref{sec.incDecSI} and \ref{sec.SW} prove
1209: the theorems in Section~\ref{sec.mainresults}. We start with
1210: sequential source coding for single sources in Section~\ref{sec.entropy}.
1211: This is the simplest case but it provides insight into the nature of the
1212: sequential source coding problem and sequential error events. We show
1213: that the sequential error exponent is the same as the random block
1214: source coding error exponent. Section~\ref{sec.incDecSI} moves on to
1215: the case with decoder side-information. Finally, Section~\ref{sec.SW}
1216: presents the proof of the main result of the paper. We derive the
1217: sequential error exponent of distributed source coding for correlated
1218: sources. This error exponent is strictly positive everywhere inside the
1219: achievable rate region of \cite{slepianWolf:73}. For all these three
1220: scenarios in Sections \ref{sec.entropy}, \ref{sec.incDecSI} and
1221: \ref{sec.SW}, both ML and universal decoding rules are studied. The
1222: appendix shows that the resulting error exponents are indeed the same.
1223:
1224:
1225:
1226: \section{Notation}\label{sec.notation}
1227:
1228:
1229: We use serifed-fonts, e.g., $\svx$ to indicate sample values, and
1230: sans-serif, e.g., $\rvx$, to indicate random variables. Bolded fonts
1231: are reserved to indicate sample or random vectors, e.g., $\svbx =
1232: \svx^n$ and $\rvbx = \rvx^n$, respectively, where the vector length
1233: ($n$ here) is understood from the context. Subsequences, e.g.,
1234: $\svx_l, \svx_{l+1}, \ldots, \svx_{n}$ are denoted as $\svx_l^n$ where
1235: $\svx_i^j \defeq \emptyset$ if $i>j$. Distributions are indicated
1236: with lower-case $p$, e.g., $\rvx$ is distributed according to
1237: $p_{\rvx}(\svx)$. Sets and their elements are denoted as, e.g., $\svx
1238: \in \cX$, and their cardinality by $|\cX|$. We use calligraphic font
1239: to denote sets, $\mathcal{X}$, $\mathcal{F}$, $\mathcal{W}$ etc, and
1240: reserve $\mathcal{E}$ and $\mathcal{D}$ to denote encoding and
1241: decoding functions, respectively. We use standard notation for types,
1242: see, e.g., \cite{csiszarKorner}. Let $N(a; \svbx)$ denote the number
1243: of symbols in the length-$n$ vector $\svbx$ that take on value $a$.
1244: Then, $\svbx$ is of type $P$ if $P(a) = N(a; \svbx)/n$. The
1245: type-class, or set of length-$n$ vectors of type $P$ is denoted
1246: $\tclass_{P}$. A sequence $\svby$ has conditional type $V$ given
1247: $\svbx$ if $N(a,b; \svbx, \svby) = N(a; \svbx) V(b|a) = n P(a) V(b|a)$
1248: for every $a, b$. The set of sequences $\svby$ having conditional type
1249: $V$ with respect to $\svbx$ is called the $V$-shell of $\svbx$ and is
1250: denoted by $\tclass_{V}(\svbx)$. When considered together, the pair
1251: $(\svbx, \svby)$ is said to have joint type $V \times P$. We always
1252: use upper-case, e.g., $P$ and $V$, to denote length-$n$ types and
1253: conditional types. As we often discuss the types of subsequences we
1254: add a superscript notation to remind the reader of the length of the
1255: subsequence in question. If, for instance, the subsequence under
1256: consideration is $\svx_{l}^n$ we write $\svx_{l}^n \in
1257: \tclass_{\PNL}$. Similarly we use $\VNL$ for the conditional type of
1258: length-$(n-l+1)$, and $\VNL \times \PNL$ for the joint type.
1259:
1260: Given a joint type $V \times P$, entropies and conditional entropies
1261: are denoted as $H(P)$ and $H(V|P)$, respectively. The KL divergence
1262: between two distributions $q$ and $p$ is denoted by $D(q \| p)$.
1263:
1264: \section{Main Results}\label{sec.mainresults}
1265:
1266: In this section, we begin by reviewing classical results on the error
1267: exponents of distributed block coding. We then present the main
1268: results of the paper: error exponents for streaming Slepian-Wolf
1269: coding and its special cases: point-to-point coding and source coding
1270: with decoder side information. We analyze both maximum likelihood and
1271: universal decoding and show that the achieved exponents are equal.
1272: Leaving numerical examples and proofs for later sections, we here
1273: compare the form of the streaming exponents with their block coding
1274: counterparts.
1275:
1276:
1277: %%%%%%%%%%%%%%%%%%%%%%%
1278: \subsection{Block source coding and error exponents}
1279:
1280: In the classic block-coding Slepian-Wolf paradigm, full length-$\BL$
1281: vectors $\rvbx$ and $\rvby$ are observed by their respective encoders
1282: before communication commences. In this situation a rate-$(\Rx, \Ry)$
1283: length-$\BL$ block source code consists of an encoder-decoder triplet
1284: $(\mathcal{E}^x_{\BL},\mathcal{E}^y_{\BL}, \mathcal{D}_{\BL})$, as we
1285: will define shortly. For the rate-region considerations, the general
1286: case of distributed encoders can be considered by using time-sharing
1287: among codes that alternate between sending at rates close to the
1288: marginal entropy and those that correspond to perfectly known
1289: side-information. However, it is easy to see that this results in a
1290: substantial loss of error-exponent even in the block-coding case. To
1291: get good exponents, something else is required:
1292:
1293: \begin{defn}\label{def.SWblockCode}
1294: A randomized length-$\BL$ rate-$(\Rx, \Ry)$ block encoder-decoder
1295: triplet $(\mathcal{E}^x_{\BL},\mathcal{E}^y_{\BL},\mathcal{D}_{\BL})$ is a
1296: set of maps
1297: %
1298: \begin{eqnarray*}
1299: \begin{array}{lclcl}
1300: \mathcal{E}^x_{\BL} &: & \mathcal{X}^{\BL} \rightarrow \{0,1\}^{ \BL R_x},
1301: & \mbox{e.g.,} & \mathcal{E}^x_{\BL}(x^{\BL})=a^{ \BL R_x}\\
1302: %
1303: \mathcal{E}^y_{\BL} &: & \mathcal{Y}^{\BL} \rightarrow \{0,1\}^{ \BL R_y},
1304: & \mbox{e.g.,} & \mathcal{E}^y_{\BL}(y^{\BL})=b^{ \BL R_y}\\
1305: %
1306: \mathcal{D}_{\BL} &: & \{0,1 \}^{ \BL R_x }\times \{0,1 \}^{ \BL R_y }
1307: \rightarrow \mathcal{X}^{\BL}\times \mathcal{Y}^{\BL}, & \mbox{e.g.,}
1308: &\mathcal{D}_{\BL}(a^{ \BL R_x }, b^{ \BL R_y })=(\hat{x}^{\BL},\hat{y}^{\BL})
1309: \end{array}
1310: \end{eqnarray*}
1311: %
1312: where common randomness, shared between the encoders and the decoder
1313: is assumed. This allows us to randomize the mappings independently of
1314: the source sequences.
1315: \end{defn}
1316:
1317: The error probability typically considered in Slepian-Wolf coding is
1318: the joint error probability, $\Pr[(\rvx^{\BL}, \rvy^{\BL})\neq
1319: (\hat{\rvx}^{\BL},\hat{\rvy}^{\BL})]=\Pr[(\rvx^{\BL},\rvy^{\BL})\neq
1320: \mathcal{D}_{\BL}(\mathcal{E}^x_{\BL}(\rvx^{\BL}),
1321: \mathcal{E}^y_{\BL}(\rvy^{\BL}))]$. This probability is taken over
1322: the random source vectors as well as the randomized mappings. An
1323: error exponent $E$ is said to be achievable if there exists a family
1324: of rate-$(\Rx, \Ry)$ encoders and decoders
1325: $\{(\mathcal{E}^x_{\BL},\mathcal{E}^y_{\BL},\mathcal{D}_{\BL})\}$,
1326: indexed by $\BL$,
1327: such that %\footnote{We use nats and $\log$ in this paper.}
1328: %
1329: \begin{equation}
1330: \lim_{\BL \rightarrow \infty}-\frac{1}{\BL}\log
1331: \Pr[(\rvx^{\BL}, \rvy^{\BL})\neq
1332: (\hat{\rvx}^{\BL},\hat{\rvy}^{\BL})] \geq E. \label{eq.SWblockErrExp}
1333: \end{equation}
1334:
1335: In this paper, we study random source vectors $(\rvbx, \rvby)$ that
1336: are iid across time but may have dependencies at any given time:
1337: \begin{equation*}
1338: p_{\rvx,\rvy}(\svbx,\svby)=\prod_{i=1}^{\BL}p_{\rvx,\rvy}(\svx_i,\svy_i).
1339: \end{equation*}
1340:
1341: For such iid sources, upper and lower bounds on the achievable error
1342: exponents are derived in~\cite{gallagerTech:76,csiszarKorner}. These
1343: results are summarized by the following theorem.
1344:
1345: \begin{thm}\label{THM.INTRO}
1346: (Lower bound) Given a rate pair $(\Rx, \Ry)$ such that $\Rx >
1347: H(\rvx|\rvy)$, $\Ry > H(\rvy|\rvx)$, $\Rx + \Ry > H(\rvx, \rvy)$.
1348: Then, for all
1349: \begin{equation}
1350: E < \min_{\rvxBar,\rvyBar} D(p_{\rvxBar,\rvyBar}\|p_{\rvx\rvy})+ \big|
1351: \min[R_x+R_y-H(\rvxBar,\rvyBar), R_x-H(\rvxBar|\rvyBar),
1352: R_y-H(\rvyBar|\rvxBar) ]\big|^{+} \label{eq.SWblockLowBnd}
1353: \end{equation}
1354: there exists a family of randomized encoder-decoder mappings as
1355: defined in Definition~\ref{def.SWblockCode} such
1356: that~(\ref{eq.SWblockErrExp}) is satisfied.
1357: In~(\ref{eq.SWblockLowBnd}) the function $|z|^{+} = z$ if $z \geq 0$
1358: and $|z|^{+} = 0$ if $z < 0$.
1359:
1360: (Upper bound) Given a rate pair $(\Rx, \Ry)$ such that $\Rx >
1361: H(\rvx|\rvy)$, $\Ry > H(\rvy|\rvx)$, $\Rx + \Ry > H(\rvx, \rvy)$. Then,
1362: for all
1363: %
1364: \begin{equation}
1365: E >\min \left\{
1366: \min_{ \rvxBar,\rvyBar: R_x<H(\rvxBar|\rvyBar)}
1367: D(p_{\rvxBar,\rvyBar}\|p_{\rvx\rvy}) ,
1368: \min_{ \rvxBar,\rvyBar: R_y<H(\rvyBar|\rvxBar)}
1369: D(p_{\rvxBar,\rvyBar}\|p_{\rvx\rvy}),
1370: \min_{ \rvxBar,\rvyBar: R_x+R_y<H(\rvxBar,\rvyBar)}
1371: D(p_{\rvxBar,\rvyBar}\|p_{\rvx\rvy}) \right\}
1372: \label{eq.SWblockUpBnd}
1373: \end{equation}
1374: %
1375: there does not exist a randomized encoder-decoder mapping as defined
1376: in Definition~\ref{def.SWblockCode} such that~(\ref{eq.SWblockErrExp}) is
1377: satisfied.
1378:
1379: In both bounds $(\rvxBar,\rvyBar)$ are dummy random variables
1380: with joint distribution $p_{\rvxBar,\rvyBar}$.
1381: \end{thm}
1382:
1383:
1384: {\em Remark:} As long as $(R_x,R_y)$ is in the interior of the
1385: achievable region, i.e., $R_x> H(\rvx|\rvy)$, $R_y> H(\rvy|\rvx)$ and
1386: $R_x+R_y> H(\rvx, \rvy)$ then the lower-bound~(\ref{eq.SWblockLowBnd})
1387: is positive. The achievable region is illustrated in
1388: Fig.~\ref{fig.SW_region_intro}. As shown in \cite{csiszarKorner},
1389: the upper and lower bounds~(\ref{eq.SWblockUpBnd})
1390: and~(\ref{eq.SWblockLowBnd}) match when the rate pair $(R_x,R_y)$ is
1391: achievable and close to the boundary of the region. This is analogous
1392: to the high rate regime in channel coding where the random coding
1393: bound (analogous to~(\ref{eq.SWblockLowBnd})) and the sphere packing
1394: bound (analogous to~(\ref{eq.SWblockUpBnd})) agree.
1395:
1396: Theorem~\ref{THM.INTRO} can also be used to generate bounds on the
1397: exponent for source coding with decoder side information (i.e.,
1398: $\rvby$ observed at the decoder), and for source coding without side
1399: information (i.e., $\rvby$ is a constant). These corollaries will
1400: prove useful as a basis for comparison as we build up to the complete
1401: solution for streaming Slepian-Wolf coding.
1402:
1403:
1404: \begin{corol}\label{thm.blockSI}
1405: (Source coding with decoder side information) Consider a
1406: Slepian-Wolf problem where $\rvby$ is known by the decoder. Given a
1407: rate $\Rx$ such that $\Rx > H(\rvx|\rvy)$, then for all
1408: %
1409: \begin{equation}
1410: E < \min_{\rvxBar,\rvyBar} D(p_{\rvxBar,\rvyBar}\|p_{\rvx\rvy}) +
1411: |R_x-H(\rvxBar|\rvyBar)|^{+}, \label{eq.SIblockLowBnd}
1412: \end{equation}
1413: %
1414: there exists a family of randomized encoder-decoder mappings as
1415: defined in Definition~\ref{def.SWblockCode} such
1416: that~(\ref{eq.SWblockErrExp}) is satisfied.
1417: \end{corol}
1418:
1419: The proof of Corollary~\ref{thm.blockSI} follows from
1420: Theorem~\ref{THM.INTRO} by letting $\Ry$ be arbitrarily large.
1421: Similarly, by letting $\rvby$ be deterministic so that $H(\rvx|\rvy)
1422: = H(\rvx)$ and $H(\rvy) = 0$, we get the following random-coding
1423: bound for the point-to-point case of a single source $\rvbx$.
1424:
1425: \begin{corol}\label{thm.blockEnt} (point-to-point)
1426: Consider a Slepian-Wolf problem where $\rvby$ is deterministic,
1427: i.e., $\rvby = \svby$. Given a rate $\Rx$ such that $\Rx >
1428: H(\rvx)$, for all
1429: %
1430: \begin{equation}
1431: E < \min_{\rvxBar} D(p_{\rvxBar}\|p_{\rvx})+ |R_x-H(\rvxBar)|^{+}
1432: =E_x(R_x) \label{eq.EntblockLowBnd}
1433: \end{equation}
1434: %
1435: there exists a family of randomized encoder-decoder triplets as defined
1436: in Definition~\ref{def.SWblockCode} such that~(\ref{eq.SWblockErrExp}) is
1437: satisfied.
1438: \end{corol}
1439:
1440:
1441:
1442: \setlength{\unitlength}{1mm}
1443: \begin{figure}[htbp]
1444: \begin{center}
1445: \leavevmode
1446: \begin{picture}(130,80)
1447:
1448: \put(40, 10){\vector(1,0){55}}
1449: \put(40, 10){\vector(0,1){55}}
1450:
1451: \put(40,70){$R_y$} \put(100,10){$R_x$}
1452:
1453: \put(28,40){$H(\rvy)$} \put(28,31){$H(\rvy| \rvx)$}
1454:
1455: \put(68,5){$H(\rvx)$} \put(56, 5){$H(\rvx| \rvy)$}
1456:
1457: \put(28,60){$\log|\mathcal{Y}|$} \put(87,5){$\log|\mathcal{X}|$}
1458:
1459:
1460: \put(90,60){\line(0,-1){29}} \put(90,31){\line(-1,0){20}}
1461: \put(70,31){\line(-1, 1){9}}
1462: \put(90,60){\line(-1,0){29}} \put(61,60){\line(0,-1){20}}
1463:
1464: \put(64, 52){Achievable} \put(66, 48){Region}
1465:
1466: \put(47,
1467: 25){$R_x+R_y=H(\rvx,\rvy)$} \put(60, 28){\vector(1,1){6.5}}
1468:
1469: \end{picture}
1470: \caption{ Achievable region for Slepian-Wolf source coding }
1471: \label{fig.SW_region_intro}
1472: \end{center}
1473: \end{figure}
1474:
1475: \subsection{Sequential Distributed Source Coding}
1476:
1477: We now state our main results for streaming encoding, and contrast
1478: them with the block-coding results of the last section. To begin, we
1479: define a streaming encoder.
1480:
1481: \begin{defn}
1482: \label{def.seqn_coding}
1483: A randomized sequential encoder-decoder triplet
1484: $\mathcal{E}^x,\mathcal{E}^y,\mathcal{D}$ is a sequence of mappings,
1485: $\{\mathcal{E}^x_j\},j=1,2,...$, $\{\mathcal{E}^y_j\},j=1,2,...$ and
1486: $\{\mathcal{D}_j\},j=1,2,...$:
1487:
1488: \begin{equation}
1489: \begin{array}{lclcl}
1490: \mathcal{E}^x_j & : & \mathcal{X}^{j} \longrightarrow
1491: \{0,1\}^{ R_x }, & \mbox{e.g.,} & \mathcal{E}^x_j(x^j)=a_{ (j-1)R_x +1}^{
1492: jR_x }, \\
1493: %
1494: \mathcal{E}^y_j & : & \mathcal{Y}^{j}
1495: \longrightarrow \{0,1\}^{ R_y }, & \mbox{e.g.,} & \mathcal{E}^y_j(y^j)=b_{
1496: (j-1)R_y +1}^{ jR_y }.
1497: \end{array}
1498: \label{eq.xEnc}
1499: \end{equation}
1500: %
1501: Common randomness, shared between encoders and decoder, is assumed.
1502: This allows us to randomize the mappings independently of the source
1503: sequence.
1504: \end{defn}
1505:
1506: In this paper, the sequential encoding maps will always work by
1507: assigning random ``parity bits'' in a causal manner to the observed
1508: source sequence. That is, the $\Rx$ (or $\Ry$) bits generated at each
1509: time in~(\ref{eq.xEnc}), are iid Bernoulli-$(0.5)$.\footnote{We assume
1510: that $\Rx$ and $\Ry$ are integer. To justify this assumption note
1511: that we can always group sets of $\alpha$ successive symbols into
1512: super-symbols. These larger symbols can be encoded at an average
1513: rate $\alpha \Rent$. Generally, if we group $\alpha$ symbols
1514: together, and transmit $\beta$ bits per super-symbol, we can realize
1515:   an average rate $\beta / \alpha$, i.e., a rational rate. If desired,
1516: non-integer average rates are easily implemented by a time-varying
1517: transmission rate. For example, say we want to implement an average
1518: encoding rate of $5/4$ bits per source symbol. Say we generate one
1519: new parity bit per symbol for each symbol observed except for the
1520: fourth symbol, eighth symbol, etc, when we generate two. The
1521: average encoding rate is $5/4$. As long as the decoding delay
1522:   $\delay$ we target is long enough so that the decoder receives an
1523:   ``average'' number of encoded bits -- $\delay \Rent$ -- before we
1524:   must make an estimate (e.g., if $\delay \gg 1 / \Rent$), these
1525:   small-scale issues even out. In particular, they do not affect the
1526: exponents.} Since parity bits are assigned causally, if two source
1527: sequences share the same length-$l$ prefix, then their first $l
1528: {\Rent}$ parity bits must match. Subsequent parities are drawn
1529: independently. Such a sequential coding strategy is the source-coding
1530: parallel to tree and convolutional codes used for channel coding
1531: \cite{Forney:74}. In fact, we call these ``parity bits'' as they can
1532: be generated using an infinite constraint-length time-varying random
1533: convolutional code.
1534:
1535: \begin{defn}
1536: The decoder mapping
1537: %
1538: \begin{eqnarray}
1539: &&\mathcal{D}_j: \{0,1 \}^{ jR_x }\times\{0,1 \}^{ jR_y }
1540: \longrightarrow \mathcal{X}^j \times \mathcal{Y}^j \nonumber\\
1541: %
1542: &&\mathcal{D}_j(a^{ jR_x },b^{ jR_y
1543: })=(\svxhat_{1}^{j}(j),\svyhat_{1}^{j}(j))\nonumber
1544: \end{eqnarray}
1545: %
1546: At each time $j$ the decoder $\mathcal{D}_j$ outputs estimates of all
1547: the source symbols that have entered the encoder by time $j$.
1548: \end{defn}
1549:
1550: {\em Remark:} While we state Definition~\ref{def.seqn_coding} only for
1551: Slepian-Wolf coding, it immediately specializes to source coding with
1552: decoder side information (dropping the $\mathcal{E}_y$ and revealing
1553: $\rvy^n$ to the decoder), and source coding without side information
1554: (dropping the $\mathcal{E}_y$). We present results for both these
1555: situations as well.
1556:
1557: In this paper we study two error probabilities. We define the pair of
1558: source estimates at time $n$ as $(\hat{\rvx}^n, \hat{\rvy}^n) =
1559: \mathcal{D}_n(\prod_{j=1}^n \mathcal{E}^x_j, \prod_{j=1}^n
1560: \mathcal{E}^y_j)$, where $\prod_{j=1}^n \mathcal{E}^x_j$ indicates the
1561: full $n \Rx$ bit stream from encoder $x$ up to time $n$. We use
1562: $(\hat{\rvx}^{n - \delay}, \hat{\rvy}^{n - \delay})$ to indicate the
1563: first $n - \delay$ symbols of each estimate, where for conciseness of
1564: notation both the estimate time, $n$, and the decoding delay,
1565: $\delay$, are indicated in the superscript. With these definitions
1566: the two error probabilities we study are
1567: %
1568: \begin{align}
1569: \Pr[\rvxhat^{n - \delay} \neq \rvx^{n - \delay}] \;\; \mbox{and} \;\;
1570: \Pr[\rvyhat^{n - \delay} \neq \rvy^{n - \delay}]. \nonumber
1571: \end{align}
1572: %
1573: A pair of exponents $E_x > 0$ and $E_y > 0$ is said to be achievable
1574: if there exists a family of rate-$(\Rx, \Ry)$ encoders and decoders
1575: $\{(\mathcal{E}_j^x, \mathcal{E}_j^y, \mathcal{D}_j)\}$ such that
1576: %
1577: \begin{align}
1578: \lim_{\delay \rightarrow \infty} \lim_{n \rightarrow \infty}
1579: - \frac{1}{\delay} \log \Pr[\hat{\rvx}^{n - \delay} \neq \rvx^{n - \delay}]
1580: &\geq E_x \label{eq.errExpX}\\
1581: %
1582: \lim_{\delay \rightarrow \infty} \lim_{n \rightarrow \infty}
1583: - \frac{1}{\delay} \log \Pr[\hat{\rvy}^{n - \delay} \neq \rvy^{n - \delay}]
1584: &\geq E_y \label{eq.errExpY}
1585: \end{align}
1586:
1587: {\em Remarks:} In contrast to~(\ref{eq.SWblockErrExp}) the error
1588: exponent we look at is in the delay, $\delay$, rather than total
1589: observation time, $n$. The order of the limits is important since the
1590: total time-period $n$ is allowed to go to infinity faster than the
1591: delay $\delay$. While the definitions
1592: of~(\ref{eq.errExpX})--(\ref{eq.errExpY}) and
1593: of~(\ref{eq.SWblockErrExp}) are asymptotic in nature, the results hold
1594: for finite block-lengths and delays as well. Finally, we note that
1595: while in~(\ref{eq.SWblockErrExp}) the error exponent of a joint error
1596: event on either $\rvbx$ or $\rvby$ is considered, we provide a refined
1597: analysis specifying potentially different exponents on either
1598: decision. The results for joint errors are found by taking the
1599: minimum of the individual exponents, i.e.,
1600: %
1601: \begin{equation*}
1602: \lim_{\delay \rightarrow \infty} \lim_{n \rightarrow \infty}
1603: - \frac{1}{\delay} \log \Pr[(\hat{\rvx}^{n-\delay}, \hat{\rvy}^{n - \delay})
1604: \neq (\rvx^{n-\delay}, \rvy^{n - \delay})] \geq
1605: \min\{E_x, E_y\}.
1606: \end{equation*}
1607:
1608:
1609: \subsection{Streaming source coding}
1610:
1611: Our first results concern streaming coding in the point-to-point
1612: setting. The first theorem we state gives random coding error
1613: exponents for maximum likelihood decoding where the source statistics
1614: are known, and the second exponents for universal decoding, where they
1615: are not.
1616: %
1617: \begin{thm} \label{thm.entCodeML}
1618: Given a rate $\Rent > H(\PxRV)$, there exists a randomized streaming
1619: encoder and maximum likelihood decoder pair (per
1620: Definition~\ref{def.seqn_coding}) such that for all $E < \expML(\Rent)$
1621: there is a constant $K > 0$ such that $\Pr[\rvxhat^{n - \delay} \neq
1622: \rvx^{n - \delay}] \leq K \exp\{- \delay E\}$ for all
1623: $n, \delay \geq 0$ where
1624: %
1625: \begin{equation}
1626: \expML(\Rent) = \sup_{0 \leq \rho \leq 1} \rho \Rent - (1 + \rho) \log
1627: \left( \sum_{\svx} \PxRV(\svx)^{\frac{1}{1 + \rho}} \right).
1628: \label{eq.errExpML}
1629: \end{equation}
1630: \end{thm}
1631:
1632:
1633: \begin{thm} \label{thm.entCodeUniv}
1634: Given a rate $\Rent > H(\PxRV)$, there exists a randomized streaming
1635: encoder and universal decoder pair (per Definition~\ref{def.seqn_coding})
1636: such that for all $E < \expUniv(\Rent)$ there is a constant $K > 0$
1637: such that $\Pr[\rvxhat^{n - \delay} \neq \rvx^{n - \delay}] \leq K
1638: \exp\{- \delay E\}$ for all $n, \delay \geq 0$ where
1639: %
1640: \begin{equation}
1641: \expUniv(\Rent) = \inf_q D(q \| \PxRV) + |\Rent - H(q)|^{+},
1642: \label{eq.errExpUniv}
1643: \end{equation}
1644: %
1645: where $q$ is an arbitrary probability distribution on $\cX$ and where
1646: $|z|^{+} = z$ if $z \geq 0$ and $|z|^{+} = 0$ if $z < 0$.
1647: \end{thm}
1648:
1649: {\em Remark:} The error exponents of Theorems~\ref{thm.entCodeML}
1650: and~\ref{thm.entCodeUniv} both equal their respective random
1651: block-coding exponents for ML and universal decoders.
1652: For example, compare~(\ref{eq.errExpUniv})
1653: with~(\ref{eq.EntblockLowBnd}). The main difference in the
1654: formulation is that the error probability decays with delay $\delay$
1655: rather than block length $\BL$. Furthermore, it is known
1656: that~(\ref{eq.errExpML}) and~(\ref{eq.errExpUniv}) are equal --- see
1657: \cite{csiszarKorner} exercise $13$ on page $44$. Such equality is
1658: required by the formal definition of a universal scheme, i.e., for the
1659: same source statistics and coding rates, the universal decoder should
1660: asymptotically achieve the same error exponent as the maximum
1661: likelihood decoder. See~\cite{lapidothNarayan:98} for a detailed
1662: discussion of universal versus maximum likelihood decoding in the
1663: context of channel coding.
1664:
1665:
1666: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1667: \subsection{Streaming distributed source coding with decoder side information}
1668:
1669: This section summarizes our results for distributed streaming source
1670: coding when the side information is observed at the decoder, but not
1671: the encoder:
1672:
1673: \begin{thm} \label{thm.decSIML}
1674: Given a rate $\Rent > H(\rvx|\rvy)$, there exists a randomized
1675: encoder decoder pair (per Definition~\ref{def.seqn_coding}) such that for
1676: all $E < \expMLSI(\Rent)$ there is a constant $K > 0$ such that
1677: $\Pr[\rvxhat^{n-\delay} \neq \rvx^{n-\delay}] \leq K \exp\{- \delay
1678: E\}$ for all $n, \delay \geq 0$ where
1679: %
1680: \begin{equation}
1681: \expMLSI(\Rent) = \sup_{0 \leq \rho \leq 1} \rho \Rent - \log \Big[
1682: \sum_{\svy} \Big[ \sum_{\svx}
1683: p_{\rvx\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big]^{1+\rho} \Big].
1684: \label{eq.errExpMLSI}
1685: \end{equation}
1686: \end{thm}
1687:
1688: \begin{thm} \label{thm.decSIUniv}
1689: Given a rate $\Rent > H(\rvx|\rvy)$, there exists a randomized
1690: encoder decoder pair (per Definition~\ref{def.seqn_coding}) such that for
1691: all $E < \expUnivSI(\Rent)$ there is a constant $K > 0$ such that
1692: $\Pr[\rvxhat^{n-\delay} \neq \rvx^{n-\delay}] \leq K \exp\{- \delay
1693: E\}$ for all $n, \delay \geq 0$ where
1694: %
1695: \begin{equation}
1696: \expUnivSI(\Rent)
1697: =\inf_{\rvxtil, \rvytil} D(p_{\rvxtil, \rvytil} \| \PxyRV) +
1698: |\Rent - H(\rvxtil | \rvytil)|^{+}, \label{eq.errExpUnivSI}
1699: \end{equation}
1700: %
1701: and $(\rvxtil, \rvytil)$ are random variables with joint distribution
1702: $p_{\rvxtil, \rvytil}$, $H(\rvxtil | \rvytil)$ is their conditional
1703: entropy, and where $|z|^{+} = z$ if $z \geq 0$ and $|z|^{+} = 0$ if $z
1704: < 0$.
1705: \end{thm}
1706:
1707:
1708: {\em Remark:} Similar to the point-to-point case, the error exponents
1709: of Theorems~\ref{thm.decSIML} and~\ref{thm.decSIUniv} both equal their
1710: respective random block-coding exponents. For example,
1711: compare~(\ref{eq.errExpUnivSI}) with~(\ref{eq.SIblockLowBnd}).
1712: Similarly, (\ref{eq.errExpMLSI}) and~(\ref{eq.errExpUnivSI}) can be
1713: shown to be equal.
1714:
1715:
1716: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1717: \subsection{Streaming Slepian-Wolf coding}
1718:
1719: In contrast to streaming point-to-point coding and streaming source
1720: coding with decoder side information, the general case of streaming
1721: Slepian-Wolf coding with two distributed encoders results in error
1722: exponents that differ from their block coding counterparts. In the
1723: streaming setting, fundamentally different error events dominate as
1724: compared to the block setting.
1725:
1726:
1727: \begin{thm} \label{thm.jointCodeML}
1728:
1729: Let $(\Rx, \Ry)$ be a rate pair such that $\Rx > H(\rvx|\rvy)$, $\Ry
1730: > H(\rvy|\rvx)$, $\Rx + \Ry > H(\rvx, \rvy)$. Then, there exists a
1731: randomized encoder pair and maximum likelihood decoder triplet (per
1732: Definition~\ref{def.seqn_coding}) that satisfies the following three
1733: decoding criteria.
1734:
1735: (i) For all $E < E_{ML,SW,x}(\Rx, \Ry)$, there is a constant $K > 0$
1736: such that
1737: $\Pr[ \rvxhat^{n-\delay} \neq \rvx^{n-\delay}] \leq K
1738: \exp\{- \delay E\}$ for all $n, \delay \geq 0$ where
1739: %
1740: \begin{equation}
1741: E_{ML,SW, x}(\Rx, \Ry) = \min \Bigg\{ \inf_{\gamma \in [0,1]}
1742: E_x^{ML}(\Rx, \Ry, \gamma), \inf_{\gamma \in [0,1]}
1743: \frac{1}{1-\gamma} E_y^{ML}(\Rx, \Ry, \gamma) \Bigg\}.\nonumber
1744: \end{equation}
1745:
1746:
1747: (ii) For all $E < E_{ML,SW,y}(\Rx, \Ry)$ there is a constant $K > 0$
1748: such that $\Pr[\rvyhat^{n-\delay} \neq \rvy^{n-\delay}] \ \leq K
1749: \exp\{- \delay E\}$ for all $n, \delay \geq 0$ where
1750: %
1751: \begin{equation}
1752: E_{ML,SW, y}(\Rx, \Ry) = \min \Bigg\{ \inf_{\gamma \in [0,1]}
1753: \frac{1}{1-\gamma} E_x^{ML}(\Rx, \Ry, \gamma), \inf_{\gamma \in
1754: [0,1]} E_y^{ML}(\Rx, \Ry, \gamma) \Bigg\}.\nonumber
1755: \end{equation}
1756:
1757: (iii) For all $E < E_{ML,SW,xy}(\Rx, \Ry)$ there is a constant $K >
1758: 0$ such that $\Pr[(\rvxhat^{n-\delay}, \rvyhat^{n-\delay}) \neq
1759: (\rvx^{n-\delay}, \rvy^{n-\delay})] \ \leq K \exp\{- \delay E\}$ for
1760: all $n, \delay \geq 0$ where
1761: %
1762: \begin{equation}
1763: E_{ML,SW, xy}(\Rx, \Ry) = \min \Bigg\{ \inf_{\gamma \in [0,1]}
1764: E_x^{ML}(\Rx, \Ry, \gamma), \inf_{\gamma \in [0,1]} E_y^{ML}(\Rx,
1765: \Ry, \gamma) \Bigg\}.\nonumber
1766: \end{equation}
1767:
1768:
1769: In definitions (i)--(iii),
1770: %
1771: \begin{equation}
1772: \begin{array}{lll}
1773: E_x^{ML}(\Rx, \Ry, \gamma) & = & \sup_{\rho \in [0,1]} [ \gamma
1774: E_{x|y}(\Rx, \rho) + (1-\gamma) E_{xy}(\Rx, \Ry, \rho)]
1775: \vspace{1ex} \\
1776: %
1777: E_y^{ML}(\Rx, \Ry, \gamma) & = & \sup_{\rho \in [0,1]} [ \gamma
1778: E_{y|x}(\Ry, \rho) + (1-\gamma) E_{xy}(\Rx, \Ry, \rho)]
1779: \end{array} \label{eq.compoundExp}
1780: \end{equation}
1781: %
1782: and
1783: %
1784: \begin{equation}
1785: \begin{array}{lll}
1786: E_{xy}(\Rx, \Ry, \rho) & = & \rho (\Rx + \Ry) - \log \Big[ \sum_{\svx, \svy}
1787: p_{\rvx\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big]^{1 + \rho} \vspace{1ex}\\
1788: %
1789: E_{x|y}(\Rx, \rho) & = & \rho \Rx - \log \Big[ \sum_{\svy}
1790: \Big[ \sum_{\svx}
1791: p_{\rvx\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big]^{1+\rho} \Big]\vspace{1ex}\\
1792: %
1793: E_{y|x}(\Ry, \rho) & = & \rho \Ry - \log \Big[ \sum_{\svx} \Big[
1794: \sum_{\svy} p_{\rvx\rvy}(\svx,\svy)^{\frac{1}{1+\rho}}
1795: \Big]^{1+\rho} \Big] \vspace{1ex}
1796: \end{array}\label{eq.defBasicExp}
1797: \end{equation}
1798: %
1799: \end{thm}
1800:
1801: \begin{thm} \label{thm.jointCode}
1802:
1803: Let $(\Rx, \Ry)$ be a rate pair such that $\Rx > H(\rvx|\rvy)$, $\Ry
1804: > H(\rvy|\rvx)$, $\Rx + \Ry > H(\rvx, \rvy)$. Then, there exists a
1805: randomized encoder pair and universal decoder triplet (per
1806: Definition~\ref{def.seqn_coding}) that satisfies the following three
1807: decoding criteria.
1808:
1809: (i) For all $E < E_{UN,SW,x}(\Rx, \Ry)$, there is a constant $K >
1810: 0$ such that $\Pr[ \rvxhat^{n-\delay} \neq \rvx^{n-\delay}] \leq K
1811: \exp\{- \delay E\}$ for all $n, \delay \geq 0$ where
1812: %
1813: \begin{equation}
1814: E_{UN,SW, x}(\Rx, \Ry) = \min \Bigg\{ \inf_{\gamma \in [0,1]}
1815: E_x^{UN}(\Rx, \Ry, \gamma), \inf_{\gamma \in [0,1]}
1816: \frac{1}{1-\gamma} E_y^{UN}(\Rx, \Ry, \gamma) \Bigg\}.
1817: \end{equation}
1818:
1819:
1820: (ii) For all $E < E_{UN,SW,y}(\Rx, \Ry)$, there is a constant $K >
1821: 0$ such that $\Pr[ \rvyhat^{n-\delay} \neq \rvy^{n-\delay}] \leq K
1822: \exp\{- \delay E\}$ for all $n, \delay \geq 0$ where
1823: %
1824: \begin{equation}
1825: E_{UN,SW, y}(\Rx, \Ry) = \min \Bigg\{ \inf_{\gamma \in [0,1]}
1826: \frac{1}{1-\gamma} E_x^{UN}(\Rx, \Ry, \gamma), \inf_{\gamma \in
1827: [0,1]} E_y^{UN}(\Rx, \Ry, \gamma) \Bigg\}.
1828: \end{equation}
1829:
1830: (iii) For all $E < E_{UN,SW,xy}(\Rx, \Ry)$, there is a constant $K >
1831: 0$ such that $\Pr[ (\rvxhat^{n-\delay}, \rvyhat^{n-\delay}) \neq
1832: (\rvx^{n-\delay}, \rvy^{n-\delay})] \leq K \exp\{- \delay E\}$ for
1833: all $n, \delay \geq 0$ where
1834: %
1835: \begin{equation}
1836: E_{UN,SW, xy}(\Rx, \Ry) = \min \Bigg\{ \inf_{\gamma \in [0,1]}
1837: E_x^{UN}(\Rx, \Ry, \gamma), \inf_{\gamma \in
1838: [0,1]} E_y^{UN}(\Rx, \Ry, \gamma) \Bigg\}.
1839: \label{eq.expSWUnivJoint}
1840: \end{equation}
1841:
1842: In definitions (i)--(iii),
1843: %%
1844: \begin{align}
1845: & E_{x}^{UN}(\Rx, \Ry, \gamma) \nonumber \\
1846: & = \inf_{\tiny \rvxtil, \rvytil,
1847: \rvxBar, \rvyBar} \gamma D(p_{\rvxtil, \rvytil} \| \PxyRV) +
1848: (1-\gamma) D(p_{\rvxBar, \rvyBar} \| \PxyRV)
1849: %
1850: + \left|\gamma [\Rx - H(\rvxtil | \rvytil )]
1851: + (1-\gamma) [\Rx + \Ry - H(\rvxBar, \rvyBar)]\right|^{+}
1852: \nonumber \\
1853: %
1854: & E_{y}^{UN}(\Rx, \Ry, \gamma) \nonumber \\
1855: & = \inf_{\tiny \rvxtil, \rvytil,
1856: \rvxBar, \rvyBar} \gamma D(p_{\rvxtil, \rvytil}\| \PxyRV) +
1857: (1-\gamma) D(p_{\rvxBar, \rvyBar} \| \PxyRV)
1858: %
1859: + \left|\gamma [\Ry - H(\rvytil|\rvxtil)]
1860: + (1-\gamma) [\Rx + \Ry - H(\rvxBar, \rvyBar)]\right|^{+}
1861: \end{align}
1862: %
1863: where the random variables $(\rvxtil, \rvytil)$ and $(\rvxBar,
1864: \rvyBar)$ have joint distributions $p_{\rvxtil, \rvytil}$ and
1865: $p_{\rvxBar, \rvyBar}$, respectively. The function $|z|^{+} = z$ if
1866: $z \geq 0$ and $|z|^{+} = 0$ if $z < 0$.
1867: \end{thm}
1868:
1869:
1870: {\em Remark:} Definitions (i) and (ii) in
1871: Theorems~\ref{thm.jointCodeML} and~\ref{thm.jointCode} concern
1872: individual decoding error events which might be useful in applications
1873: where the $\rvbx$ and $\rvby$ streams are decoded jointly, but
1874: utilized individually. The more standard joint error event is given
1875: by (iii).
1876:
1877: {\em Remark:} We can compare the joint error event for block and
1878: streaming Slepian-Wolf coding, cf.~(\ref{eq.expSWUnivJoint})
1879: with~(\ref{eq.SWblockLowBnd}). The streaming exponent differs by the
1880: extra parameter $\gamma$ that must be minimized over. If the
1881: minimizing $\gamma = 1$, then the block and streaming exponents are
1882: the same. The minimization over $\gamma$ results from a fundamental
1883: difference in the types of error-causing events that can occur in
1884: streaming Slepian-Wolf as compared to block Slepian-Wolf.
1885:
1886: {\em Remark:} The error exponents of maximum likelihood and universal
1887: decoding in Theorems~\ref{thm.jointCodeML} and~\ref{thm.jointCode} are
1888: the same. However, because there are new classes of error events
1889: possible in streaming, this needs proof. The equivalence is
1890: summarized in the following theorem.
1891:
1892: \begin{thm} \label{THM:Universal_ML_SW}
1893: Let $(\Rx, \Ry)$ be a rate pair such that $\Rx > H(\rvx|\rvy)$, $\Ry
1894: > H(\rvy|\rvx)$, and $\Rx + \Ry > H(\rvx, \rvy)$. Then,
1895: %
1896: \begin{equation}
1897: E_{ML, SW, x}(\Rx, \Ry) = E_{UN, SW, x}(\Rx, \Ry),
1898: \end{equation}
1899: %
1900: and
1901: %
1902: \begin{equation}
1903: E_{ML, SW, y}(\Rx, \Ry) = E_{UN, SW, y}(\Rx, \Ry).
1904: \end{equation}
1905: \end{thm}
1906:
1907: Theorem~\ref{THM:Universal_ML_SW} follows directly from the
1908: following lemma, shown in the appendix.
1909: %
1910: \begin{lemma}
1911: For all $\gamma \in [0,1]$
1912: %
1913: \begin{equation}
1914: E^{ML}_x(R_x,R_y,\gamma)=E^{UN}_x(R_x,R_y,\gamma),
1915: \end{equation}
1916: %
1917: and
1918: %
1919: \begin{equation}
1920: E^{ML}_y(R_x,R_y,\gamma)=E^{UN}_y(R_x,R_y,\gamma).
1921: \end{equation}
1922: \end{lemma}
1923:
1924: {\em Remark:} This theorem allows us to simplify notation. For
1925: example, we can define $E_x(R_x,R_y,\gamma)$ as
1926: $E_x(R_x,R_y,\gamma)=E^{ML}_x(R_x,R_y,\gamma)=E^{UN}_x(R_x,R_y,\gamma)$,
1927: and can similarly define $E_y(R_x,R_y,\gamma)$. Further, since the
1928: ML and universal exponents are the same for the whole rate region we
1929: can define $E_{SW,x}(\Rx, \Ry)$ as $E_{SW,x}(\Rx, \Ry) =
1930: E_{ML,SW,x}(\Rx, \Ry) = E_{UN,SW,x}(\Rx, \Ry)$, and can similarly
1931: define $E_{SW,y}(R_x,R_y)$.
1932:
1933:
1934: \section{Numerical Results}\label{sec.numerical} To build insight
1935: into the differences between the sequential error exponents of
1936: Theorems~\ref{thm.entCodeML}--\ref{THM:Universal_ML_SW} and block-coding error
1937: exponents, we give some examples of the exponents for binary sources.
1938:
1939: For the point-to-point case, the error exponents of random sequential
1940: and block source coding are identical everywhere in the achievable
1941: rate region as can be seen by comparing Theorem~\ref{thm.entCodeUniv}
1942: and Corollary~\ref{thm.blockEnt}. The same is true for source coding
1943: with decoder side information (cf.~Theorem~\ref{thm.decSIUniv} and
1944: Corollary~\ref{thm.blockSI}). For distributed Slepian-Wolf source
1945: coding however, the sequential and block error exponents can be
1946: different. The reason for the discrepancy is that a new type of error
1947: event can be dominant in Slepian-Wolf source coding. This is reflected
1948: in Theorem~\ref{thm.jointCodeML} by the minimization over $\gamma$.
1949: Example $2$ illustrates the impact of this $\gamma$ term.
1950:
1951: For Slepian-Wolf source coding at very high rates, where $\Rx >
1952: H(\rvx)$, the decoder can ignore any information from encoder $y$ and
1953: still decode $x$ with a positive error exponent. However, the
1954: decoder could also choose to decode source $x$ and $y$ jointly.
1955: Fig~\ref{fig.numerical1}.a and \ref{fig.numerical1}.b illustrate that
1956: joint decoding may or surprisingly {\em may not} help decoding source
1957: $x$. This is seen by comparing the error exponent when the decoder
1958: ignores the side information from encoder $y$ (the dotted curves) to
1959: the joint error exponent (the lower solid curves). It seems that when
1960: the rate for source $y$ is low, atypical behaviors of source $y$ can
1961: cause joint decoding errors that end up corrupting $x$ estimates.
1962: This holds for both block and sequential coding.
1963:
1964:
1965: \subsection{Example 1: symmetric source with uniform marginals}
1966:
1967:
1968: \begin{figure}
1969: \begin{center}
1970: \begin{picture}(100,70)
1971: \put(40, 10){\vector(1,0){55}} \put(40, 10){\vector(0,1){55}}
1972: \put(40,70){$\Ry$} \put(97,10){$\Rx$}
1973: %\put(29,31){$H(\rvy| \rvx)$}
1974: %\put(56, 5){$H(\rvx| \rvy)$} \put(30,60){$\log(2)$}
1975: %\put(87,5){$\log(2)$}
1976: \put(90,60){\line(0,-1){27}} \put(90,33){\line(-1,1){27}}
1977: \put(90,60){\line(-1,0){27}} \multiput(40,45)(2,0){25}{$.$}
1978: \put(30,45 ){$0.49$ } \multiput(40,59)(2,0){25}{$.$} \put(30,59
1979: ){$0.67$ } \put(72, 52){Achievable } \put(78, 48){Region } \put(57,
1980: 35){$\Rx+\Ry=H(\rvx,\rvy)$} \put(72, 37){\vector(1,1){6.5}}
1981: \end{picture}
1982: \caption{Rate region for the example 1 source, we focus on the error
1983: exponent on source $x$ for fixed encoder $y$ rates: $R_y=0.49$
1984: and $R_y=0.67$ } \label{fig.SWregion1}
1985: \end{center}
1986: \end{figure}
1987:
1988: Consider a symmetric source where $|\mathcal{X}|=|\mathcal{Y}|=2$,
1989: $p_{\rvx\rvy}(0,0)=0.45$, $p_{\rvx\rvy}(0,1)= p_{\rvx\rvy}(1,0)=0.05$
1990: and $p_{\rvx\rvy}(1,1)=0.45$. This is a marginally-uniform source:
1991: $\rvx$ is Bernoulli(1/2), $\rvy$ is the output from a BSC with input
1992: $\rvx$, thus $\rvy$ is Bernoulli(1/2) as well. For this source
1993: $H(\rvx)=H(\rvy)=\log(2)$, $H(\rvx|\rvy)=H(\rvy|\rvx)=0.32$,
1994: $H(\rvx,\rvy)=1.02$. The achievable rate region is the triangle shown
1995: in Figure~\ref{fig.SWregion1}.
1996:
1997:
1998: For this source, as will be shown later, the dominant sequential
1999: error event is on the diagonal line in Fig~\ref{fig.twoD2}. This is
2000: to say that:
2001: %
2002: \begin{equation}
2003: E_{SW,x}(\Rx, \Ry)= E_{SW,x}^{BLOCK}(\Rx, \Ry)= E^{ML}_x(\Rx, \Ry,
2004: 0) = \sup_{\rho \in [0,1]} [ E_{xy}(\Rx, \Ry, \rho)].
2005: \end{equation}
2006:
2007: Where $E_{SW,x}^{BLOCK}(\Rx, \Ry)=\min\{E^{ML}_x(\Rx, \Ry,
2008: 0),E^{ML}_x(\Rx, \Ry, 1)\} $ as shown in \cite{gallagerTech:76}.
2009:
2010: Similarly for source $y$:
2011: %
2012: \begin{equation}
2013: E_{SW,y}(\Rx, \Ry)= E_{SW,y}^{BLOCK}(\Rx, \Ry)= E^{ML}_y(\Rx, \Ry,
2014: 0) = \sup_{\rho \in [0,1]} [ E_{xy}(\Rx, \Ry, \rho)].
2015: \end{equation}
2016:
2017: We first show that for this source $\forall \rho\geq 0$, $
2018: E_{x|y}(\Rx, \rho) \geq E_{xy}(\Rx, \Ry, \rho)$. By definition:
2019: %
2020: \begin{eqnarray}
2021: E_{x|y}(\Rx, \rho)- E_{xy}(\Rx, \Ry, \rho) & = & \rho \Rx - \log
2022: \Big[ \sum_{\svy} \Big[ \sum_{\svx}
2023: p_{\rvx\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big]^{1+\rho}
2024: \Big]\nonumber\\
2025: %
2026: %
2027: %
2028: &&-\Big(\rho (\Rx + \Ry) - \log \Big[ \sum_{\svx, \svy}
2029: p_{\rvx\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big]^{1 + \rho}
2030: \Big)\nonumber\\
2031: %
2032: %
2033: & = & -\rho \Ry - \log \Big[ 2 \Big[ \sum_{\svx}
2034: p_{\rvx\rvy}(\svx,0)^{\frac{1}{1+\rho}} \Big]^{1+\rho} \Big] + \log
2035: \Big[ 2\sum_{\svx }
2036: p_{\rvx\rvy}(\svx,0)^{\frac{1}{1+\rho}} \Big]^{1 + \rho} \nonumber\\
2037: %
2038: %
2039: %
2040: & = & -\rho \Ry - \log \Big[ 2 \Big] + \log \Big[ 2 \Big]^{1 + \rho} \nonumber\\
2041: %
2042: %
2043: & =& \rho (\log [2] -\Ry)\nonumber\\
2044: & \geq & 0\nonumber
2045: \end{eqnarray}
2046:
2047: The last inequality is true because we only consider the problem when
2048: $\Ry \leq \log|\mathcal{Y}|$. Otherwise, $y$ is better viewed as
2049: perfectly known side-information. Now
2050:
2051: \begin{eqnarray}
2052: E^{ML}_x(\Rx, \Ry, \gamma) &=& \sup_{\rho \in [0,1]} [ \gamma
2053: E_{x|y}(\Rx, \rho) + (1-\gamma) E_{xy}(\Rx, \Ry, \rho)]\nonumber\\
2054: %
2055: &\geq & \sup_{\rho \in [0,1]} [ E_{xy}(\Rx, \Ry, \rho)]\nonumber\\
2056: %
2057: &=& E^{ML}_x(\Rx, \Ry, 0) \nonumber
2058: \end{eqnarray}
2059:
2060: Similarly $E^{ML}_y(\Rx, \Ry, \gamma) \geq E^{ML}_y(\Rx, \Ry, 0)=
2061: E^{ML}_x(\Rx, \Ry, 0)$. Finally,
2062: %
2063: \begin{eqnarray}
2064: E_{SW, x}(\Rx, \Ry)& =& \min \Bigg\{ \inf_{\gamma \in [0,1]}
2065: E_x(\Rx, \Ry, \gamma), \inf_{\gamma \in [0,1]}
2066: \frac{1}{1-\gamma} E_y(\Rx, \Ry, \gamma) \Bigg\}\nonumber\\
2067: &=& E^{ML}_x(\Rx, \Ry, 0)\nonumber
2068: \end{eqnarray}
2069:
2070: Particularly $E_x(\Rx, \Ry, 1) \geq E_x(\Rx, \Ry, 0)$, so
2071: %
2072: \begin{eqnarray}
2073: E_{SW,x}^{BLOCK}(\Rx, \Ry) &=& \min\{E^{ML}_x(\Rx, \Ry,
2074: 0),E^{ML}_x(\Rx, \Ry, 1)\}\nonumber\\
2075: &=& E^{ML}_x(\Rx, \Ry, 0)\nonumber
2076: \end{eqnarray}
2077: %
2078: The same proof holds for source $y$.
2079:
2080:
2081:
2082: In Fig~\ref{fig.numerical3} we plot the joint sequential/block coding
2083: error exponents $E_{SW,x}(\Rx, \Ry)=E_{SW,x}^{BLOCK}(\Rx, \Ry)$. The
2084: error exponents are positive iff $\Rx> H(\rvx,\rvy)-\Ry=1.02-\Ry$.
2085:
2086:
2087: \begin{figure}[htbp]
2088: \begin{center}
2089: \leavevmode
2090: \includegraphics[width=100mm]{simu3}
2091: \caption[]{ Error exponents plot: $E_{SW,x}(\Rx, \Ry)$ plotted for $R_y=0.49$ and $R_y=0.67$\\
2092: $E_{SW,x}(\Rx, \Ry)= E_{SW ,x}^{BLOCK}(\Rx,
2093: \Ry)=E_{SW,y}(\Rx, \Ry)= E_{SW,y}^{BLOCK}(\Rx, \Ry)$ and
2094: $E_{x}(\Rx)=0$ }
2095: \label{fig.numerical3}
2096: \end{center}
2097: \end{figure}
2098:
2099:
2100: \subsection{Example 2: non-symmetric source}
2101:
2102: Consider a non-symmetric source where $|\mathcal{X}|=|\mathcal{Y}|=2$,
2103: $p_{\rvx\rvy}(0,0)=0.1$, $p_{\rvx\rvy}(0,1)= p_{\rvx\rvy}(1,0)=0.05$
2104: and $p_{\rvx\rvy}(1,1)=0.8$. For this source $H(\rvx)=H(\rvy)=0.42$,
2105: $H(\rvx|\rvy)=H(\rvy|\rvx)=0.29$ and $H(\rvx,\rvy)=0.71$. The
2106: achievable rate region is shown in Fig~\ref{fig.SWregion}. In
2107: Fig~\ref{fig.numerical1}.a, \ref{fig.numerical1}.b,
2108: \ref{fig.numerical1}.c and \ref{fig.numerical1}.d, we compare the
2109: joint sequential error exponent $E_{SW,x}(\Rx, \Ry)$, the joint block
2110: coding error exponent $E_{SW,x}^{BLOCK}(\Rx, \Ry)=\min\{E_x(\Rx, \Ry,
2111: 0),E_x(\Rx, \Ry, 1)\} $ as shown in \cite{gallagerTech:76}, and the
2112: individual error exponent for source $X$, $E_{x}(\Rx)$ as shown in
2113: Corollary~\ref{thm.blockEnt}. Notice that $E_{x}(\Rx)>0$ only if $\Rx>
2114: H(\rvx)$. In Fig~\ref{fig.numerical2}, we compare the sequential error
2115: exponent for source $y$: $E_{SW,y}(\Rx, \Ry)$ and the block coding
2116: error exponent for source $y$: $E_{SW,y}^{BLOCK}(\Rx, \Ry)
2117: =\min\{E_y(\Rx, \Ry, 0),E_y(\Rx, \Ry, 1)\}$ and $E_{y}(\Ry)$ which is
2118: a constant since we fix $\Ry$.
2119:
2120: For $\Ry=0.35$ as shown in Fig~\ref{fig.numerical1}.a.b and
2121: \ref{fig.numerical2}.a.b, the difference between the block coding and
2122: sequential coding error exponents is very small for both source $x$
2123: and $y$. More interestingly, as shown in Fig~\ref{fig.numerical1}.a,
2124: because the rate of source $y$ is low, it is more likely to get a
2125: decoding error due to the atypical behavior of source $y$. So as $\Rx$
2126: increases, it is sometimes better to ignore source $y$ and decode $x$
2127: individually. This is evident as the dotted curve is above the solid
2128: curves.
2129:
2130:
2131: For $\Ry=0.49$ as shown in Fig~\ref{fig.numerical1}.c.d and
2132: \ref{fig.numerical2}.c.d, since the rate for source $y$ is high
2133: enough, source $y$ can be decoded with a positive error exponent
2134: individually as shown in Fig~\ref{fig.numerical2}.c. But as the rate
2135: of source $x$ increases, joint decoding gives a better error exponent.
2136: When $\Rx$ is very high, then we observe the saturation of the error
2137: exponent on $y$ as if source $x$ is known perfectly to the decoder!
2138: This is illustrated by the flat part of the solid curves in
2139: Fig~\ref{fig.numerical2}.c.
2140:
2141: \begin{figure}
2142: \begin{center}
2143: \begin{picture}(100,70)
2144: \put(40, 10){\vector(1,0){55}} \put(40, 10){\vector(0,1){55}}
2145: \put(40,70){$\Ry$} \put(97,10){$\Rx$}
2146: %\put(30,40){$H(\rvy)$}
2147: %\put(29,31){$H(\rvy| \rvx)$} \put(68,5){$H(\rvx)$}
2148: % \put(56,5){$H(\rvx| \rvy)$} \put(30,60){$\log(2)$} \put(87,5){$\log(2)$}
2149: \put(90,60){\line(0,-1){29}} \put(90,31){\line(-1,0){20}}
2150: \put(70,31){\line(-1, 1){9}} \put(90,60){\line(-1,0){29}}
2151: \put(61,60){\line(0,-1){20}}
2152: % \put(40,32 ){\line(1,0){50}} (5,0){12}{\multiput(0,0)(0,5){12}{\circle*{1.5}}}
2153: \multiput(40,35)(2,0){25}{$.$}
2154: % \put(40,35 ){\line(1,0){50}}
2155: % \put(40,42 ){\line(1,0){50}}
2156: % \put(40,45 ){\line(1,0){50}}
2157: \multiput(40,45)(2,0){25}{$.$}
2158: % \put(40,55 ){\line(1,0){50}}
2159: %\put(92,32 ){$\Ry=0.44$, Fig. \ref{fig.numerical1}}
2160: \put(30,35 ){$0.35$ }
2161: % \put(92,42 ){$\Ry=0.64$, Fig. \ref{fig.numerical3}}
2162: \put(30,45 ){$0.49$ }
2163: % \put(92,55 ){$\Ry=0.90$, Fig. \ref{fig.numerical5}}
2164: \put(64, 52){Achievable} \put(66, 48){Region}
2165:
2166: \put(47, 25){$\Rx+\Ry=H(\rvx,\rvy)$} \put(60, 28){\vector(1,1){6.5}}
2167: \end{picture}
2168: \caption{ Rate region for the example 2 source, we focus on the error
2169: exponent on source $x$ for fixed encoder $y$ rates: $R_y=0.35$
2170: and $R_y=0.49$ } \label{fig.SWregion}
2171: \end{center}
2172: \end{figure}
2173:
2174:
2175:
2176: \begin{figure}[htbp]
2177: \begin{center}
2178: \leavevmode
2179: \includegraphics[width=140mm]{simu1}
2180: \caption[]{ Error exponents plot for source $x$ for fixed $\Ry$ as $\Rx$ varies:\\
2181: $\Ry=0.35$:\\(a) Solid curve: $E_{SW,x}(\Rx, \Ry)$, dashed curve
2182: $ E_{SW,x}^{BLOCK}(\Rx, \Ry)$ and dotted
2183: curve: $E_{x}(\Rx)$, notice that $E_{SW,x}(\Rx, \Ry)\leq
2184: E_{SW,x}^{BLOCK}(\Rx, \Ry)$ but the difference is small.\\(b) $10
2185: \log_{10}(\frac{E_{SW,x}^{BLOCK}(\Rx, \Ry)}{E_{SW,x}(\Rx, \Ry)})$. This shows the difference is there at high rates.\\
2186: $\Ry=0.49$:\\(c) Solid curve $E_{SW,x}(\Rx, \Ry)$, dashed
2187: curve $ E_{SW,x}^{BLOCK}(\Rx, \Ry)$ and
2188: dotted curve: $E_{x}(\Rx)$, again $E_{SW,x}(\Rx, \Ry)\leq
2189: E_{SW,x}^{BLOCK}(\Rx, \Ry)$ but the difference is extremely small.\\(d) $10
2190: \log_{10}(\frac{E_{SW,x}^{BLOCK}(\Rx, \Ry)}{E_{SW,x}(\Rx, \Ry)})$. This shows the difference is there at intermediate low rates. }
2191: \label{fig.numerical1}
2192: \end{center}
2193: \end{figure}
2194:
2195:
2196:
2197: \begin{figure}[htbp]
2198: \begin{center}
2199: \leavevmode
2200: \includegraphics[width=140mm]{simu2}
2201: \caption[]{ Error exponents plot for source $y$ for fixed $\Ry$ as $\Rx$ varies:\\
2202: $\Ry=0.35$:
2203:
2204: (a) Solid curve: $E_{SW,y}(\Rx, \Ry)$
2205: and dashed curve $E_{SW,y}^{BLOCK}(\Rx, \Ry)$, $E_{SW,y}(\Rx, \Ry)\leq
2206: E_{SW,y}^{BLOCK}(\Rx, \Ry)$, the difference is extremely small.
2207: $E_{y}(\Ry)$ is $0$ because $R_y=0.35< H(\rvy)$. (b) $ 10 \log_{10}(\frac{E_{SW,y}^{BLOCK}(\Rx,
2208: \Ry)}{E_{SW,y}(\Rx, \Ry)})$. This shows the two exponents are not identical everywhere. \\$\Ry=0.49$:\\(c) Solid curves:
2209: $E_{SW,y}(\Rx, \Ry)$, dashed curve $ E_{SW,y}^{BLOCK}(\Rx, \Ry)$ and $E_{SW,y}(\Rx,
2210: \Ry)\leq E_{SW,y}^{BLOCK}(\Rx, \Ry)$ and $E_{y}(\Ry)$ is constant
2211: shown in a dotted line.\\(d) $ 10 \log_{10}(\frac{E_{SW,y}^{BLOCK}(\Rx,
2212: \Ry)}{E_{SW,y}(\Rx, \Ry)})$. Notice how the gap goes to infinity when we leave the Slepian-Wolf region. }
2213: \label{fig.numerical2}
2214: \end{center}
2215: \end{figure}
2216:
2217:
2218: %%%%%%%%%%%%%%%%%%%%%
2219:
2220: \section{Streaming point-to-point coding via sequential random binning}
2221: \label{sec.entropy}
2222:
2223:
2224: In this section we prove Theorems~\ref{thm.entCodeML}
2225: and~\ref{thm.entCodeUniv}. While the emphasis of the paper is on
2226: distributed source coding, the basic causal random binning ideas and
2227: analysis techniques can be more easily developed in the point-to-point
2228: context.
2229:
2230: %%%%%
2231: \subsection{Maximum-likelihood decoding}
2232: \label{sec.MLent}
2233:
2234: To show Theorems~\ref{thm.entCodeML} and~\ref{thm.entCodeUniv}, we
2235: first develop the common core of the proof in the context of ML
2236: decoding. The proof strategy is as follows. A decoding error can
2237: only occur if there is some spurious source sequence $\svxtil^n$ that
2238: satisfies three conditions: (i) it must be in the same bin (share the
2239: same parities) as $\svx^n$, i.e., $\svxtil^n \in \binX(\svx^n)$, (ii)
2240: it must be more likely than the true sequence, i.e.,
2241: $p_{\rvbx}(\svxtil^n) > p_{\rvbx}(\svx^n)$, and (iii) $\svxtil_{l}
2242: \neq \svx_{l}$ for some $l \leq n - \delay$.
2243:
2244: The error probability is
2245: %
2246: \begin{align}
2247: \Pr [ \rvxhat^{n-\delay} \neq \rvx^{n-\delay}] %%
2248: = & \sum_{\svx^n} \Pr [\rvxhat^{n-\delay} \neq \svx^{n-\delay} |
2249: \rvx^n = \svx^n]
2250: p_\rvbx(\svx^n) \label{eq.condSS} \\
2251: %%
2252: %%
2253: %%
2254: = & \sum_{\svx^n} \sum_{l=1}^{n- \delay} \Pr \big[ \exists \;
2255: \svxtil^n \in \mathcal{B}_x(\svx^n)\cap \mathcal{F}_n(l, \svx^n) \;
2256: \mbox{s.t.} \; p_{\rvbx} (\svxtil^n) \geq p_{\rvbx}(\svx^n) \big]
2257: p_\rvbx(\svx^n) \label{eq.decomp}\displaybreak[2]\\
2258: %%
2259: %%
2260: %%
2261: = & \sum_{l=1}^{n- \delay} \Big\{ \sum_{\svx^n} \Pr \big[ \exists
2262: \; \svxtil^n \in \mathcal{B}_x(\svx^n)\cap \mathcal{F}_n(l, \svx^n)
2263: \; \mbox{s.t.} \; p_{\rvbx} (\svxtil^n) \geq p_{\rvbx}(\svx^n) \big]
2264: p_\rvbx(\svx^n) \Big\} \nonumber \\
2265: %%
2266: %%
2267: =&\sum_{l=1}^{n- \delay} p_n(l).
2268: \label{eq.sufDec}
2269: \end{align}
2270: %
2271: After conditioning on the realized source sequence
2272: in~(\ref{eq.condSS}), the remaining randomness is only in the binning.
2273: In~(\ref{eq.decomp}) we decompose the error event into a number of
2274: mutually exclusive events (see Fig.~\ref{fig.oneD1}) by partitioning
2275: all source sequences $\svxtil^n$ into sets $\mathcal{F}_n(l,\svx^n)$
2276: defined by the time $l$ of the first sample in which they differ from
2277: the realized source $\svx^n$,
2278: %
2279: \begin{equation}
2280: \mathcal{F}_n(l,\svx^n) =\{\svxtil^n\in
2281: \mathcal{X}^n|\svxtil^{l-1} = \svx^{l-1}, \svxtil_{l} \neq
2282: \svx_{l}\}, \label{eq.partition}
2283: \end{equation}
2284: %
2285: and define $\mathcal{F}_n(n+1,\svx^n)=\{\svx^n\}$. Finally,
2286: in~(\ref{eq.sufDec}) we define
2287: %
2288: \begin{equation}
2289: p_n(l)= \sum_{\svx^n} \Pr \big[ \exists
2290: \; \svxtil^n \in \mathcal{B}_x(\svx^n)\cap \mathcal{F}_n(l, \svx^n)
2291: \; \mbox{s.t.} \; p_{\rvbx} (\svxtil^n) \geq p_{\rvbx}(\svx^n) \big]
2292: p_\rvbx(\svx^n).
2293: \label{eq.errTimeL}
2294: \end{equation}
2295:
2296: %
2297: \begin{figure}
2298: \setlength{\unitlength}{1mm}
2299: \begin{picture}(100,20)
2300: \multiput(30,10)(5,0){14}{\circle*{1.5}}
2301: \multiput(30,10)(5,0){9}{\oval(2,3)}
2302: \thicklines% \linethickness{0.5mm}
2303: \put(75,10){\oval(2,3)}
2304: \thinlines
2305: \put(25,10){\vector(1,0){85}}
2306: \put(115,9){$l$}
2307: \put(29,5){$1$}
2308: \put(94,5){$n$} \put(70,5){$n-\Delta$}
2309: \end{picture}
2310: \caption{Decoding error probability at $n-\delay$ can be union
2311: bounded by the sum of probabilities of first decoding error at $l$,
2312: $1\leq l\leq n-\delay$. The dominant error event $p_n(n-\delay)$ is
2313: the one in the highlighted oval (shortest delay).} \label{fig.oneD1}
2314: \end{figure}
2315:
2316:
2317: We now upper bound $p_n(l)$ using a Chernoff bound argument similar to
2318: \cite{gallagerTech:76}.
2319: %
2320: \begin{lemma}\label{Lemma.indivupperbound}
2321: %
2322: $p_n(l)\leq \exp\{-(n-l+1)\expML(\Rent)\}$.
2323: \end{lemma}
2324:
2325: \pf
2326: %
2327: \begin{align}
2328: p_n(l) =&\sum_{\svx^n} \Pr \big[ \exists \; \svxtil^n \in
2329: \mathcal{B}_x(\svx^n)\cap \mathcal{F}_n(l, \svx^n) \; \mbox{s.t.} \;
2330: p_{\rvbx} (\svxtil^n) \geq p_{\rvbx}(\svx^n) \big]
2331: p_\rvbx(\svx^n) \nonumber\\
2332: %%
2333: \leq & \sum_{\svx^n} \min \Big[1, \hspace{-1.5em} \sum_{\tiny
2334: \begin{array}{c} \svxtil^n\in \mathcal{F}_n(l,\svx^n) \mbox{s.t.} \\ p_{\rvbx}
2335: (\svx^n)\leq p_{\rvbx} (\svxtil^n) \end{array}} \hspace{-1.5em} \Pr[
2336: \svxtil^n \in \binX(\svx^n)] \Big] p_\rvbx(\svx^n)
2337: \label{eq.setBoundViaScore} \displaybreak[2]\\
2338: %%
2339: %%
2340: = & \sum_{\svx^{l-1}, \svx_{l}^n}
2341: %
2342: \min \Big[1, \hspace{-1em}
2343: \sum_{\tiny \begin{array}{c} \svxtil_{l}^n \; \mbox{s.t.} \\
2344: p_{\rvx }(\svx_{l}^n) < p_{\rvx }(\svxtil_{l}^n)
2345: \end{array}}
2346: %
2347: \exp\{-(n-l + 1) \Rent\} \Big] p_\rvbx(\svx^{l-1}) p_\rvbx(\svx_l^{n})
2348: \label{eq.randBin} \\
2349: %
2350: %
2351: = & \sum_{\svx_{l}^n}
2352: %
2353: \min \Big[1, \hspace{-1em}
2354: \sum_{\tiny \begin{array}{c} \svxtil_{l}^n \; \mbox{s.t.} \\
2355: p_{\rvx }(\svx_{l}^n) < p_{\rvx }(\svxtil_{l}^n)
2356: \end{array}}
2357: %\hspace{-2em} \ldots \nonumber\\
2358: \exp\{-(n-l +1) \Rent\} \Big]
2359: p_\rvbx(\svx_l^{n})\nonumber\\
2360: %%
2361: %%
2362: = &
2363: %
2364: \sum_{\svx_{l}^n}
2365: %
2366: \min \Big[ 1, \sum_{\tiny \svxtil_{l}^n } \ind[ p_{\rvx
2367: }(\svxtil_{l}^n) > p_\rvbx(\svx_l^{n})] \exp\{-(n-l+1) \Rent\} \Big]
2368: p_\rvbx(\svx_l^{n}) \label{eq.indicator}\\
2369: %
2370: %
2371: %
2372: \leq & \sum_{\svx_{l}^n}
2373: %
2374: \min \left[1, \sum_{\tiny \svxtil_{l}^n } \min \left[ 1,
2375: \frac{p_{\rvx }(\svxtil_{l}^n)}{p_{\rvx }(\svx_{l}^n)} \right]
2376: \exp\{-(n-l +1) \Rent\} \right]
2377: p_\rvbx(\svx_l^{n}) \nonumber \displaybreak[2]\\
2378: %
2379: %
2380: %
2381: \leq & \sum_{\svx_{l}^n}
2382: %
2383: \left[ \sum_{\tiny \svxtil_{l}^n } \left[ \frac{p_{\rvx
2384: }(\svxtil_{l}^n)}{p_{\rvx }(\svx_{l}^n)} \right]^{\frac{1}{1+\rho}}
2385: \exp\{-(n-l+1) \Rent\} \right]^{\rho}
2386: p_\rvbx(\svx_l^{n}) \label{eq.limOnRho} \displaybreak[2]\\
2387: %
2388: %
2389: %
2390: = & \sum_{\svx_{l}^n} p_\rvbx(\svx_l^{n})^{\frac{1}{1+\rho}}
2391: %
2392: \left[ \sum_{\tiny \svxtil_{l}^n } \left[ p_{\rvx }(\svxtil_{l}^n)
2393: \right]^{\frac{1}{1+\rho}}\right]^{\rho}
2394: \exp\{-(n-l +1) \rho \Rent\} \nonumber \displaybreak[2] \\
2395: %
2396: %
2397: %
2398: = & \left[\sum_{\svx} \PxRV(\svx)^{\frac{1}{1+\rho}}\right]^{(n-l+1)}
2399: %
2400: \left[
2401: \sum_{\svx} \PxRV(\svx)^{\frac{1}{1+\rho}}\right]^{(n-l+1)\rho}
2402: \exp\{-(n-l +1) \rho \Rent\} \label{eq.iid} \displaybreak[2]\\
2403: %
2404: %
2405: %
2406: = & \left[ \sum_{\svx}
2407: \PxRV(\svx)^{\frac{1}{1+\rho}}\right]^{(n-l+1)(1+\rho)}
2408: \exp\{-(n-l+1) \rho \Rent\} \nonumber\\
2409: %
2410: %
2411: = & \exp\left\{-(n-l+1) \left[\rho \Rent - (1+\rho) \ln
2412: \left(\sum_{\svx} \PxRV(\svx)^{\frac{1}{1+\rho}}\right) \right]
2413: \right\}. \label{eq.rhoBnd}
2414: \end{align}
2415:
2416: In~(\ref{eq.setBoundViaScore}) the union bound is applied.
2417: In~(\ref{eq.randBin}) we use the fact that after the first symbol in
2418: which two sequences differ, the remaining parity bits are independent,
2419: and the fact that only the likelihood of the differing suffixes
2420: matter. That is, if $\svx^{l-1} = \svxtil^{l-1}$, then $p_{\rvbx}
2421: (\svx^n)< p_{\rvbx} (\svxtil^n)$ if and only if $p_{\rvbx}(\svx_{l}^n)
2422: < p_{\rvbx}(\svxtil_{l}^n)$. In~(\ref{eq.indicator}) $\ind(\cdot)$ is
2423: the indicator function, taking the value one if the argument is true,
2424: and zero if it is false. We get~(\ref{eq.limOnRho}) by limiting $\rho$
2425: to the range $0 \leq \rho \leq 1$ since the arguments of the
2426: minimization are both positive and upper-bounded by one. We use the
2427: iid property of the source, exchanging sums and products to
2428: get~(\ref{eq.iid}). The bound in~(\ref{eq.rhoBnd}) is true for all
2429: $\rho$ in the range $0 \leq \rho \leq 1$. Maximizing the exponent in~(\ref{eq.rhoBnd})
2430: over $\rho$ gives $p_n(l)\leq \exp\{-(n-l+1)\expML(\Rent)\}$, where
2431: $\expML(\Rent)$ is defined in Theorem~\ref{thm.entCodeML}, in
2432: particular~(\ref{eq.errExpML}). \hfill$\blacksquare$
2433: %
2434:
2435: Using Lemma~\ref{Lemma.indivupperbound} in~(\ref{eq.sufDec}) gives
2436: %
2437: \begin{align}
2438: \Pr [ \rvxhat^{n-\delay} \neq \rvx^{n-\delay}] %%
2439: \leq & \sum_{l=1}^{n-\delay} \exp\{- (n-l+1) E_{ML}(\Rent)\}
2440: \label{eq.delayTerm}\\
2441: %
2442: = & \sum_{l=1}^{n-\delay} \exp\{- (n-l+1-\delay) E_{ML}(\Rent)\}
2443: \exp\{- \delay E_{ML}(\Rent)\} \nonumber \\
2444: %
2445: \leq & K_0 \exp\{- \delay E_{ML}(\Rent)\} \label{eq.pullOutExp}
2446: \end{align}
2447: %
2448: In~(\ref{eq.pullOutExp}) we pull out the exponent in $\delay$. The
2449: remaining summation is a sum over decaying exponentials, and thus
2450: can be bounded by some constant $K_0$. This proves Theorem~\ref{thm.entCodeML}.
2451:
2452:
2453: \subsection{Error events and sequential decoding}
2454: \label{sec.entMLseq}
2455:
2456:
2457: To better understand the dominant error event in the
2458: sum~(\ref{eq.delayTerm}), consider constructing the ML estimate in a
2459: symbol-by-symbol sequential manner. The decoder starts by first
2460: identifying as candidates those sequences whose parities match the
2461: received bit stream up to time $n$. If the encoder observes the
2462: length-$n$ sequence $\rvbx = \svbx$, this is $\{ \svbxBar \;
2463: \mbox{s.t.} \; \svbxBar \in \binX(\svbx)\}$. The $l$th symbol of the
2464: estimate, $\rvxhat_l$, is defined as
2465: %
2466: \begin{equation}
2467: \svxhat_l = \svw_l \;\;\; \mbox{where} \;\;\;
2468: \svbw = \argmax_{\svbxBar \in \binX(\svbx) \;\; \mbox{s.t.} \;\;
2469: \svxBar^{l-1} = \svxhat^{l-1}} p_{\rvx_{l}^n}(\svxBar_{l}^n).
2470: \label{eq.defSeqDec}
2471: \end{equation}
2472: %
2473: The estimate thus produced is the maximum likelihood estimate because
2474: the decision regarding which pair of sequences is more likely depends
2475: only on which one's suffix is more likely.
2476:
2477: This is a decision-directed decoder. Semi-hard\footnote{Decisions are
2478: only ``hard'' for computational time. As soon as the next set of
2479: parities arrive and real-time advances, all the computations are
2480: done again.} estimates are made sequentially for each symbol. These
2481: estimates are then fixed, and taken as true when estimating subsequent
2482: symbols. Each such hard-decision is analogous to a classic
2483: block-coding Slepian-Wolf problem. This is because we only need to
2484: decide between sequences that start to differ in the symbol we are
2485: trying to estimate---previous symbols have been fixed, and subsequent
2486: symbols are not yet in question. Thus, all sequences that could lead
2487: to different estimates of symbol $l$ are binned independently for the
2488: remainder of the block. This is why the error exponent we derive
2489: in~(\ref{eq.pullOutExp}) equals Gallager's block coding
2490: exponent~\cite{gallagerTech:76}. Since the error exponent for each
2491: block-decoding problem is the same, the dominant error event is the
2492: hard-decision with the shortest block-length. This symbol is the last
2493: symbol we need to estimate. Its block-length equals the estimation
2494: delay $\delay$. We revisit this story in Section~\ref{sec.SW} when we
2495: consider Slepian-Wolf coding. In that context the dominant error
2496: event has some features that do not arise in block coding.
2497:
2498:
2499: %%%%%%%%%%%
2500: \subsection{Universal decoding}
2501: \label{sec.univEnt}
2502:
2503: In this section we prove Theorem~\ref{thm.entCodeUniv}. We use the
2504: sequential decoder introduced in Section~\ref{sec.entMLseq}, but with
2505: minimum-entropy, rather than maximum-likelihood, decoding. That is,
2506: %
2507: \begin{equation}
2508: \svxhat_l = \svw_l[l] \;\;\; \mbox{where} \;\;\; \svw^n[l] =
2509: \argmin_{\svxBar^n \in \binX(\svx^n) \;\; \mbox{s.t.} \;\;
2510: \svxBar^{l-1} = \svxhat^{l-1}} H(\svxBar_{l}^n).
2511: \label{eq.defSeqUniv}
2512: \end{equation}
2513: %
2514: We term this a minimum suffix-entropy decoder. The reason for using
2515: this decoder instead of the standard minimum block-entropy decoder is
2516: that the block-entropy decoder has a polynomial term in $n$ (resulting
2517: from summing over the type classes) that multiplies the exponential
2518: decay in $\delay$. For $n$ large, this polynomial can dominate.
2519: Using the minimum suffix-entropy decoder results in a polynomial term
2520: in $\delay$.
2521:
2522: With this decoder, errors can only occur if there is some sequence
2523: $\svxtil^n$ such that (i) $\svxtil^n \in \binX(\svx^n)$, (ii)
2524: $\svxtil^{l-1} = \svx^{l-1}$, and $\svxtil_l \neq \svx_l$, for some $l
2525: \leq n-\delay$, and (iii) the empirical suffix entropy of
2526: $\svxtil_l^n$ is such that $H(\svxtil_{l}^n) < H(\svx_l^n)$. Building
2527: on the common core of the
2528: achievability~(\ref{eq.condSS})--(\ref{eq.sufDec}) with the
2529: substitution of universal decoding in the place of maximum likelihood
2530: results in the following definition of $p_n(l)$ (cf.~(\ref{eq.pnUniv})
2531: with~(\ref{eq.errTimeL})),
2532:
2533: \begin{align}
2534: p_n(l)=\sum_{\svx^n} \Pr \big[ \exists \; \svxtil^n \in
2535: \mathcal{B}_x(\svx^n)\cap \mathcal{F}_n(l, \svx^n) \; \mbox{s.t.} \;
2536: H(\svxtil_{l}^n) \leq H(\svx_{l}^n) \big]
2537: p_\rvbx(\svx^n) \label{eq.pnUniv}
2538: \end{align}
2539:
2540:
2541: The following lemma gives a bound on $p_n(l)$.
2542: %
2543: \begin{lemma}\label{Lemma.indivUniv}
2544: For minimum suffix-entropy decoding, $p_n(l)\leq (n-l+2)^{2|\cX|}
2545: \exp\{-(n - l +1) E_{UN}(\Rent)\}.$
2546: \end{lemma}
2547:
2548: \pf We define $\PNL$ to be the type of length-$(n-l+1)$ sequence
2549: $x_{l}^n$, and $\tclass_{\PNL}$ to be the corresponding type class so
2550: that $x_{l}^n \in \tclass_{\PNL}$. Analogous definitions hold for
2551: $\PtilNL$ and $\tilde{x}_{l}^n$. We rewrite the constraint
2552: $H(\svxtil_{l}^n) < H(\svx_{l}^n)$ as $H(\PtilNL) < H(\PNL)$.
2553: Thus,
2554: %
2555: \begin{align}
2556: p_n(l)=&\sum_{\svx^n} \Pr \big[ \exists \; \svxtil^n \in
2557: \mathcal{B}_x(\svx^n)\cap \mathcal{F}_n(l, \svx^n) \; \mbox{s.t.} \;
2558: H(\svxtil_{l}^n) \leq H(\svx_{l}^n) \big]
2559: p_\rvbx(\svx^n)\nonumber\\
2560: %
2561: %
2562: \leq & %
2563: \sum_{\svx_{1}^n}
2564: %
2565: \min \Big[1, \hspace{-1em}
2566: \sum_{\tiny \begin{array}{c} \svxtil_{1}^n \in
2567: \mathcal{F}_n(l,\svx^n)\; \mbox{s.t.} \\
2568: H(\svxtil_{l}^n) \leq H(\svx_{l}^n) \end{array}}
2569: %\hspace{-2em} \ldots \nonumber\\
2570: \Pr[\svxtil_{1}^n\in \mathcal{B}_x(\svx_{1}^n)] \Big]
2571: p_\rvbx(\svx^n)\nonumber\\
2572: %
2573: %
2574: %
2575: %
2576: =& %
2577: \sum_{\svx_{1}^{l-1},\svx_{l}^{n}}
2578: %
2579: \min \Big[1, \hspace{-1em}
2580: \sum_{\tiny \begin{array}{c} \svxtil_{l}^n \; \mbox{s.t.} \\
2581: H(\svxtil_{l}^n) \leq H(\svx_{l}^n) \end{array}}
2582: %\hspace{-2em} \ldots \nonumber\\
2583: \exp\{-(n-l +1) \Rent\} \Big] p_\rvbx(\svx^{l-1})p_\rvbx(\svx_l^{n})
2584: \nonumber\\
2585: %
2586: %
2587: %
2588: %
2589: = & %
2590: \sum_{\svx_{l}^n}
2591: %
2592: \min \Big[1, \hspace{-1em}
2593: \sum_{\tiny \begin{array}{c} \svxtil_{l}^n \; \mbox{s.t.} \\
2594: H(\svxtil_{l}^n) \leq H(\svx_{l}^n) \end{array}}
2595: %\hspace{-2em} \ldots \nonumber\\
2596: \exp\{-(n-l +1) \Rent\} \Big] p_\rvbx(\svx_l^{n})\label{eq.nonBlock}\\
2597: %
2598: %
2599: =&
2600: %
2601: \sum_{\PNL}
2602: %
2603: \sum_{\tiny \svx_{l}^n \in \tclass_{\PNL}}
2604: %
2605: \min \Big[ 1, \hspace{-2em}
2606: \sum_{\tiny \begin{array}{c}\PtilNL \; \mbox{s.t.}\\
2607: H (\PtilNL) \leq H(\PNL) \end{array}}
2608: %\hspace{-2em} \ldots \nonumber \\
2609: %
2610: %&
2611: \sum_{\tilde{x}_{l}^n \in \tclass_{\PtilNL}} \exp\{-(n-l+1) \Rent\}
2612: \Big] p_\rvbx(\svx_l^{n})
2613: \label{eq.tildeType} \displaybreak[2]\\
2614: %%
2615: %%
2616: \leq &
2617: %
2618: \sum_{\PNL}
2619: %
2620: \sum_{\tiny \svx_{l}^n \in \tclass_{\PNL}}
2621: %
2622: \min \Big[ 1, (n-l+2)^{|\cX|}
2623: %\ldots \nonumber \\
2624: %
2625: %&
2626: \exp\{-(n-l+1) [\Rent - H(\PNL)]\} \Big]
2627: p_\rvbx(\svx_l^{n})\label{eq.entBnd} \displaybreak[2]\\
2628: %%
2629: %%
2630: \leq& (n-l+2)^{|\cX|}
2631: %
2632: \sum_{\PNL} \sum_{\svx_{l}^n \in \tclass_{\PNL}}
2633: %
2634: \exp\{-(n-l+1) [ |\Rent \! - \! H(\PNL)|^{+} ]\} \nonumber\\
2635: &\hspace{1in} \exp\{-(n-l+1) [D(\PNL \| \PxRV) + H(\PNL)]\}
2636: %
2637: \label{eq.incExp} \displaybreak[2]\\
2638: %%
2639: %%
2640: %%
2641: \leq& (n-l+2)^{|\cX|}
2642: %
2643: \sum_{\PNL}
2644: %
2645: \exp\{-(n-l+1)
2646: %\ldots \nonumber \\
2647: %& \hspace{1em}
2648: \inf_{q}[D(q \| \PxRV) + |\Rent -
2649: H(q)|^{+}]\} \label{eq.optErrExp} \displaybreak[2]\\
2650: %%
2651: %%
2652: %%
2653: \leq& (n-l+2)^{2|\cX|}
2654: %
2655: \exp\{-(n - l +1) E_{UN}(\Rent)\} \label{eq.defEr} \displaybreak[2]
2656: %%
2657: %%
2658: %%
2659: \end{align}
2660: %
2661: In going from~(\ref{eq.tildeType}) to~(\ref{eq.entBnd}) first note
2662: that the argument of the inner-most summation (over $\svxtil_{l}^n$)
2663: does not depend on $\svbx$. We then use the following relations: (i)
2664: $\sum_{\svxtil_{l}^n \in \tclass_{\PtilNL}} = |\tclass_{\PtilNL}| \leq
2665: \exp\{(n-l+1) H(\PtilNL)\}$, which is a standard bound on the size of
2666: the type class, (ii) $H(\PtilNL) \leq H(\PNL)$ by the
2667: minimum-suffix-entropy decoding rule, and (iii) the polynomial bound
2668: on the number of types, $|\{\PtilNL\}| \leq (n-l+2)^{|\cX|}$.
2669: In~(\ref{eq.incExp}) we recall the function definition $|\cdot|^+
2670: \defeq \max\{0, \cdot\}$. We pull the polynomial term out of the
2671: minimization and use $p_\rvbx(\svx_l^{n}) = \exp\{-(n-l+1) [ D(\PNL \|
2672: \PxRV) + H(\PNL)]\}$ for all $\svx_l^{n} \in \tclass_{\PNL}$.
2673: It is also in~(\ref{eq.incExp}) that we see why we use a minimum
2674: suffix-entropy decoding rule instead of a minimum entropy decoding
2675: rule. If we had not marginalized out over $\svx^{l-1}$
2676: in~(\ref{eq.nonBlock}) then we would have a polynomial term out front in
2677: terms of $n$ rather than $n-l$, which for large $n$ could dominate the
2678: exponential decay in $n-l$. As the expression in~(\ref{eq.optErrExp})
2679: no longer depends on $\svx_{l}^n$, we simplify by using
2680: $|\tclass_{\PNL}| \leq \exp\{(n-l+1) H(\PNL)\}$. In~(\ref{eq.defEr})
2681: we use the definition of the universal error exponent $E_{UN}(\Rent)$
2682: from~(\ref{eq.errExpUniv}) of Theorem~\ref{thm.entCodeUniv}, and the
2683: polynomial bound on the number of types. \hfill $\blacksquare$
2684:
2685: Lemma~\ref{Lemma.indivUniv} and $\Pr [ \rvxhat^{n-\delay} \neq
2686: \rvx^{n-\delay}]\leq \sum_{l=1}^{n-\delay} p_n(l)$ imply that:
2687: \begin{align}
2688: \Pr [ \rvxhat^{n - \delay} \neq \rvx^{n - \delay}] \leq&
2689: \sum_{l=1}^{n-\delay} (n-l+2)^{2|\cX|}
2690: \exp\{-(n - l +1) E_{UN}(\Rent)\}\nonumber\\
2691: %%%
2692: %%%
2693: %%%
2694: \leq&
2695: \sum_{l=1}^{n-\delay} K_1
2696: %
2697: \exp\{-(n -l + 1 ) [E_{UN}(\Rent) - \gamma]\}
2698: \label{eq.polyIntoExp} \displaybreak[2]\\
2699: %%
2700: %%
2701: \leq & K_2 \exp\{ - \delay [E_{UN}(\Rent) - \gamma] \}
2702: \label{eq.entErrExp}
2703: \end{align}
2704: In~(\ref{eq.polyIntoExp}) we
2705: incorporate the polynomial into the exponent. Namely, for all $a
2706: >0$, $b>0$, there exists a $C$ such that $z^a \leq C \exp \{b (z
2707: -1)\}$ for all $z \geq 1$.
2708:
2709: We then make explicit the delay-dependent term. Pulling out the
2710: exponent in $\delay$, the remaining summation is a sum over decaying
2711: exponentials, and can be bounded by a constant. Together with $K_1$,
2712: this gives the constant $K_2$ in~(\ref{eq.entErrExp}). This proves
2713: Theorem~\ref{thm.entCodeUniv}. Note that the $\gamma$
2714: in~(\ref{eq.entErrExp}) does not enter the optimization because
2715: $\gamma > 0$ can be picked equal to any constant. The choice of
2716: $\gamma$ affects the constant $K$ in Theorem~\ref{thm.entCodeUniv}.
2717:
2718: %%% SOURCE CODING WITH DECODER SI
2719:
2720: \section{Streaming source coding with side information at the
2721: decoder}
2722: \label{sec.incDecSI}
2723:
2724: If a random sequence $\rvy^n$, related to the source $\rvx^n$ through
2725: a discrete memoryless channel, is observed at the decoder, then this
2726: side information can be used to reduce the rate of the source code.
2727: In this model $p_{\rvbx, \rvby}(\svx^n, \svy^n) = \prod_{i=1}^n \PxyRV
2728: (\svx_i, \svy_i) = \prod_{i=1}^n \PxCondyRV (\svx_i | \svy_i)
2729: \PyRV(\svy_i)$. The source $\rvx^n$ is observed at the encoder, and
2730: the decoder, which observes $\rvy^n$ and a bit stream from the
2731: encoder, wants to estimate each source symbol $\rvx_i$ with a
2732: probability of error that decreases exponentially in the decoding
2733: delay $\delay$.
2734:
2735: We can apply the analysis of Section~\ref{sec.entropy} to this problem
2736: with a few minor modifications. For ML decoding, we need to pick the
2737: sequence with the maximum conditional probability given $\rvy^n$. The
2738: error exponent can be derived using a similar Chernoff bounding
2739: argument as in Section~\ref{sec.entropy}. For universal decoding,
2740: the only change is that we now use a minimum suffix
2741: conditional-entropy decoder that compares sequence pairs $(\svxBar^n,
2742: \svy^n)$ and $(\svxBBar^n, \svy^n)$. In terms of the analysis, one
2743: change enters in~(\ref{eq.condSS}) where we must also sum over the
2744: possible side information sequences. And in~(\ref{eq.tildeType}) the
2745: entropy condition in the summation over $\svbxtil$ changes to
2746: $H(\svxtil_{l}^n|\svy_{l}^n) < H(\svx_{l}^n| \svy_{l}^n)$ (or
2747: the equivalent type notation). Because $\rvy^n$ is observed at the
2748: decoder, there is no ambiguity in the side information, and this
2749: condition is equivalent to $H(\svxtil_{l}^n, \svy_{l}^n) <
2750: H(\svx_{l}^n, \svy_{l}^n)$.
2751:
2752: These results are summarized in Theorems~\ref{thm.decSIML}
2753: and~\ref{thm.decSIUniv}. We do not include the full derivation of
2754: these theorems as no new ideas are required.
2755:
2756: %%% GENERAL SW
2757: \section{Streaming Slepian-Wolf source coding}
2758: \label{sec.SW}
2759:
2760: In this section we provide the proofs of
2761: Theorems~\ref{thm.jointCodeML} and~\ref{thm.jointCode}, which consider
2762: the two-user\footnote{The multiuser case is essentially the same, just
2763: with a lot more notation and minimization parameters
2764: $\gamma_1,\gamma_2,\ldots$.} Slepian-Wolf problem. As with the
2765: proofs of Theorems~\ref{thm.entCodeML} and~\ref{thm.entCodeUniv} in
2766: Sections~\ref{sec.MLent} and~\ref{sec.univEnt}, we start by developing
2767: the common core of the proof in the context of maximum likelihood
2768: decoding. This allows us to develop the results for universal
2769: decoding more quickly and transparently. Furthermore, as shown in
2770: Theorem~\ref{THM:Universal_ML_SW}, maximum likelihood decoding and
2771: universal decoding provide the same reliability with delay.
2772:
2773: %%%%%%%%%%%%%%%%%
2774: \subsection{Maximum Likelihood Decoding}
2775: \label{sec.MLSW}
2776:
2777: In Theorems~\ref{thm.jointCodeML} and~\ref{thm.jointCode} three error
2778: events are considered: (i) $\Pr[\rvx^{n - \delay} \neq
2779: \rvxhat^{n-\delay}]$, (ii) $\Pr[\rvy^{n - \delay} \neq
2780: \rvyhat^{n-\delay}]$, and (iii) $\Pr[(\rvx^{n - \delay}, \rvy^{n -
2781: \delay}) \neq (\rvxhat^{n-\delay}, \rvyhat^{n-\delay})]$. We
2782: develop the error exponent for case (i). The error exponent for case
2783: (ii) follows from a similar derivation, and that of case (iii) from an
2784: application of the union bound resulting in an exponent that is the
2785: minimum of the exponents of cases (i) and (ii).
2786:
2787:
2788: To lead to the decoding error $\Pr[\rvx^{n - \delay} \neq
2789: \rvxhat^{n-\delay}]$ there must be some spurious source pair
2790: $(\svxtil^n, \svytil^n)$ that satisfies three conditions: (i)
2791: $\svxtil^n \in \binX(\svx^n)$ and $\svytil^n \in \binY(\svy^n)$, (ii)
2792: it must be more likely than the true pair $p_{\rvbx, \rvby}(\svxtil^n,
2793: \svytil^n) > p_{\rvbx, \rvby}(\svx^n, \svy^n)$, and (iii) $\svxtil_{l}
2794: \neq \svx_{l}$ for some $l \leq n - \delay$.
2795:
2796: The error probability is
2797: %
2798: \begin{align}
2799: \Pr[&\rvxhat^{n-\delay} \neq \rvx^{n-\delay}] %%
2800: = \sum_{\svx^n, \svy^n} \Pr [\rvxhat^{n-\delay} \neq \svx^{n-\delay}
2801: | \rvx^n = \svx^n, \rvy^n = \svy^n]
2802: p_{\rvbx,\rvby}(\svx^n, \svy^n) \nonumber\\
2803: %%
2804: %%
2805: &\leq \sum_{\svx^n, \svy^n} p_{\rvbx,\rvby}(\svx^n, \svy^n)\Big\{
2806: \sum_{l=1}^{n - \delay}
2807: \sum_{k=1}^{n+1} \nonumber \\
2808: %%
2809: %%
2810: & \hspace{0.75in}
2811: \Pr \big[ \exists \; (\svxtil^n, \svytil^n) \in
2812: \binX(\svx^n) \times \binY(\svy^n)\cap
2813: \mathcal{F}_n(l,k,\svx^n, \svy^n) \; \mbox{s.t.} \;
2814: p_{\rvbx,\rvby}(\svxtil^n, \svytil^n) \geq
2815: p_{\rvbx,\rvby}(\svx^n, \svy^n)\big] \Big\}
2816: \label{eq.diffTime} \\
2817: %%
2818: %%
2819: & = \sum_{l=1}^{n - \delay} \sum_{k=1}^{n+1} \Big\{ \sum_{\svx^n,
2820: \svy^n} p_{\rvbx,\rvby}(\svx^n, \svy^n)\nonumber \\
2821: & \hspace{0.75in}
2822: \Pr \big[ \exists \; (\svxtil^n, \svytil^n)\in
2823: \binX(\svx^n)\times \binY(\svy^n)\cap \mathcal{F}_n(l,k,\svx^n,
2824: \svy^n) \; \mbox{s.t.} \;
2825: p_{\rvbx,\rvby}(\svxtil^n, \svytil^n) \geq
2826: p_{\rvbx,\rvby}(\svx^n, \svy^n) \big] \Big\}
2827: \nonumber\\
2828: %%
2829: %%
2830: & = \sum_{l=1}^{n - \delay} \sum_{k=1}^{n+1} p_n(l,k).
2831: \label{eq.defPn}
2832: \end{align}
2833: %
2834: In~(\ref{eq.diffTime}) we decompose the error event into a number of
2835: mutually exclusive events by partitioning all source pairs
2836: $(\svxtil^n, \svytil^n)$ into sets $\mathcal{F}_n(l, k,\svx^n,
2837: \svy^n)$ defined by the times $l$ and $k$ at which $\svxtil^n$ and
2838: $\svytil^n$ diverge from the realized source sequences. The set
2839: $\mathcal{F}_n(l, k,\svx^n, \svy^n)$ is defined as
2840: %
2841: \begin{equation}
2842: \mathcal{F}_n(l,k,x^n,y^n)=\{(\svxBar^n,\svyBar^n)\in
2843: \mathcal{X}^{n} \times\mathcal{Y}^{n} \; \mbox{s.t.} \;
2844: \svxBar^{l-1} = x^{l-1},\svxBar_l \neq x_l,\svyBar^{k-1}= y^{k-1},
2845: \svyBar_k\neq y_k\}. \label{eq.jointPart}
2846: \end{equation}
2847: In contrast to streaming point-to-point or side-information coding
2848: (cf.~(\ref{eq.jointPart}) with~(\ref{eq.partition})), the partition is
2849: now doubly-indexed. To find the dominant error event, we must search
2850: over both indices. Having two dimensions to search over results in an
2851: extra minimization when calculating the error exponent (and leads to
2852: the infimum over $\gamma$ in Theorem~\ref{thm.jointCodeML}).
2853:
2854: Finally, to get~(\ref{eq.defPn}) we define $p_n(l,k)$ as
2855: %
2856: \begin{eqnarray*}
2857: & & p_n(l,k) \\
2858: &=& \sum_{\svx^n, \svy^n} p_{\rvbx,\rvby}(\svx^n, \svy^n) \Pr \Big[
2859: \exists \; (\svxtil^n, \svytil^n)\in \binX(\svx^n)\times
2860: \binY(\svy^n)\cap \mathcal{F}_n(l,k,\svx^n, \svy^n) \; \mbox{s.t.} \;
2861: p_{\rvbx,\rvby}(\svxtil^n, \svytil^n) \geq
2862: p_{\rvbx,\rvby}(\svx^n, \svy^n)\Big].
2863: \end{eqnarray*}
2864: %
2865: The following lemma provides an upper bound on $p_n(l,k)$:
2866: %
2867: \begin{lemma} \label{lemm.jointPn}
2868: %
2869: \begin{equation}
2870: \begin{array}{lllll}
2871: p_n(l,k) & \leq & \exp\{-(n-l+1) E_x(\Rx, \Ry, \frac{k-l}{n-l+1})\} &
2872: \mbox{if} & l \leq k, \vspace{1ex} \\
2873: %
2874: p_n(l,k) & \leq & \exp\{-(n-k+1) E_y(\Rx, \Ry, \frac{l-k}{n-k+1})\} &
2875: \mbox{if} & l \geq k,
2876: \end{array} \label{eq.mlSWbnd}
2877: \end{equation}
2878: %
2879: where $E_x(\Rx, \Ry, \gamma)$ and $E_y(\Rx, \Ry, \gamma)$ are
2880: defined in~(\ref{eq.compoundExp}) and~(\ref{eq.defBasicExp})
2881: respectively. Notice that $l,k \leq n$, for $l\leq k$: $
2882: \frac{k-l}{n-l+1}\in [0,1]$ serves as $\gamma$ in the error exponent
2883: $E_x(\Rx, \Ry, \gamma)$. Similarly for $l\geq k$.
2884: \end{lemma}
2885:
2886:
2887: \pf The bound depends on whether $l \leq k$ or $l \geq k$. Consider
2888: the case for $l \leq k$,
2889: %
2890: \begin{align}
2891: %%
2892: & p_n(l,k) \nonumber \\
2893: &=\sum_{\svx^n, \svy^n} p_{\rvbx,\rvby}(\svx^n, \svy^n) \Pr[
2894: \exists \; (\svxtil^n, \svytil^n)\in \binX(\svx^n)\times
2895: \binY(\svy^n)\cap \mathcal{F}_n(l,k,\svx^n, \svy^n) \; \mbox{s.t.} \;
2896: p_{\rvbx,\rvby}(\svx^n, \svy^n) < p_{\rvbx,\rvby}(\svxtil^n,
2897: \svytil^n)]\nonumber\\
2898: %
2899: %
2900: %
2901: &\leq \sum_{\svx^n, \svy^n}
2902: %\hspace{-1em}
2903: \min\Big[1, \sum_{\tiny \begin{array}{c} (\svxtil^n, \svytil^n) \in
2904: \mathcal{F}_n(l,k,\svx^n, \svy^n)\; \\
2905: p_{\rvbx,\rvby}(\svx^n, \svy^n) <
2906: p_{\rvbx,\rvby}(\svxtil^n, \svytil^n)
2907: \end{array}}
2908: %\hspace{-2em}
2909: \Pr[ \svxtil^n \in \binX(\svx^n), \svytil^n \in \binY(\svy^n)]\Big]
2910: p_{\rvbx,\rvby}(\svx^n, \svy^n) \label{eq.enumJoint} \displaybreak[2]\\
2911: %%
2912: %%
2913: %%
2914: %%
2915: %%
2916: &\leq \sum_{\svx_l^n, \svy_l^n}
2917: %\hspace{-2em}
2918: \min \Big[1, \sum_{\tiny \begin{array}{c} (\svxtil_l^n,
2919: \svytil_l^n) \; \mbox{s.t.} \; \svytil^{k-1}=\svy^{k-1} \; \\
2920: p_{\rvbx,\rvby}(\svx_l^n, \svy_l^n) < p_{\rvbx,\rvby}(\svxtil_l^n,
2921: \svytil_l^n)
2922: \end{array}}
2923: %\hspace{-2em}
2924: \exp\{-(n-l +1) \Rx -(n-k+1) \Ry\} \Big]
2925: p_{\rvbx,\rvby}(\svx_l^n,\svy_l^n) \label{eq.indepBin} \\
2926: %%
2927: %%
2928: &= \sum_{\svx_l^n, \svy_l^n}
2929: %\hspace{-2em}
2930: \min \Big[1, \sum_{\svxtil_l^n, \svytil_k^n}
2931: \exp\{-(n-l+1) \Rx -(n-k+1) \Ry\} \nonumber \\
2932: %
2933: & \hspace{0.75in} \ind [ p_{\rvbx,\rvby}(\svxtil_l^{k-1}, \svy_l^{k-1})
2934: p_{\rvbx,\rvby}(\svxtil_k^{n}, \svytil_k^{n}) > p_{\rvbx,\rvby}
2935: (\svx_l^{n}, \svy_l^{n})]
2936: \Big] p_{\rvbx,\rvby}(x_l^n,y_l^n) \nonumber \\% \label{eq.indAgain}\\
2937: %%
2938: %%
2939: &\leq
2940: \sum_{\svx_l^n, \svy_l^n}
2941: %\hspace{-2em}
2942: \min \Bigg[1, \sum_{\svxtil_l^n, \svytil_k^n}
2943: \exp\{-(n-l+1) \Rx -(n-k+1) \Ry\} \nonumber \\
2944: %
2945: & \hspace{0.5in} \min \Bigg[1, \frac{p_{\rvbx,\rvby}(\svxtil_l^{k-1},
2946: \svy_l^{k-1}) p_{\rvbx,\rvby} (\svxtil_k^{n}, \svytil_k^{n})}{
2947: p_{\rvbx,\rvby}(\svx_l^{n}, \svy_l^{n})} \Bigg] \Bigg]
2948: p_{\rvbx,\rvby}(\svx_l^{n}, \svy_l^{n}) \nonumber
2949: \displaybreak[2] \\
2950: %
2951: %
2952: %
2953: &\leq
2954: \sum_{\svx_l^n, \svy_l^n}
2955: %\hspace{-2em}
2956: \Bigg[\sum_{\svxtil_l^n, \svytil_k^n}
2957: e^{-(n-l+1) \Rx -(n-k+1) \Ry}
2958: %
2959: \Bigg[
2960: \frac{p_{\rvbx,\rvby}(\svxtil_l^{k-1}, \svy_l^{k-1}) p_{\rvbx,\rvby}
2961: (\svxtil_k^{n}, \svytil_k^{n})}{ p_{\rvbx,\rvby}(\svx_l^{n},
2962: \svy_l^{n})} \Bigg]^{\frac{1}{1+\rho}} \Bigg]^{\rho}
2963: p_{\rvbx,\rvby}(\svx_l^{n}, \svy_l^{n})
2964: \displaybreak[2] \label{eq.gallagerRho} \\
2965: %
2966: %
2967: %
2968: &=
2969: e^{-(n-l+1) \rho \Rx -(n-k+1) \rho \Ry}
2970: \sum_{\svx_l^n, \svy_l^n}
2971: %\hspace{-2em}
2972: \Bigg[\sum_{\svxtil_l^n, \svytil_k^n}
2973: %
2974: [p_{\rvbx,\rvby}(\svxtil_l^{k-1}, \svy_l^{k-1}) p_{\rvbx,\rvby}
2975: (\svxtil_k^{n}, \svytil_k^{n}) ]^{\frac{1}{1+\rho}} \Bigg]^{\rho}
2976: p_{\rvbx,\rvby}(\svx_l^n,\svy_l^n)^{\frac{1}{1+\rho}}
2977: \nonumber \displaybreak[2]\\
2978: %
2979: %
2980: %
2981: &= e^{-(n-l+1) \rho \Rx -(n-k+1) \rho \Ry} \sum_{\svy_l^{k-1}}
2982: \Big[ \sum_{\svx_l^{k-1}}
2983: p_{\rvbx,\rvby}(\svx_l^{k-1},\svy_l^{k-1})^{\frac{1}{1+\rho}}\Big]
2984: %\hspace{-2em}
2985: \Big[\sum_{\svxtil_l^{k-1}}
2986: %
2987: p_{\rvbx,\rvby} (\svxtil_l^{k-1}, \svy_l^{k-1})^{\frac{1}{1+\rho}}
2988: \Big]^{\rho}
2989: \nonumber \\
2990: %
2991: & \hspace{0.5in} \Big[ \sum_{\svxtil_k^n, \svytil_k^n} p_{\rvbx,\rvby}
2992: (\svxtil_k^{n}, \svytil_k^{n})^{\frac{1}{1+\rho}} \Big]^{\rho}
2993: \sum_{\svx_k^n, \svy_k^n} p_{\rvbx,\rvby}(\svx_k^{n},
2994: \svy_k^{n})^{\frac{1}{1+\rho}}
2995: \nonumber \displaybreak[2] \\
2996: %
2997: %
2998: %
2999: &= e^{-(n-l+1) \rho \Rx -(n-k+1) \rho \Ry}
3000: \Bigg[\sum_{\svy_l^{k-1}} \Big[ \sum_{\svx_l^{k-1}}
3001: p_{\rvbx,\rvby}(\svx_l^{k-1}, \svy_l^{k-1})^{\frac{1}{1+\rho}} \Big]^{1
3002: + \rho} \Bigg]
3003: %\hspace{-2em}
3004: \Big[\sum_{\svx_k^n, \svy_k^n}
3005: %
3006: p_{\rvbx,\rvby}(\svx_k^{n}, \svy_k^{n})^{\frac{1}{1+\rho}} \Big]^{1
3007: +\rho}
3008: \nonumber \displaybreak[2]\\
3009: %
3010: %
3011: %
3012: &= e^{-(n-l+1) \rho \Rx -(n-k+1) \rho \Ry} \Bigg[\sum_{\svy} \Big[
3013: \sum_{\svx} p_{\rvx,\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big]^{1 +
3014: \rho} \Bigg]^{k-l}
3015: %\hspace{-2em}
3016: \Big[\sum_{\svx, \svy}
3017: %
3018: p_{\rvx,\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big]^{(1 +\rho)(n-k+1)}
3019: \label{eq.rearranging}\displaybreak[2] \\
3020: %
3021: %
3022: %
3023: &= \exp\left\{-(k-l) \Bigg[ \rho \Rx - \log \Big[ \sum_{\svy} \Big[
3024: \sum_{\svx} p_{\rvx,\rvy}(\svx,\svy)^{\frac{1}{1+\rho}}
3025: \Big]^{1+\rho} \Big] \Bigg]
3026: \right\} \nonumber \\
3027: %%
3028: & \hspace{0.5in} \exp\left\{ -(n-k+1) \Bigg[ \rho (\Rx + \Ry) -
3029: (1+\rho) \log \Big[ \sum_{\svx, \svy}
3030: p_{\rvx,\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big] \Bigg] \right\}
3031: \nonumber \displaybreak[2] \\
3032: %
3033: %
3034: &= \exp\left\{-(k-l) E_{x|y}(\Rx, \rho)
3035: -(n-k+1) E_{xy}(\Rx, \Ry, \rho) \right\} \label{eq.defElk}\\
3036: %
3037: %
3038: %
3039: &= \exp \left\{ -(n-l+1) \Big[ \frac{k-l}{n-l+1} E_{x|y}(\Rx,\rho) +
3040: \frac{n-k+1}{n-l+1} E_{xy}(\Rx, \Ry, \rho)\Big] \right\} \label{eq.defEl2}\\
3041: %
3042: %
3043: %
3044: &\leq \exp \left\{ -(n-l+1) \sup_{\rho \in [0,1]}
3045: \Big[ \frac{k-l}{n-l+1} E_{x|y}(\Rx,\rho) +
3046: \frac{n-k+1}{n-l+1} E_{xy}(\Rx, \Ry, \rho)\Big] \right\}
3047: \label{eq.jointMLoptRho}\\
3048: %
3049: %
3050: %
3051: &= \exp \left\{ -(n-l+1) E_{x}^{ML} \left(\Rx, \Ry,
3052: \frac{k-l}{n-l+1}\right) \right\} = \exp \left\{ -(n-l+1)
3053: E_{x}(\Rx, \Ry, \frac{k-l}{n-l+1}) \right\}.
3054: \label{eq.subDefsEx}
3055: %%
3056: %& \leq \exp \left\{ -(n-l+1) E_x(\Rx,\Ry,\rho,\frac{k-l}{n-l+1}) \right\}
3057: %\label{eq.compoundDef}
3058: \end{align}
3059:
3060: In~(\ref{eq.enumJoint}) we explicitly indicate the three conditions
3061: that a suffix pair $(\svxtil_{l}^n, \svytil_{k}^n)$ must satisfy to
3062: result in a decoding error. In~(\ref{eq.indepBin}) we sum out over
3063: the common prefixes $(\svx^{l-1}, \svy^{l-1})$, and use the fact that
the random binning is done independently at each encoder; see
Definition~\ref{def.seqn_coding}. We get~(\ref{eq.gallagerRho}) by
3066: limiting $\rho$ to the interval $0 \leq \rho \leq 1$, as
3067: in~(\ref{eq.limOnRho}). Getting~(\ref{eq.rearranging})
3068: from~(\ref{eq.gallagerRho}) follows by a number of basic
3069: manipulations. In~(\ref{eq.rearranging}) we get the single letter
3070: expression by again using the memoryless property of the sources.
3071: In~(\ref{eq.defElk}) we use the definitions of $E_{x|y}$ and $E_{xy}$
from~(\ref{eq.defBasicExp}) of Theorem~\ref{thm.jointCodeML}. Noting
that the bound holds for all $\rho \in [0,1]$, optimizing over $\rho$
results in~(\ref{eq.jointMLoptRho}). Finally, using the definition
in~(\ref{eq.compoundExp}) and the remark following
Theorem~\ref{THM:Universal_ML_SW} that the maximum-likelihood and
universal exponents are equal gives~(\ref{eq.subDefsEx}). The bound
on $p_n(l,k)$ when $l > k$ is developed in an analogous
fashion.\hfill $\blacksquare$
3080:
3081: We use Lemma~\ref{lemm.jointPn} together with~(\ref{eq.defPn}) to
3082: bound $\Pr[\rvxhat^{n-\delay} \neq \rvx^{n-\delay}]$ for two distinct
3083: cases. The first, simpler case, is when $\inf_{\gamma \in [0,1] }
3084: E_y(\Rx, \Ry, \gamma) > {\inf_{\gamma \in [0,1] } E_x(\Rx, \Ry,
3085: \gamma)}$. To bound $\Pr[\rvxhat^{n-\delay} \neq \rvx^{n-\delay}]$
3086: in this case, we split the sum over the $p_n(l,k)$ into two terms,
as visualized in Fig.~\ref{fig.twoD2}. There are $(n+1)\times
(n-\delay)$ such events to account for
(those inside the box). The probabilities of the events within each oval are
summed together to give an upper bound on $\Pr[ \rvxhat^{n - \delay} \neq \rvx^{n-\delay}]$.
We add extra probabilities outside of the box but within the ovals
to make the summation symmetric and thus simpler. Those extra
error events do not impact the error exponent because $\inf_{\gamma \in [0,1] } E_y(\Rx, \Ry,
\gamma) \geq {\inf_{\gamma \in [0,1] } E_x(\Rx, \Ry, \gamma)}$.
The possible dominant error events are highlighted in Fig.~\ref{fig.twoD2}. Thus,
3096: %
3097: \begin{align}
3098: & \Pr[ \rvxhat^{n - \delay} \neq \rvx^{n-\delay}] \leq \sum_{l=1}^{n
3099: - \delay} \sum_{k=l}^{n+1} p_n(l,k) + \sum_{k=1}^{n - \delay}
3100: \sum_{l=k}^{n+1} p_n(l,k) \label{eq.twoTerms} \\
3101: %%
3102: %%
3103: &\leq \sum_{l=1}^{n - \delay} \sum_{k=l}^{n+1} \exp\{ -(n-l+1)
3104: \inf_{\gamma \in [0,1]} E_x(\Rx, \Ry, \gamma) \}
3105: %
3106: + \sum_{k=1}^{n-\delay}\sum_{l=k}^{n+1}
3107: \exp\{-(n-k+1) \inf_{\gamma \in [0,1]} E_y(\Rx, \Ry, \gamma)\}
3108: \label{eq.usinglemma} \\
3109: %%
3110: %%
& = \sum_{l=1}^{n - \delay} \Big[ (n-l+2) \exp\{ -(n-l+1)
\inf_{\gamma \in [0,1]} E_x(\Rx, \Ry, \gamma) \} \Big] \nonumber \\
%%
%%
& \ \ \ \ \ + \sum_{k=1}^{n - \delay} \Big[ (n-k+2) \exp\{ -(n-k+1)
\inf_{\gamma \in [0,1]}
E_y(\Rx, \Ry, \gamma) \} \Big] \nonumber\\%\label{eq.ineq}\\
%%
%%
& \leq 2 \sum_{l=1}^{n - \delay} \Big[ (n-l+2) \exp\{ -(n-l+1)
\inf_{\gamma \in [0,1]} E_x(\Rx, \Ry, \gamma) \} \Big]
3122: %
3123: \label{eq.sumTerms} \\
3124: %%
3125: %%
3126: %%
3127: & \leq \sum_{l=1}^{n - \delay} C_1 \exp\{ -(n-l+2)[ \inf_{\gamma
3128: \in [0,1]} E_x(\Rx, \Ry, \gamma) -\alpha]\}
3129: %
3130: \label{eq.smallerExp}\\
3131: %%
3132: %%
3133: & \leq C_2 \exp\{-\delay [ \inf_{\gamma \in [0,1]} E_x(\Rx, \Ry,
3134: \gamma) -\alpha]\} \label{eq.boundEyBigEx}
3135: \end{align}
3136:
3137:
Equation (\ref{eq.twoTerms}) follows directly from (\ref{eq.defPn}):
in the first term $l\leq k$, and in the second
term $l\geq k$. In~(\ref{eq.usinglemma}), we use Lemma~\ref{lemm.jointPn}. In~(\ref{eq.sumTerms}) we
3141: use the assumption that $\inf_{\gamma \in [0,1] } E_y(\Rx, \Ry,
3142: \gamma) > \inf_{\gamma \in [0,1] } E_x(\Rx, \Ry, \gamma)$.
3143: In~(\ref{eq.smallerExp}) the $\alpha > 0$ results from incorporating
3144: the polynomial into the first exponent, and can be chosen as small
as desired. Combining terms and summing out the decaying
exponential yields the bound~(\ref{eq.boundEyBigEx}).
3147:
3148:
3149:
3150: \begin{figure}
3151: \begin{picture}(100,100)
3152: \multiput(50,20)(5,0){12}{\multiput(0,0)(0,5){12}{\circle{0.5}}}
3153: \put(45, 15){\vector(1,0){70}} \put(45, 15){\vector(0,1){70}}
3154: \put(45,87){$k$} \put(117,15){$l$}
3155:
3156: \put(39,74) {\rotatebox{90}{ $n+1$}}
3157: \put(39,60){\rotatebox{90}{$n-\Delta$}}
3158:
3159: \put(30,15) {\rotatebox{90}{Index at which $ \rvy^n$ and $
3160: \rvytil^n$ first diverge}}
3161:
3162: \put(45,5) {Index at which $ \rvx^n$ and $ \rvxtil^n$ first
3163: diverge}
3164: \put(104,10){$n+1$}
3165: \put(90,10){$n-\Delta$}
3166:
3167: \linethickness{0.5mm}\put(97.5,15){\line(0,1){63}} % the box
3168: \put(45,78){\line(1,0){52.5}} % the box
3169:
3170: \multiput(95,65)(0, 5){ 3 }{\circle*{1.5}}
3171:
3172: \thinlines \put(50,47.5){\oval(2,60)} \put(55,50){\oval(2,55)}
3173: \put(60,52.5){\oval(2,50)} \put(65,55){\oval(2,45)}
3174: \put(70,57.5){\oval(2,40)} \put(75,60){\oval(2,35)}
3175: \put(80,62.5){\oval(2,30)} \put(85,65){\oval(2,25)}
3176: \put(90,67.5){\oval(2,20)} %\thicklines %\linethickness{1.5mm}
3177: \put(95,70){\oval(2,15)} \thinlines \put(77.5,20){\oval(60,2)}
3178: \put(80,25){\oval( 55,2)} \put(82.5,30){\oval(50,2)}
3179: \put(85,35){\oval(45,2)} \put(87.5,40){\oval(40,2)}
3180: \put(90,45){\oval( 35,2)} \put(92.5,50){\oval(30,2)}
3181: \put(95,55){\oval( 25,2)} \put(97.5,60){\oval(20,2)}
3182: %\thicklines %\linethickness{0.5mm}
3183: \put(100,65){\oval( 15,2)} \thinlines
3184: \end{picture}
3185: %
\caption{Two dimensional plot of the error probabilities $p_n(l,k)$,
corresponding to error events $(l,k)$,
contributing to $\Pr[ \rvxhat^{n - \delay} \neq \rvx^{n-\delay}]$ in
the situation where $\inf_{\gamma \in [0,1] } E_y(\Rx, \Ry,
\gamma) \geq {\inf_{\gamma \in [0,1] } E_x(\Rx, \Ry, \gamma)}$.
} \label{fig.twoD2}
3192: \end{figure}
3193:
3194:
The second, more involved case, is when $\inf_{\gamma \in [0,1] }
E_y(\Rx, \Ry, \gamma) < {\inf_{\gamma \in [0,1] } E_x(\Rx, \Ry,
\gamma)}$. To bound $\Pr[ \rvxhat^{n - \delay} \neq
3198: \rvx^{n-\delay}]$, we could use the same bounding technique used in
3199: the first case. This gives the error exponent $\inf_{\gamma \in [0,1]
3200: } E_y(\Rx, \Ry, \gamma)$ which is generally smaller than what we can
3201: get by dividing the error events in a new scheme as shown in Figure
3202: \ref{fig.errEvents}. In this situation we split~(\ref{eq.defPn}) into
three terms, as visualized in Fig.~\ref{fig.errEvents}. Just as in the
first case shown in Fig.~\ref{fig.twoD2}, there are $(n+1)\times
(n-\delay)$ such events to account for (those inside the box). The
error events are partitioned into three regions. Regions 2 and 3 are
separated by $k^*(l)$, drawn as a dotted line. In region 3, we add extra
probabilities outside of the box but within the ovals to make the
summation simpler. Those extra error events do not affect the error
exponent, as shown in the proof. The possible dominant error events
are highlighted in Fig.~\ref{fig.errEvents}. Thus,
3212: %
3213: \begin{equation}
3214: \Pr[ \rvxhat^{n - \delay} \neq \rvx^{n-\delay}] \leq \sum_{l=1}^{n -
3215: \delay} \sum_{k=l}^{n+1} p_n(l,k) + \sum_{l=1}^{n - \delay}
3216: \sum_{k=k^{\ast}(l)}^{l-1} p_n(l,k) + \sum_{l=1}^{n - \delay}
3217: \sum_{k=1}^{k^{\ast}(l)-1} p_n(l,k) \label{eq.threeTerms}
3218: \end{equation}
3219: %
where we adopt the convention that an empty sum is zero, i.e.,
$\sum_{k=1}^{0} p_k=0$. The lower boundary of region 2 is
$k^{\ast}(l) \geq 1$, given as a function of $n$ and $l$ by
3222: %
3223: \begin{equation}
3224: \kast = \max\left\{1, n +1- \ceil{\frac{ \inf_{\gamma \in [0,1]}
3225: E_x(\Rx, \Ry, \gamma)}{ \inf_{\gamma \in [0,1] } E_y(\Rx, \Ry,
3226: \gamma)}} (n+1-l )\right\} = \max\left\{1, n+1 - G
3227: (n+1-l)\right\}\label{eq.kast}
3228: \end{equation}
3229: %
3230: where we use $G$ to denote the ceiling of the ratio of exponents.
3231: Note that when $\inf_{\gamma \in [0,1] } E_y(\Rx, \Ry, \gamma)
3232: > {\inf_{\gamma \in [0,1] } E_x(\Rx, \Ry, \gamma)}$ then $G =
3233: 1$ and region two of Fig.~\ref{fig.errEvents} disappears. In other
3234: words, the middle term of~(\ref{eq.threeTerms}) equals zero. This
3235: is the first case considered. We now consider the cases when $G
3236: \geq 2$ (because of the ceiling function $G$ is a positive integer).
3237:
3238:
3239:
3240: \begin{figure}[t]
3241: %\caption{}
3242: \begin{picture}(100,100)
3243:
3244: \multiput(50,20)(5,0){12}{\multiput(0,0)(0,5){12}{\circle{0.5}}}
3245: \put(45, 15){\vector(1,0){70}} \put(45, 15){\vector(0,1){70}}
3246: \put(45,87){$k$} \put(117,15){$l$}
3247:
3248: \put(39,74) {\rotatebox{90}{ $n+1$}}
3249: \put(39,60){\rotatebox{90}{$n-\Delta$}}
3250:
3251: \put(30,15) {\rotatebox{90}{Index at which $ \rvy^n$ and $
3252: \rvyhat^n$ first diverge}}
3253:
3254: \put(45,5) {Index at which $ \rvx^n$ and $ \rvxhat^n$ first
3255: diverge}
3256:
3257:
3258: \put(110,49){$k^*(n-\Delta)-1$}
3259: \put(104,10){$n+1$} \put(90,10){$n-\Delta$}
3260: \multiput(95,55)(0,5){
3261: 5 }{\circle*{1.5}}
3262:
3263: \linethickness{0.5mm}\put(97.5,15){\line(0,1){63}} % the box
3264: \put(45,78){\line(1,0){52.5}} % the box
3265:
3266: \thinlines
3267: \put(105,75){\line(-1,-1){58}} \dottedline{1}(105,75)(77.5,20)
3268:
3269: \dottedline{1}(77.5,20)(50,20)
3270:
3271: %\put(105,75){\line(-1,-2){29}}
3272: \put(50,47.5){\oval(2,60)} \put(55,50){\oval(2,55)}
3273: \put(60,52.5){\oval(2,50)} \put(65,55){\oval(2,45)}
3274: \put(70,57.5){\oval(2,40)} \put(75,60){\oval(2,35)}
3275: \put(80,62.5){\oval(2,30)} \put(85,65){\oval(2,25)}
3276: \put(90,67.5){\oval(2,20)}
3277: \put(95,70){\oval(2,15)}
3278: \put(55,20){\oval(2,5)} \put(60,22.5){\oval(2,10)}
3279: \put(65,25){\oval(2,15)} \put(70,27.5){\oval(2,20)}
3280: \put(75,30){\oval(2,25)} \put(80,35){\oval(2,25)}
3281: \put(85,42.5){\oval(2,20)} \put(90,50){\oval(2,15)} \put(95,57
3282: ){\oval(2,10)} \put(92.5,20){\oval( 30,2)} \put(95,25){\oval(
3283: 25,2)} \put(95,30){\oval(25,2)} \put(97.5,35){\oval(20,2)}
3284: \put(97.5,40){\oval(20,2)} \put(100,45){\oval( 15,2)}
3285: \put(100,50){\oval(15,2)} \thinlines \put(61,56){Region
3286: 1}\put(65,27){Region 2} \put(96,27){Region 3} \put(108,
3287: 61){$k^*(l)$} \put(107, 62){\vector(-1,0){8.5}}
3288: \end{picture}
3289: \caption{Two dimensional plot of the error probabilities $p_n(l,k)$,
3290: corresponding to error events $(l,k)$,
3291: contributing to $\Pr[ \rvxhat^{n - \delay} \neq \rvx^{n-\delay}]$ in
3292: the situation where $\inf_{\gamma \in
3293: [0,1] } E_y(\Rx, \Ry, \gamma) < {\inf_{\gamma \in [0,1] }
3294: E_x(\Rx, \Ry, \gamma)}$. }\label{fig.errEvents}
3295: \end{figure}
3296:
3297:
3298:
3299: The first term of~(\ref{eq.threeTerms}), i.e., region one in
3300: Fig.~\ref{fig.errEvents} where $ l\leq k$, is bounded in the same
3301: way that the first term of~(\ref{eq.twoTerms}) is, giving
3302: %
3303: \begin{equation}
3304: \sum_{l=1}^{n - \delay} \sum_{k=l}^{n+1} p_n(l,k) \leq
3305: C_2 \exp\{-\delay [ \inf_{\gamma \in [0,1]} E_x(\Rx, \Ry, \gamma) -\alpha]\}.
3306: \label{eq.firstTerm}
3307: \end{equation}
3308:
3309:
In Fig.~\ref{fig.errEvents}, region two is upper bounded by the
45-degree line, and lower bounded by $k^{\ast}(l)$. The second term
of~(\ref{eq.threeTerms}), corresponding to this region where $ l\geq
k$, can be bounded as
3314: %
3315: \begin{align}
3316: \sum_{l=1}^{n - \delay} \sum_{k=\kast}^{l-1} p_n(l,k)
3317: &\leq \sum_{l=1}^{n - \delay}\sum_{k=\kast}^{l-1} \exp\{-(n-k+1)
3318: E_y(\Rx, \Ry, \frac{l-k}{n-k+1})\}
3319: \nonumber \\
3320: %%
3321: &= \sum_{l=1}^{n - \delay}\sum_{k=\kast}^{l-1}
3322: \exp\{-(n-k+1) \frac{n-l+1}{n-l+1} E_y(\Rx, \Ry,
3323: \frac{l-k}{n-k+1})\}
3324: \label{eq.gammaInv} \\
3325: & \leq \sum_{l=1}^{n - \delay}\sum_{k=\kast}^{l-1}
3326: \exp\{ -(n-l+1) \inf_{\gamma \in [0,1]}
3327: \frac{1}{1-\gamma} E_y(\Rx, \Ry, \gamma)\}
3328: \label{eq.defGamma}\\
3329: & = \sum_{l=1}^{n - \delay} (l-\kast)
3330: \exp\{ -(n-l+1) \inf_{\gamma \in [0,1]}
3331: \frac{1}{1-\gamma} E_y(\Rx, \Ry, \gamma)\} \label{eq.secTerm}
3332: \end{align}
3333: %
3334: In~(\ref{eq.gammaInv}) we note that $l\geq k $, so define
3335: $\frac{l-k}{n-k+1}=\gamma$ as in~(\ref{eq.defGamma}). Then
3336: $\frac{n-k+1}{n-l+1} = \frac{1}{1-\gamma}$.
3337:
3338: The third term of~(\ref{eq.threeTerms}), i.e., the intersection of
3339: region three and the ``box'' in Fig.~\ref{fig.errEvents} where $
3340: l\geq k$, can be bounded as,
3341: %
3342: \begin{align}
3343: \sum_{l=1}^{n - \delay}\sum_{k = 1}^{\kast-1}
3344: p_n(l,k) &\leq \sum_{l=1}^{n + 1}\sum_{k = 1}^{\min\{l, k^*(n-\delay)-1\}}
3345: p_n(l,k) \label{eq.changeOrder}\\
3346: %
3347: %
3348: &=\sum_{k=1}^{k^*(n-\Delta)-1}
3349: \sum_{l=k}^{n+1}p_n(l,k) \label{eq.changeOrder1}\\
3350: %
3351: &\leq \sum_{k=1}^{k^*(n-\Delta)-1}
3352: \sum_{l=k}^{n+1}\exp\{-(n-k+1)E_y(R_x,R_y,\frac{l-k}{n-k+1})\}\nonumber\\
3353: %
3354: &\leq \sum_{k=1}^{k^*(n-\Delta)-1}
3355: \sum_{l=k}^{n+1}\exp\{-(n-k+1)\inf_{\gamma\in[0,1]}E_y(R_x,R_y,\gamma)\}
3356: \nonumber\\
3357: %
3358: &\leq \sum_{k=1}^{k^*(n-\Delta)-1}(n-k+2)
3359: \exp\{-(n-k+1)\inf_{\gamma\in[0,1]}E_y(R_x,R_y,\gamma)\}
3360: \label{eq.thirdTerm}
3361: \end{align}
3362:
In (\ref{eq.changeOrder}) we note that $l\leq n-\delay$, and thus
$k^*(n-\delay) -1 \geq \kast -1$; also $l\geq 1$, so $l\geq
\kast-1$. This can be visualized in Fig.~\ref{fig.errEvents} as we
extend the summation from the intersection of the ``box'' and region
3 to the whole region under the diagonal line and the horizontal
line $k=k^*(n-\delay)-1$. In (\ref{eq.changeOrder1}) we simply
switch the order of the summation.
3370:
3371:
3372:
3373: Finally when $G \geq 2$, we substitute~(\ref{eq.firstTerm}),
3374: (\ref{eq.secTerm}), and~(\ref{eq.thirdTerm})
3375: into~(\ref{eq.threeTerms}) to give
3376: %
3377: \begin{align}
3378: %
3379: %
3380: \Pr[\rvxhat^{n - \delay} \neq \rvx^{n-\delay}] &\leq C_2
3381: \exp\{-\delay [ \inf_{\gamma \in [0,1]} E_x(\Rx, \Ry, \gamma)
3382: -\alpha]\} \nonumber \\
3383: %
&+ \sum_{l=1}^{n - \delay} (l-\kast) \exp\{ -(n-l+1) \inf_{\gamma
\in [0,1]} \frac{1}{1-\gamma} E_y(\Rx, \Ry, \gamma)\} \nonumber\\
3386: %
3387: &+\sum_{k=1}^{k^*(n-\Delta)-1}(n-k+2)\exp\{-(n-k+1)\inf_{\gamma\in[0,1]}E_y(R_x,R_y,\gamma)\}\nonumber\\
3388: %%
3389: %%
3390: %%
3391: &\leq C_2 \exp\{-\delay [ \inf_{\gamma \in [0,1]} E_x(\Rx, \Ry,
3392: \gamma) -\alpha]\}
3393: \nonumber \\
3394: %
3395: &+ \sum_{l=1}^{n - \delay} (l-n-1+G(n+1-l))
3396: \exp\{ -(n-l+1) \inf_{\gamma \in [0,1]}
3397: \frac{1}{1-\gamma} E_y(\Rx, \Ry, \gamma)\}\nonumber\\
3398: %
3399: &+\sum_{k=1}^{n +1- G(\delay+1)}(n-k+2)\exp\{-(n-k+1)\inf_{\gamma\in[0,1]}E_y(R_x,R_y,\gamma)\} \label{eq.largerSum}\\
3400: %%
3401: %%
3402: %%
3403: &\leq C_2 \exp\{-\delay [ \inf_{\gamma \in [0,1]} E_x(\Rx, \Ry,
3404: \gamma) -\alpha]\}
3405: \nonumber \\
3406: %
3407: &+ (G-1)C_3
3408: \exp\{ -\delay \big[\inf_{\gamma \in [0,1]}
3409: \frac{1}{1-\gamma} E_y(\Rx, \Ry, \gamma)-\alpha\big]\}\nonumber\\
3410: %
3411: &+ C_4\exp\{-\big[\delay
3412: G\inf_{\gamma\in[0,1]}E_y(R_x,R_y,\gamma)-\alpha \big]\}\nonumber\\
3413: %%
3414: %%
3415: %%
3416: &\leq C_5 \exp \Big\{ - \delay \Big[\min \Big\{ \inf_{\gamma \in
3417: [0,1]} E_x(\Rx, \Ry, \gamma), \inf_{\gamma \in [0,1]}
3418: \frac{1}{1-\gamma} E_y(\Rx, \Ry, \gamma) \Big\} - \alpha
3419: \Big]\Big\}. \label{eq.finResMLSW}
3420: \end{align}
3421: %
3422: To get (\ref{eq.largerSum}), we use the fact that $k^*(l)\geq
3423: n+1-G(n+1-l)$ from the definition of $k^*(l)$ in (\ref{eq.kast}) to
3424: upper bound the second term. We exploit the definition of $G$ to
3425: convert the exponent in the third term to $\inf_{\gamma \in [0,1]}
3426: E_x(\Rx, \Ry, \gamma)$. Finally, to get~(\ref{eq.finResMLSW}) we
3427: gather the constants together, sum out over the decaying
3428: exponentials, and are limited by the smaller of the two exponents.
3429:
3430: Note: in the proof of Theorem~\ref{thm.jointCodeML}, we regularly
3431: double count the error events or add smaller extra probabilities to
3432: make the summations simpler. But it should be clear that the error
3433: exponent is not affected.
3434:
3435:
3436: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3437: \subsection{Universal Decoding}
3438:
3439:
As discussed in Section~\ref{sec.univEnt}, we do not use a pairwise
minimum joint-entropy decoder because a polynomial term in $n$ would
multiply the exponential decay in $\delay$. Analogous to the
3443: sequential decoder used there, we use a ``weighted suffix entropy''
3444: decoder. The decoding starts by first identifying candidate sequence
3445: pairs as those that agree with the encoding bit streams up to time
3446: $n$, i.e., $\svxBar^n \in \binX(\svx^n), \svyBar^n \in \binY(\svy^n)$.
3447: For any one of the $|\binX(\svx^n)| |\binY(\svy^n)|$ sequence pairs in
3448: the candidate set, i.e., $(\svxBar^n, \svyBar^n) \in \binX(\svx^n)
3449: \times \binY(\svy^n)$ we compute $(n+1)\times (n+1)$ weighted
3450: entropies:
3451:
3452:
3453: \begin{eqnarray}
3454: &&H_S(l,k,\svxBar^n, \svyBar^n)=H(\svxBar_{l}^{(n+1-l)},\svyBar_{l}^{(n+1-l)}),\ \ \ \ l=k\nonumber\\
3455: &&H_S(l,k,\svxBar^n, \svyBar^n)=\frac{k-l}{n+1-l}H({\svxBar}_{l}^{k-1}|{\svyBar}_{l}^{k-1})+\frac{n+1-k}{n+1-l}H({\svxBar}_{k}^{n},{\svyBar}_{k}^{n}),\ \ \ \ l<k\nonumber\\
3456: &&H_S(l,k,\svxBar^n,\svyBar^n)=\frac{l-k}{n+1-k}H({\svyBar}_{k}^{l-1}|{\svxBar}_{k}^{l-1})+\frac{n+1-l}{n+1-k}H({\svxBar}_{l}^{n},{\svyBar}_{l}^{n}),\
3457: \ \ \ l>k.\nonumber
3458: \end{eqnarray}
3459:
3460: We define the \textit{score} of $({\svxBar}^n, {\svyBar}^n)$ as the
3461: pair of integers $i_x(\svxBar^n,\svyBar^n)$, $
3462: i_y(\svxBar^n,\svyBar^n)$ s.t.,
3463: \begin{eqnarray}
3464: i_x(\svxBar^n,\svyBar^n)&=&\max\{i:H_S(l,k,(\svxBar^n,\svyBar^n))<
3465: H_S(l,k,\svxtil^n,\svytil^n) \forall k=1,2,...n+1, \forall
3466: l=1,2,...i, \nonumber\\
3467: && \forall (\svxtil^n,\svytil^n)\in\binX(\svx^n) \times
3468: \binY(\svy^n)\cap
3469: \mathcal{F}_n(l,k,\svxBar^n,\svyBar^n) \}\\
3470: i_y(\svxBar^n,\svyBar^n)&=&\max\{i:H_S(l,k,(\svxBar^n,\svyBar^n))<
3471: H_S(l,k,\svxtil^n,\svytil^n) \forall l=1,2,...n+1, \forall
3472: k=1,2,...i,\nonumber\\
3473: && \forall (\svxtil^n,\svytil^n)\in \binX(\svx^n) \times
3474: \binY(\svy^n)\cap \mathcal{F}_n(l,k,\svxBar^n,\svyBar^n) \}
3475: \end{eqnarray}
3476: While $\mathcal{F}_n(l,k,\svx^n,\svy^n)$ is the same set as defined
3477: in (\ref{eq.jointPart}), we repeat the definition here for
3478: convenience,
3479:
3480:
3481: \begin{equation}
\mathcal{F}_n(l,k,x^n,y^n)=\{(\svxBar^n,\svyBar^n)\in
3483: \mathcal{X}^{n} \times\mathcal{Y}^{n} \; \mbox{s.t.} \;
3484: \svxBar^{l-1} = x^{l-1},\svxBar_l \neq x_l,\svyBar^{k-1}= y^{k-1},
3485: \svyBar_k\neq y_k\}.\nonumber
3486: \end{equation}
3487:
3488:
3489:
3490: The definition of
3491: $(i_x(\svxBar^n,\svyBar^n),i_y(\svxBar^n,\svyBar^n))$ can be
3492: visualized in the following procedure. As shown in
3493: Fig.~\ref{fig.scoresheet}, for all $ 1\leq l,k \leq n+1$, if there
3494: exists $({\svxBBar}^n, {\svyBBar}^n)\in
3495: \mathcal{F}_n(l,k,(\svxBar^n,\svyBar^n))\cap \binX(\svx^n) \times
3496: \binY(\svy^n) $ s.t. $H_S(l,k,\svxBar^n, \svyBar^n)\geq
H_S(l,k,\svxBBar^n,\svyBBar^n)$, then we mark $(l,k)$ on the plane
as shown in Fig.~\ref{fig.scoresheet}. Eventually we pick the maximum
integer which is smaller than all marked $x$-coordinates as
$i_x(\svxBar^n,\svyBar^n)$ and the maximum integer which is smaller
than all marked $y$-coordinates as $i_y(\svxBar^n,\svyBar^n)$. The
score of $({\svxBar}^n, {\svyBar}^n)$ tells us the first
branch point (in either $x$ or $y$) where a ``better sequence pair''
(with a smaller weighted entropy) exists.
3505:
3506:
3507:
Define the set of the winners as the sequences (not sequence pairs)
with the maximum score:
3510:
\[\mathcal{W}_n^x=\{\svxBar^n\in \binX(\svx^n) :\exists \svyBar^n\in \binY(\svy^n) \; \mbox{s.t.} \;
i_x(\svxBar^n,\svyBar^n)\geq i_x(\svxtil^n,\svytil^n), \forall
(\svxtil^n,\svytil^n)\in \binX(\svx^n) \times \binY(\svy^n)\}\]
\[\mathcal{W}_n^y=\{\svyBar^n\in \binY(\svy^n) :\exists \svxBar^n\in
\binX(\svx^n) \; \mbox{s.t.} \; i_y(\svxBar^n,\svyBar^n)\geq
i_y(\svxtil^n,\svytil^n), \forall (\svxtil^n,\svytil^n)\in
\binX(\svx^n) \times \binY(\svy^n)\}\]
3518:
3519: Then arbitrarily pick one sequence from $\mathcal{W}_n^x$ and one
3520: from $\mathcal{W}_n^y$ as the decision $(\svxhat^n,\svyhat^n)$.
3521:
3522:
3523:
3524:
3525: \setlength{\unitlength}{1mm}
3526:
3527: \begin{figure}
3528: \begin{picture}(100,100)
3529:
3530: \multiput(50,20)(5,0){12}{\multiput(0,0)(0,5){12}{\circle {1.5}}}
3531: \put(45, 15){\vector(1,0){70}} \put(45, 15){\vector(0,1){70}}
3532: \put(45,87){$k$} \put(117,15){$l$} \put(45, 39){\line(1,0){65}}
3533: \put(74, 15){\line(0,1){65}}
3534:
3535: \put(37,74){\rotatebox{90}{$n+1$}}
3536:
3537: \put(104,10){$n+1$}
3538: \put(48,10){$1$}
3539: \put(37,20){\rotatebox{90}{$1$}}
3540:
3541:
3542: \put(41, 33){\rotatebox{90}{$i_y$}}
3543: \put(68, 11){$i_x$}
3544:
3545: \put(95,40){\circle*{1.5}} \put(75,45){\circle*{1.5}}
3546: \put(105,50){\circle*{1.5}} \put(100,65){\circle*{1.5}}
3547: \put(100,60){\circle*{1.5}} \put(95,60){\circle*{1.5}}
3548: \put(95,65){\circle*{1.5}} \put(95,70){\circle*{1.5}}
3549: \put(100,60){\circle*{1.5}} \put(75,65){\circle*{1.5}}
3550: \put(95,75){\circle*{1.5}} \put(85,55){\circle*{1.5}}
3551: \put(105,75){\circle*{1.5}}
3552: \end{picture}
\caption[]{2D interpretation of the \textit{score}, $(i_x(\svxBar^n,\svyBar^n),
i_y(\svxBar^n,\svyBar^n))$, of a sequence
pair $(\svxBar^n,\svyBar^n)$. If there exists a sequence pair in
$\mathcal{F}_n(l,k,\svxBar^n,\svyBar^n)$ with a smaller or equal weighted entropy, then $(l,k)$ is marked with a solid dot.
The \textit{score} $i_x(\svxBar^n,\svyBar^n)$ is the largest
integer which is smaller than all the $x$-coordinates of the
marked points. Similarly for $i_y(\svxBar^n,\svyBar^n)$.
}
3561: \label{fig.scoresheet}
3562: \end{figure}
3563:
3564:
3565: We bound the probability that there exists a sequence pair in
3566: $\mathcal{F}_n(l,k,(\rvx^n,\rvy^n))\cap \binX(\svx^n) \times
3567: \binY(\svy^n)$ with smaller weighted minimum-entropy suffix score
3568: as:
3569: \begin{eqnarray}
3570: p_n(l,k)&=&\sum_{x^n}\sum_{y^n}p_{\rvx\rvy}(x^n,y^n)
3571: P(\exists(\svxtil_{1}^{n},\svytil_{1}^{n})\in \binX(\svx^n) \times
3572: \binY(\svy^n)\cap \mathcal{F}_n(l,k,x^n,y^n),\nonumber\\
3573: && s.t. H_S(l,k,\svxtil^n,\svytil^n)\leq
3574: H_S(l,k,(x^n,y^n)))\nonumber
3575: \end{eqnarray}
3576: Note that the $p_n(l,k)$ here differs from the $p_n(l,k)$ defined in
3577: the ML decoding by replacing $p_{\rvx\rvy}(\svx^n, \svy^n) \leq
3578: p_{\rvx\rvy}(\svxtil^n, \svytil^n)$ with
3579: $H_S(l,k,\svxtil^n,\svytil^n)\leq H_S(l,k,(x^n,y^n))$.
3580:
The following lemma, analogous to (\ref{eq.defPn}) for ML decoding,
tells us that the ``weighted suffix entropy'' decoding rule is a
good one.
3584:
3585: \begin{lemma} Upper bound on symbol-wise decoding error
3586: $P_{ex}(k,k+d)$ :\label{Lemma_3_UNI_SW}
3587:
3588: \begin{eqnarray}
3589: \Pr[\rvxhat^{n-\delay} \neq \rvx^{n-\delay}] \leq
3590: \sum_{l=1}^{n-\delay}\sum_{k=1}^{n+1}p_n(l,k)\nonumber
3591: \end{eqnarray}
3592:
3593: \end{lemma}
3594:
3595: \pf According to the decoding rule, $\svxhat^{n-\delay}\neq
3596: \svx^{n-\delay}$ implies that there exists a sequence $\svxtil^n\in
\mathcal{W}_n^x$ s.t.\ $\svxtil^{n-\delay}\neq x^{n-\delay}$. This
3598: means that there exists a sequence $\svytil^n\in
3599: \binY(\svy^n)$, s.t. $i_x(\svxtil^n,\svytil^n)\geq
3600: i_x(\svx^n,\svy^n)$. Suppose that $(\svxtil^n,\svytil^n)\in
3601: \mathcal{F}_n(l,k , x^n,y^n)$, then $l\leq n-\delay $ because
3602: $\svxtil^{n-\delay}\neq x^{n-\delay}$. By the definition of $i_x$,
3603: we know that $H_S(l,k,\svxtil^n,\svytil^n)\leq H_S(l,k, x^n,y^n )$.
Using the union bound, we get the desired inequality.
3605: \hfill $\blacksquare$
3606:
3607:
3608: We only need to bound each single error probability $p_n(l,k)$ to
3609: finish the proof.
3610:
\begin{lemma}{Upper bound on $p_n(l,k)$, $l\leq k$:} $\forall \gamma>0$, $\exists K_1 <
\infty$, s.t.
\[p_n(l,k)\leq K_1 \exp\{-(n-l+1) [E_{x}
(\Rx, \Ry, \lambda) - \gamma]\}\] where $\lambda = (k-l)/(n-l+1)
\in [0,1]$. \label{Lemma:UpperBoundon2_UN}
3616: \end{lemma}
3617: %
\pf Here the error probability $p_n(l,k)$ can be thought of as
starting from~(\ref{eq.indepBin}) with the condition $(k-l)
H(\svxtil_{l}^{k-1}|\svytil_{l}^{k-1}) + (n-k+1) H(\svxtil_k^n,
\svytil_{k}^n) < (k-l) H(\svx_{l}^{k-1}|\svy_{l}^{k-1}) + (n-k+1)
H(\svx_k^n, \svy_{k}^n)$ substituted for $p(\svxtil_l^n,
\svytil_l^n) > p(\svx_l^n, \svy_l^n)$. We get
3624: %
3625: \begin{align}
3626: %%
3627: p_n(l,k) = &
3628: %
3629: \sum_{\PNK, \PKL}
3630: %
3631: \sum_{\VNK, \VKL}
3632: %
3633: \sum_{\tiny \begin{array}{c}
3634: \svy_{l}^{k-1} \in \tclass_{\PKL},\\
3635: \svy_{k}^n \in \tclass_{\PNK}
3636: \end{array}}
3637: %
3638: \sum_{\tiny \begin{array}{c}
3639: \svx_l^{k-1} \in \tclass_{\VKL}(\svy_l^{k-1}), \\
\svx_{k}^n \in \tclass_{\VNK}(\svy_{k}^n) \end{array}}
3641: \min \Big[1,
3642: %%
3643: %%\hspace{-1em}
3644: \sum_{\tiny \begin{array}{c}\VtilNK, \VtilKL, \PtilNK \; \mbox{s.t.}\\
3645: \minEntTil < \\\minEnt
3646: \end{array}} \nonumber\\
3647: %
3648: & \sum_{\svytil_{k}^n \in \tclass_{\PtilNK}} \sum_{\svxtil_{l}^{k-1}
3649: \in \tclass_{\VtilKL}(\svy_{l}^{k-1})} \sum_{\svxtil_{k}^n \in
3650: \tclass_{\VtilNK}(\svytil_{k}^n)} \exp\{-(n-l +1) \Rx - (n-k+1)
3651: \Ry\} \Big] p_{\rvx\rvy}(x^n, y^n) \label{eq.enumTil}
3652: \end{align}
3653: %
3654: In~(\ref{eq.enumTil}) we enumerate all the source sequences in a way
3655: that allows us to focus on the types of the important subsequences.
We enumerate the possibly misleading candidate sequences in terms of
their suffix types. We restrict the sum to those pairs
3658: $(\svxtil^n, \svytil^n)$ that could lead to mistaken decoding,
3659: defining the compact notation $\minEnt \defeq (k-l) H(\VKL|\PKL) +
3660: (n-k+1) H(\PNK \times \VNK)$, which is the weighted suffix entropy
3661: condition rewritten in terms of types.
3662:
3663: Note that the summations within the minimization
3664: in~(\ref{eq.enumTil}) do not depend on the arguments within these
3665: sums. Thus, we can bound this sum separately to get a bound on the
3666: number of possibly misleading source pairs $(\svbxtil, \svbytil)$.
3667: %
3668: \begin{align}
3669: & \hspace{-5em}
3670: \sum_{\tiny \begin{array}{c}\VtilNK, \VtilKL, \PtilNK \; \mbox{s.t.}\\
3671: \minEntTil < \\ \minEnt \end{array}}
3672: %
3673: \sum_{\svytil_{k}^n \in \tclass_{\PtilNK}}
3674: \sum_{\svxtil_{l}^{k-1} \in \tclass_{\VtilKL}(\svy_{l}^{k-1})}
3675: \sum_{\svxtil_{k}^n \in \tclass_{\VtilNK}(\svytil_{k}^n)} \nonumber\\
3676: %%
3677: %%
3678: %%
3679: & \leq \sum_{\tiny \begin{array}{c}\VtilNK, \VtilKL, \PtilNK
3680: \; \mbox{s.t.}\\ \minEntTil < \\ \minEnt \end{array}}
3681: \sum_{\svytil_{k}^n \in \tclass_{\PtilNK}}
|\tclass_{\VtilKL}(\svy_{l}^{k-1})|
|\tclass_{\VtilNK}(\svytil_{k}^n)| \label{eq.condTypeSize}\\
3684: %%
3685: %%
3686: %%
3687: \leq & \sum_{\tiny \begin{array}{c}\VtilNK, \VtilKL, \PtilNK \;
3688: \mbox{s.t.}\\
3689: \minEntTil < \\ \minEnt \end{array}}
3690: |\tclass_{\PtilNK}|
3691: \exp\{(k-l)H(\VtilKL|\PKL)\}
3692: \exp\{(n-k+1)H(\VtilNK|\PtilNK)\}
3693: \displaybreak[2]
3694: \label{eq.condTypeBnd}\\
3695: %%
3696: %%
3697: %%
3698: \leq &\sum_{\tiny \begin{array}{c}\VtilNK, \VtilKL, \PtilNK \; \mbox{s.t.}\\
3699: \minEntTil < \\ \minEnt \end{array}}
3700: %
3701: \exp\{(k-l)H(\VtilKL|\PKL) + (n-k+1) H(\PtilNK \times \VtilNK) \}
3702: \displaybreak[2]
3703: \label{eq.margTypeBnd} \displaybreak[2]\\
3704: %%
3705: %%
3706: %%
3707: \leq & \sum_{\VtilNK, \VtilKL, \PtilNK }
3708: \exp\{(k-l)H(\VKL|\PKL) + (n-k+1) H(\PNK \times \VNK) \}
3709: \displaybreak[2] \label{eq.scoring} \displaybreak[2] \\
3710: %%
3711: %%
3712: %%
3713: \leq & \; (n-l+2)^{2 |\cX| |\cY|}
3714: \exp\{(k-l)H(\VKL|\PKL) + (n-k+1) H(\PNK \times \VNK) \}
3715: \label{eq.numTypes}
3716: \end{align}
3717: %
3718: In~(\ref{eq.condTypeSize}) we sum over all $\svxtil_{l}^{k-1} \in
3719: \tclass_{\VtilKL}(\svy_{l}^{k-1})$.
3720: In~(\ref{eq.condTypeBnd}) we use standard bounds, e.g., $|
3721: \tclass_{\VtilKL}(\svy_{l}^{k-1})| \leq \exp\{(k-l)
3722: H(\VtilKL|\PKL)\}$ since $\svy_{l}^{k-1} \in \tclass_{\PKL}$.
3723: We also sum over all $\svxtil_{k}^{n} \in
3724: \tclass_{\VtilNK}(\svytil_{k}^{n})$ and over all
3725: $\svytil_{k}^n \in
3726: \tclass_{\PtilNK}$ in~(\ref{eq.condTypeBnd}). By definition of the
3727: decoding rule $(\svbxtil, \svbytil)$ can only lead to a decoding error
if $(k-l) H(\VtilKL|\PKL) + (n-k+1) H(\PtilNK \times \VtilNK) <
3729: (k-l)H(\VKL|\PKL) + (n-k+1) H(\PNK \times \VNK)$.
3730: In~(\ref{eq.numTypes}) we apply the polynomial bound on the number of
3731: types.
3732:
3733:
3734: We substitute~(\ref{eq.numTypes}) into~(\ref{eq.enumTil}) and pull out
3735: the polynomial term, giving
3736: %
3737: %
3738: %%
3739: \begin{align}
3740: &\hspace{-1em} p_n(l,k) \leq
3741: (n-l+2)^{2 |\cX| |\cY|}
3742: %
3743: \sum_{\PNK, \PKL}
3744: %
3745: \sum_{\VNK, \VKL}
3746: %
3747: \sum_{\tiny \begin{array}{c}
3748: \svy_{l}^{k-1} \in \tclass_{\PKL},\\
3749: \svy_{k}^n \in \tclass_{\PNK}
3750: \end{array}}
3751: %
3752: \sum_{\tiny \begin{array}{c}
3753: \svx_l^{k-1} \in \tclass_{\VKL}(\svy_l^{k-1}), \\
\svx_{k}^n \in \tclass_{\VNK}(\svy_{k}^n) \end{array}}
3755: \nonumber \\
3756: %
3757: &\min \Big[1, \exp\{-(k-l)[\Rx - H(\VKL|\PKL)]
3758: - (n-k+1) [\Rx + \Ry - H(\VNK \times \PNK)] \} \Big]
3759: \jointSource{l}{n}{l}{n} \nonumber \\
3760: %%
3761: %%
3762: \leq &
3763: (n-l+2)^{2 |\cX| |\cY|}
3764: %
3765: \sum_{\PNK, \PKL}
3766: %
3767: \sum_{\VNK, \VKL} \nonumber \\
3768: %
3769: & \exp\Big\{\max \Big[0, -(k-l)[\Rx - H(\VKL|\PKL)]
3770: - (n-k+1) [\Rx + \Ry - H(\VNK \times \PNK)] \Big]\Big\}
3771: \nonumber \\
3772: %
3773: &\exp\left\{-(k-l)D(\VKL \times \PKL \| \PxyRV)
3774: - (n-k+1) D(\VNK \times \PNK \| \PxyRV) \right\}
3775: \label{eq.srcProb} \displaybreak[2]\\
3776: %%
3777: %%
3778: \leq &
3779: (n-l+2)^{2 |\cX| |\cY|}
3780: %
3781: \sum_{\PNK, \PKL}
3782: %
3783: \sum_{\VNK, \VKL}
3784: %
3785: \exp\Big\{-(n-l+1) \Big[\lambda D(\VKL \times \PKL \| \PxyRV)
3786: + \bar{\lambda} D(\VNK \times \PNK \| \PxyRV)
3787: \nonumber \\
3788: %
3789: &+ \left|\lambda [\Rx - H(\VKL|\PKL)]
3790: + \bar{\lambda} [\Rx + \Ry - H(\VNK \times \PNK)]\right|^{+}
3791: \Big] \Big\}
3792: \label{eq.combineDiv} \displaybreak[2]\\
3793: %%
3794: %%
3795: %%
3796: \leq &
3797: (n-l+2)^{2 |\cX| |\cY|}
3798: %
3799: \sum_{\PNK, \PKL}
3800: %
3801: \sum_{\VNK, \VKL}
3802: %
3803: \exp \Big\{-(n-l+1) \inf_{\tiny \rvxtil, \rvytil,
3804: \rvxBar, \rvyBar}
3805: \Big[\lambda D(p_{\rvxtil, \rvytil} \| \PxyRV)
3806: + \bar{\lambda} D(p_{\rvxBar, \rvyBar} \| \PxyRV) \nonumber \\
3807: %
3808: & + \left|\lambda [\Rx - H(\rvxtil|\rvytil)]
3809: + \bar{\lambda}
3810: [\Rx + \Ry - H(\rvxBar, \rvyBar)]\right|^{+} \Big] \Big\}
3811: \label{eq.infExp} \displaybreak[2]\\
3812: %%
3813: %%
3814: %%
3815: \leq &
3816: (n-l+2)^{4 |\cX| |\cY|} \exp\{-(n-l+1) E_{x} (\Rx, \Ry, \lambda)\}
3817: %%
3818: \leq K_1 \exp\{-(n-l+1) [E_{x}
(\Rx, \Ry, \lambda) - \gamma]\} \label{eq.defExp}
3820: %%
3821: %\nonumber
3822: \end{align}
3823: %
3824: In~(\ref{eq.srcProb}) we use the memoryless property of the source,
3825: and exponential bounds on the probability of observing
3826: $(\svx_{l}^{k-1}, \svy_l^{k-1})$ and $(\svx_k^n, \svy_k^n)$.
3827: In~(\ref{eq.combineDiv}) we pull out $(n-l+1)$ from all terms,
3828: noticing that $\lambda = (k-l)/(n-l+1) \in [0,1]$ and $\bar{\lambda}
3829: \defeq 1- \lambda = (n-k+1)/(n-l+1)$. In~(\ref{eq.infExp}) we
3830: minimize the exponent over all choices of distributions $p_{\rvxtil,
3831: \rvytil}$ and $p_{\rvxBar, \rvyBar}$. In~(\ref{eq.defExp}) we
3832: define the universal random coding exponent $E_{x}(\Rx, \Ry,
3833: \lambda) \defeq \inf_{\tiny \rvxtil, \rvytil, \rvxBar, \rvyBar} \{
3834: \lambda D(p_{\rvxtil,\rvytil} \| \PxyRV) + \bar{\lambda}
3835: D(p_{\rvxBar, \rvyBar} \| \PxyRV) + \left|\lambda [\Rx -
3836: H(\rvxtil|\rvytil)] + \bar{\lambda} [\Rx + \Ry - H(\rvxBar,
3837: \rvyBar)]\right|^{+}\}$ where $0 \leq \lambda \leq 1$ and
3838: $\bar{\lambda} = 1 - \lambda$. We also incorporate the number of
3839: conditional and marginal types into the polynomial bound, as well as
3840: the sum over $k$, and then push the polynomial into the exponent
3841: since for any polynomial $F$, $\forall E, \epsilon >0$, there exists
$C>0$, s.t.\ $F(\delay)e^{-\delay E}\leq Ce^{-\delay(E-\epsilon)}$.
3843: \hfill $\blacksquare$
3844:
3845: A similar derivation yields a bound on $p_n(l, k)$ for $l \geq k$.
3846:
3847:
3848:
3849: Combining Lemmas \ref{Lemma:UpperBoundon2_UN} and
3850: \ref{Lemma_3_UNI_SW}, and then following the same derivation for ML
3851: decoding yields Theorem~\ref{thm.jointCode}.
3852:
3853:
3854: %%%%%%%%%%%%%%%%%%%%
3855: \section{Future Directions}
3856:
3857: \subsection{Stationary-ergodic sources and universality}
3858:
3859: \cite{cover:75} extends the block-coding proofs to the Slepian-Wolf
3860: problem for stationary-ergodic sources using AEP arguments. To have a
3861: similar extension to the streaming context, possibly additional
3862: regularity conditions will be required so that error exponents can be
3863: achieved. To achieve universality over sources, it is possible that
3864: further technical restrictions will be required. For the case of
3865: distributed Markov sources however, it seems quite clear that all the
3866: arguments in this paper will easily generalize. In that case,
3867: following the approach we take in \cite{SahaiUnstable}, the source can
3868: be ``segmented'' into small blocks and the endpoints\footnote{For a
3869: Markov source of known order $k$, the endpoint is just $k$
3870: successive symbols at the end of the block.} of the blocks can be
3871: encoded perfectly at essentially zero rate. Conditioned on these
3872: endpoints, the blocks are then iid, with the endpoints representing a
3873: third stream of perfectly known side-information.
3874:
3875: \subsection{Upper bounds and demonstrating optimal delays}
3876:
3877: This paper dealt entirely with achievability of certain error
3878: exponents. Ideally, we would have corresponding upper bounds
3879: demonstrating that no higher exponents are possible. In the
3880: block-coding case, problem 3.7.1 in \cite{csiszarKorner} provides a
3881: simple upper-bound. However, the nature of the error exponents in the
3882: streaming case might be more complicated. \cite{Chang:06} provides an
3883: upper bound and matching achievable scheme for point-to-point
3884: source-coding with delay and this bound extends naturally to the case
3885: where side-information is known at both the encoder and the decoder.
3886: \cite{ChangISIT:06} provides an upper bound for the case of
3887: side-information known only at the decoder, and this bound is tight
for certain symmetric cases. However, both of these extend
single-encoder arguments from~\cite{SahaiBlockLength} that do not
immediately generalize to the case of multiple encoders.
3891:
3892: \subsection{Trading off error exponents for the different source terminals}
3893: For multiple terminal systems, different error exponents can be
3894: achieved for different users or sources. For channel coding, the
3895: encoders can choose different distributions while generating the
3896: randomized code book to achieve an error exponent trade-off among
3897: different users. In \cite{Weng:05}, the error exponent region is
3898: studied for the Gaussian multiple access channel and the broadcast
3899: channel within the block-coding paradigm. It is unclear whether
similar tradeoffs are possible within the streaming Slepian-Wolf
3901: problems considered here since there is nothing immediately comparable
3902: to the flexibility we have in choosing the ``input distribution'' for
3903: channel coding problems.
3904:
3905: \subsection{Adaptation and limited feedback}
An interesting extension is to adaptive universal streaming
Slepian-Wolf encoders. The decoders we use in this paper are based on
3908: empirical statistics. Therefore they can be used even if source
3909: statistics are unknown. The current proposal will work regardless of
3910: source and side information statistics as long as the conditional
3911: entropy $H(\rvx|\rvy)$ is less than the encoding rate. Even if there
3912: is uncertainty in statistics, the anytime nature of the coding system
3913: should enable the system to adapt on-line to the unknown entropy rate
3914: if some feedback channel is available. The feedback channel would be
3915: used to order increases (or decreases) in the binning rate. An
3916: increase (or decrease) could be triggered by examining the difference
3917: between two quantities: the minimal empirical joint entropy between
3918: the decoded sequence and observation, and the empirical joint entropy
3919: between the particular sequence and observation yielding the
3920: second-lowest joint entropy. If there is a large difference between
3921: these two entropies, we are using rate excessively, and the rate of
3922: communication can be reduced. If the difference is negligible, then
3923: it's likely we are not decoding correctly. Our target should be to
3924: keep this difference at roughly $\epsilon$. In the current context,
3925: this is analogous to the rate margin by which we choose to exceed the
3926: known conditional entropy.
3927:
3928: \section*{Acknowledgments}
3929: The authors wish to acknowledge a desire expressed by Zixiang Xiong
3930: and subsequent hallway discussions during ITW 2004 that helped
3931: precipitate the current line of research. This work was supported in
3932: part by NSF ITR Grant No.~CNS-0326503.
3933:
3934: \appendix
3935:
3936: \newcommand{\pBar}{\bar{p}}
3937:
3938: \section{Proof of Theorem \ref{THM:Universal_ML_SW}}
3939:
3940: In this section we show that the maximum likelihood (ML) error
3941: exponent equals the universal error exponent. We show that for all
3942: $\gamma$,
3943: $$E^{ML}_x(R_x,R_y,\gamma)=E^{UN}_x(R_x,R_y,\gamma)$$
where the ML error exponent is
3945: \begin{eqnarray}\label{eqn:LEMMAAPPDC0_SW}
3946: E^{ML}_x(R_x,R_y,\gamma)&=&\sup_{\rho\in[0,1]}\{\gamma
3947: E_{x|y}(R_x,\rho)+(1-\gamma)E_{xy}(R_x,R_y,\rho)\}\nonumber\\
3948: &=&\sup_{\rho\in[0,1]}\{\rho R^{(\gamma)} -\gamma \log(\sum_{y
3949: }(\sum_{x }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho})-
3950: (1-\gamma)(1+\rho)\log(\sum_{y}\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})\}\nonumber\\
3951: &=&\sup_{\rho\in[0,1]}\{E^{ML}_x(R_x,R_y,\gamma,\rho)\}\nonumber
3952: \end{eqnarray}
3953:
3954: Write the function inside the $\sup$ argument as
3955: $E^{ML}_x(R_x,R_y,\gamma,\rho)$. The universal error exponent:
3956: \begin{eqnarray}
3957: E^{UN}_x(R_x,R_y,\gamma)&=&\inf_{ q_{xy},o_{xy}} \{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})\nonumber\\
3958: &&+\max\{0,\gamma (R_x-H(q_{x|y}))
3959: +(1-\gamma)(R_x+R_y-H(o_{xy}))\}\}\nonumber\\
3960: &=&\inf_{ q_{xy},o_{xy}} \{\gamma
3961: D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})+\max\{0,R^{(\gamma)}-\gamma
3962: H(q_{x|y}) -(1-\gamma)H(o_{xy})\}\}\nonumber
3963: \end{eqnarray}
3964: Here we define $R^{(\gamma)}=\gamma R_x +(1-\gamma)(R_x+R_y)>\gamma
3965: H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})$. For notational
3966: simplicity, we write $q_{xy}$ and $o_{xy}$ as two arbitrary joint
3967: distributions on $\mathcal{X}\times\mathcal{Y}$ instead of
3968: $p_{\rvxBar\rvyBar}$ and $p_{\rvxBBar\rvyBBar}$. We still write
3969: $p_{\rvx\rvy}$ as the distribution of the source.
3970:
3971:
3972:
Before the proof, we define a pair of distributions that we will need.
3974: %------------------------------------ Definitions ------------------------------------------
3975:
3976: \begin{defn}{Tilted distribution of $p_{\rvx\rvy}$}: $p^\rho_{\rvx\rvy}$, for all $ \rho\in [-1,\infty)$
3977:
3978: $$p^\rho_{\rvx\rvy}(x,y)=\frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}}}{\sum_t\sum_s
3979: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}}}$$ The entropy of the tilted
3980: distribution is written as $H(p^\rho_{\rvx\rvy})$. Obviously
3981: $p^0_{\rvx\rvy}=p_{\rvx\rvy}$.\\
3982: \end{defn}
3983:
3984:
3985: \begin{defn} {$\rvx-\rvy$ tilted distribution of $p_{\rvx\rvy}$}: $\pBar^\rho_{\rvx\rvy}$, for all $\rho \in
3986: [-1,+\infty)$
3987: \begin{eqnarray}
3988: \pBar^\rho_{\rvx\rvy}(x,y) &=&\frac{[\sum_s p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho}}]^{1+\rho}}{\sum_t[\sum_s p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}}]^{1+\rho}}\times\frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}}}{\sum_s p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho}}} \nonumber\\
3989: &=&\frac{A(y,\rho)}{B(\rho)}\times\frac{C(x,y,\rho)}{D(y,\rho)}\nonumber
3990: \end{eqnarray}
3991: Where
3992: \begin{eqnarray}
3993: A(y,\rho)&=&[\sum_s p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho}}]^{1+\rho}=D(y,\rho)^{1+\rho}\nonumber\\
B(\rho)&=& \sum_t[\sum_s p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}}]^{1+\rho} = \sum_y A(y,\rho) \nonumber\\
3995: C(x,y,\rho)&=&p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}}\nonumber\\
3996: D(y,\rho)&=&\sum_s p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho}} =\sum_x
3997: C(x,y,\rho)\nonumber
3998: \end{eqnarray}
3999: \end{defn}
4000:
4001: The marginal distribution for $\rvy$ is $\frac{A(y,\rho)}{B(\rho)}$.
4002: Obviously $\pBar^0_{\rvx\rvy}=p_{\rvx\rvy}$. Write the conditional
4003: distribution of $\rvx$ given $\rvy$ under distribution
4004: $\pBar^\rho_{\rvx\rvy}$ as $\pBar^\rho_{\rvx|\rvy}$, where
$\pBar^\rho_{\rvx|\rvy}(x|y)=\frac{C(x,y,\rho)}{D(y,\rho)}$, and
4006: the conditional entropy of $\rvx$ given $\rvy$ under distribution
4007: $\pBar^\rho_{\rvx\rvy}$ as
4008: $H(\pBar^\rho_{\rvx|\rvy})$. Obviously $H(\pBar^0_{\rvx|\rvy})=H(p_{\rvx|\rvy})$.\\
4009: The conditional entropy of $\rvx$ given $\svy$ for the $\rvx-\rvy$
4010: tilted distribution is
4011: $$ H(\pBar^\rho_{\rvx|\rvy=\svy})=-\sum_x
4012: \frac{C(x,y,\rho)}{D(y,\rho)}\log(\frac{C(x,y,\rho)}{D(y,\rho)})$$\\
4013:
4014: We introduce $ A(y,\rho)$, $ B(\rho)$, $ C(x, y,\rho)$, $ D(y,\rho)$
4015: to simplify the notations. Some of their properties are shown in
4016: Lemma~\ref{LEMMAAPP1_SI}.
4017:
4018:
4019: While tilted distributions are common optimal distributions in large
4020: deviation theory, it is useful to contemplate why we need to introduce
4021: these {\em two} tilted distributions. In the proof of Theorem
4022: \ref{THM:Universal_ML_SW}, through a Lagrange multiplier argument, we
4023: will show that $\{p^\rho_{\rvx\rvy}:\rho\in [-1,+\infty)\}$ is the
family of distributions that minimize the Kullback--Leibler distance
to $p_{\rvx\rvy}$ with fixed \textit{entropy} and
$\{\pBar^\rho_{\rvx\rvy}:\rho\in [-1,+\infty)\}$ is the family of
distributions that minimize the Kullback--Leibler distance to
4028: $p_{\rvx\rvy}$ with fixed \textit{conditional entropy}. Using a
4029: Lagrange multiplier argument, we parametrize the universal error
4030: exponent $E^{UN}_x(R_x,R_y,\gamma)$ in terms of $\rho$ and show the
4031: equivalence of the universal and maximum likelihood error exponents.
4032: %-------------------------END of Definitions ------------------------------------------
4033:
4034: Now we are ready to prove Theorem~\ref{THM:Universal_ML_SW}:
4035: $E^{ML}_x(R_x,R_y,\gamma)=E^{UN}_x(R_x,R_y,\gamma)$.
4036:
4037:
4038: \pf
4039:
4040:
4041: \subsection{case 1: $\gamma
4042: H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})< R^{(\gamma)} < \gamma
4043: H(\pBar^1_{\rvx|\rvy} )+(1-\gamma)H(p^1_{\rvx\rvy}
4044: )$.}\label{case:1}
4045:
4046: First, from Lemma~\ref{LEMMA_APP10} and Lemma~\ref{LEMMA_APP11}:
4047:
4048: $$\frac{\partial E^{ML}_x(R_x,R_y,\gamma,\rho)}{\partial
4049: \rho }=R^{(\gamma)}-\gamma H(\pBar^\rho_{\rvx|\rvy}
4050: )-(1-\gamma)H(p^\rho_{\rvx\rvy})$$
4051:
4052: Then, using Lemma~\ref{LEMMAAPP2} and Lemma~\ref{LEMMAAPP2_SI}, we
4053: have:
4054:
$$\frac{\partial^2 E^{ML}_x(R_x,R_y,\gamma,\rho)}{\partial
\rho^2 } \leq 0.$$
4057:
So $\rho$ maximizes $E^{ML}_x(R_x,R_y,\gamma,\rho)$ if and only
4059: if:
4060:
4061: \begin{eqnarray}
4062: 0=\frac{\partial E^{ML}_x(R_x,R_y,\gamma,\rho)}{\partial
4063: \rho}=R^{(\gamma)}-\gamma H(\pBar^\rho_{\rvx|\rvy}
4064: )-(1-\gamma)H(p^\rho_{\rvx\rvy})
4065: \end{eqnarray}
4066:
Because $R^{(\gamma)}$ is in the interval $(\gamma
H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy}), \gamma
H(\pBar^1_{\rvx|\rvy})+(1-\gamma)H(p^1_{\rvx\rvy}))$ and the
entropy functions increase monotonically in $\rho$,
we can find $\rho^*\in (0,1)$, s.t.
4072: $$\gamma H(\pBar^{\rho^*}_{\rvx|\rvy})+(1-\gamma)H(p^{\rho^*}_{\rvx\rvy})=R^{(\gamma)}$$
4073:
4074: Using Lemma~\ref{LEMMA_APP8} and Lemma~\ref{LEMMA_APP9} we get:
4075: \begin{eqnarray}
4076: E^{ML}_x(R_x,R_y,\gamma)&=&\gamma
4077: D(\pBar^{\rho^*}_{\rvx\rvy}\|p_{\rvx\rvy})+(1-\gamma)
4078: D(p^{\rho^*}_{\rvx\rvy}\|p_{\rvx\rvy})\label{eqn:ML_error_expression}
4079: \end{eqnarray}
4080: Where $\gamma
4081: H(\pBar^{\rho^*}_{\rvx|\rvy})+(1-\gamma)H(p^{\rho^*}_{\rvx\rvy})=R^{(\gamma)}$
4082: , $\rho^*$ is generally unique because
4083: both $H(\pBar^\rho_{\rvx|\rvy})$ and $H(p^\rho_{\rvx\rvy})$ are strictly increasing with
4084: $\rho$.\\
4085:
4086: Secondly
4087: \begin{eqnarray}\label{eqn:LEMMAAPPDC2_SW}
4088: & & E^{UN}_x(R_x,R_y,\gamma)\nonumber\\
4089: &=&\inf_{ q_{xy},o_{xy}} \{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})+\max\{0,R^{(\gamma)}-\gamma H(q_{x|y}) -(1-\gamma)H(o_{xy})\}\}\nonumber\\
4090: &=& \inf_{b} \{\inf_{q_{xy},o_{xy}:\gamma H(q_{x|y}) + (1-\gamma)H(o_{xy})=b}\{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})+\max(0,R^{(\gamma)}-b)\}\}\nonumber\\
4091: &=& \inf_{b\geq \gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})}
4092: \{\inf_{q_{xy},o_{xy}:\gamma H(q_{x|y}) +
4093: (1-\gamma)H(o_{xy})=b}\{\gamma
4094: D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})\nonumber\\
4095: &&+\max(0,R^{(\gamma)}-b)\}\}\label{eqn:optimization_equality}
4096: \end{eqnarray}
4097: The last equality is true because, for $b< \gamma
4098: H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})<R^{(\gamma)}$,
4099: \begin{eqnarray}
&&\inf_{q_{xy},o_{xy}:\gamma H(q_{x|y}) + (1-\gamma)H(o_{xy})=b}\{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})+\max(0,R^{(\gamma)}-b)\}\nonumber\\
&\geq& 0 + R^{(\gamma)}-b \nonumber\\
&=&\inf_{q_{xy},o_{xy}:H(q_{x|y})=H(p_{\rvx|\rvy}),H(o_{xy})=H(p_{\rvx\rvy})}\{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})+\max(0,R^{(\gamma)}-b)\}\nonumber\\
&\geq&\inf_{q_{xy},o_{xy}:H(q_{x|y})=H(p_{\rvx|\rvy}),H(o_{xy})=H(p_{\rvx\rvy})}\{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})\nonumber\\
&&+\max(0,R^{(\gamma)}-\gamma H(p_{\rvx|\rvy})-(1-\gamma)H(p_{\rvx\rvy}))\}\nonumber\\
&\geq&\inf_{q_{xy},o_{xy}: \gamma H(q_{x|y})+(1-\gamma)H(o_{xy}) =\gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy}) }\{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})\nonumber\\
&&+\max(0,R^{(\gamma)}-\gamma
H(p_{\rvx|\rvy})-(1-\gamma)H(p_{\rvx\rvy}))\}\nonumber
4108: \end{eqnarray}
4109: Fixing $b\geq \gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})$, the
4110: inner infimum in (\ref{eqn:optimization_equality}) is an
4111: optimization problem on $q_{xy}, o_{xy}$ with equality constraints
4112: $\sum_x\sum_y q_{xy}(x,y)=1$, $\sum_x\sum_y o_{xy}(x,y)=1$ and
4113: $\gamma H(q_{x|y})+(1-\gamma)H(o_{xy})=b$ and the obvious inequality
4114: constraints $ 0\leq q_{xy}(x,y)\leq 1, 0\leq o_{xy}(x,y)\leq 1,
4115: \forall x,y$. In the following formulation of the optimization
4116: problem, we relax one equality constraint to an inequality
4117: constraint $\gamma H(q_{x|y})+(1-\gamma)H(o_{xy})\geq b$ to make the
optimization problem {\em convex}. It turns out later that the optimal
4119: solution to the relaxed problem is also the optimal solution to the
4120: original problem because $b\geq \gamma
4121: H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy}) $. The resulting
4122: optimization problem is:
4123: \begin{eqnarray}
4124: &&\inf_{q_{xy}, o_{xy}} \{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})\} \nonumber\\
4125: &&\mbox{s.t.}\sum_x\sum_y q_{xy}(x,y)=1\nonumber\\
4126: &&\sum_x\sum_y o_{xy}(x,y)=1\nonumber\\
4127: && b- \gamma H(q_{x|y})-(1-\gamma)H(o_{xy})\leq 0 \nonumber\\
4128: && 0\leq q_{xy}(x,y)\leq 1, \ \ \forall (x,y)\in
4129: \mathcal{X}\times\mathcal{Y}\nonumber\\
4130: && 0\leq o_{xy}(x,y)\leq 1, \ \ \forall (x,y)\in
4131: \mathcal{X}\times\mathcal{Y}\label{eqn:convex_opt_setup}
4132: \end{eqnarray}
4133: The above optimization problem is {\em convex} because the objective
4134: function and the inequality constraint functions are convex and the
4135: equality constraint functions are affine\cite{Boyd2004}. The
4136: Lagrange multiplier function for this convex optimization problem is:
4137:
4138: \begin{eqnarray}
4139: &&L(q_{xy},o_{xy},\rho,\mu_1,\mu_2, {\nu}_1, {\nu}_2, {\nu}_3 , {\nu}_4)\nonumber\\
4140: &=& \gamma
4141: D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})\nonumber\\
4142: %
4143: %
4144: &&+\mu_1(\sum_x\sum_y q_{xy}(x,y)-1) +\mu_2(\sum_x\sum_y
4145: o_{xy}(x,y)-1)\nonumber\\
4146: %
4147: %
4148: &&+\rho(b-\gamma H(q_{x|y})-(1-\gamma)H(o_{xy}))\nonumber\\
4149: %
4150: %
4151: &&+\sum_x\sum_y\big\{ {\nu}_1(x,y)(-q_{xy}(x,y))+
4152: {\nu}_2(x,y)(1-q_{xy}(x,y)) +{\nu}_3(x,y)(-o_{xy}(x,y))
+{\nu}_4(x,y)(1-o_{xy}(x,y))\big\}\nonumber
4154: \end{eqnarray}
where $\rho,\mu_1,\mu_2$ are real numbers and ${\nu}_i\in \mathbb{R}^{
4156: |\mathcal{X}||\mathcal{Y}|}$, $i=1,2,3,4$.
4157:
4158: According to the KKT conditions for convex
4159: optimization\cite{Boyd2004}, $q_{xy}, o_{xy}$ minimize the
4160: convex optimization problem in (\ref{eqn:convex_opt_setup}) if and
4161: only if the following conditions are simultaneously satisfied for
4162: some $q_{xy}$, $o_{xy}$, $\mu_1$, $\mu_2$, $\nu_1$, $\nu_2$,
4163: $\nu_3$, $\nu_4$ and $\rho$:
4164:
4165:
4166: \begin{eqnarray}
4167: 0&=&\frac{\partial L(q_{xy},o_{xy},\rho,\mu_1,\mu_2,{\nu}_1, {\nu}_2, {\nu}_3 , {\nu}_4)}{\partial q_{xy}(x,y)} \nonumber\\
&=& \gamma[-\log (p_{\rvx\rvy}(x,y))+1+(1+\rho) \log(q_{xy}(x,y))- \rho \log(\sum_{s}q_{xy}(s,y))] +\mu_1- \nu_1(x,y)- \nu_2(x,y)\nonumber\\
4169: %
4170: %
4171: %
4172: 0&=&\frac{\partial L(q_{xy},o_{xy},\rho,\mu_1,\mu_2,{\nu}_1, {\nu}_2, {\nu}_3 , {\nu}_4)}{\partial o_{xy}(x,y)} \nonumber\\
4173: &=& (1-\gamma) [-\log (p_{\rvx\rvy}(x,y))+(1+\rho)
4174: (1+\log(o_{xy}(x,y)))]+\mu_2- \nu_3(x,y)- \nu_4(x,y)
4175: \label{eqn:multiplier1}
4176: \end{eqnarray}
4177: For all $x$, $y$ and
4178: %
4179: %
4180: \begin{eqnarray}
4181: &&\sum_x\sum_y q_{xy}(x,y)=1\nonumber\\
4182: &&\sum_x\sum_y o_{xy}(x,y)=1\nonumber\\
4183: &&\rho( \gamma H(q_{x|y})+(1-\gamma)H(o_{xy})-b)=0\nonumber\\
4184: && \rho \geq 0\nonumber\\
4185: && \nu_1(x,y) (-q_{xy}(x,y))=0, \ \ \ \nu_2(x,y) (1-q_{xy}(x,y))=0\
4186: \ \ \forall x,y \nonumber\\
4187: && \nu_3(x,y) (-o_{xy}(x,y))=0, \ \ \ \nu_4(x,y) (1-o_{xy}(x,y))=0\
4188: \ \ \forall x,y \nonumber\\
4189: &&\nu_i(x,y)\geq 0, \ \ \ \forall x,y, i=1,2,3,4
4190: \label{eqn:multiplier2}
4191: \end{eqnarray}
4192:
4193: Solving the above standard Lagrange multiplier equations
4194: (\ref{eqn:multiplier1}) and (\ref{eqn:multiplier2}), we have:
4195:
4196: \begin{eqnarray}
4197: q_{xy}(x,y)&=&\frac{[\sum_s p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho_b}}]^{1+\rho_b}}{\sum_t[\sum_s p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho_b}}]^{1+\rho_b}}\frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho_b}}}{\sum_s p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho_b}}} \nonumber\\
4198: &=& {\pBar^{\rho_b}_{\rvx\rvy}(x,y)}\nonumber\\
4199: o_{xy}(x,y)&=&\frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho_b}}}{\sum_t\sum_s p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho_b}}} \nonumber\\
4200: &=& {p^{\rho_b}_{\rvx\rvy}(x,y)}\nonumber\\
4201: %
4202: %
4203: \nu_i(x,y)&=&0\ \ \ \forall x,y, i=1,2,3,4\nonumber\\
4204: \rho &=&\rho_b
4205: \end{eqnarray}
4206: Where $\rho_b$ satisfies the following condition $$\gamma
4207: H(\pBar^{\rho_b}_{\rvx|\rvy})+(1-\gamma)H(p^{\rho_b}_{\rvx\rvy})=b
4208: \geq \gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})$$
4209: and thus
4210: $\rho_b\geq 0$ because both $H(\pBar^{\rho}_{\rvx|\rvy})$ and
4211: $H(p^{\rho}_{\rvx\rvy})$ are monotonically increasing with $\rho$ as
4212: shown in Lemma~\ref{LEMMAAPP2} and Lemma~\ref{LEMMAAPP2_SI}.
4213:
4214: Notice that all the KKT conditions are simultaneously satisfied with
4215: the inequality constraint $\gamma H(q_{x|y})+(1-\gamma)H(o_{xy})\geq
4216: b$ being met with equality. Thus, the relaxed optimization problem has
4217: the same optimal solution as the original problem as promised. The
4218: optimal $q_{xy}$ and $o_{xy}$ are the $\rvx-\rvy$ tilted distribution
4219: $\pBar^{\rho_b}_{\rvx\rvy}$ and standard tilted distribution
4220: $p^{\rho_b}_{\rvx\rvy}$ of $p_{\rvx\rvy}$ with the same parameter
$\rho_b\geq 0$ chosen s.t.
4222: $$\gamma H(\pBar^{\rho_b}_{\rvx|\rvy})+(1-\gamma)H(p^{\rho_b}_{\rvx\rvy})=b$$
Now we have:
4224: \begin{eqnarray}
4225: &&E^{UN}_x(R_x,R_y,\gamma)\nonumber\\
4226: &=&\inf_{b\geq \gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})} \{\inf_{q_{xy},o_{xy}:\gamma H(q_{x|y}) + (1-\gamma)H(o_{xy})=b} \{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})+\max(0,R^{(\gamma)}-b)\}\}\nonumber\\
4227: &=&\inf_{b\geq \gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})}\{\gamma D(\pBar^{\rho_b}_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^{\rho_b}_{\rvx\rvy}||p_{\rvx\rvy})+\max(0,R^{(\gamma)}-b)\}\nonumber\\
&=& \min [\inf_{\rho\geq 0: R^{(\gamma)} \geq \gamma H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma H(\pBar^\rho_{\rvx|\rvy})-(1-\gamma)H(p^\rho_{\rvx\rvy})\},\nonumber\\
&&\inf_{\rho \geq 0: R^{(\gamma)} \leq \gamma
H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma
D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma
)D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})\}]\label{eqn:big_two_conditions}
4233: \end{eqnarray}
4234: Notice that $H(p^\rho_{\rvx\rvy})$, $H(\pBar^\rho_{\rvx|\rvy})$,
4235: $D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})$ and
4236: $D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})$ are all strictly increasing with
4237: $\rho>0$ as shown in Lemma~\ref{LEMMAAPP2_SI},
4238: Lemma~\ref{LEMMAAPP3_SI}, Lemma~\ref{LEMMAAPP2} and
4239: Lemma~\ref{LEMMAAPP3} later in this appendix. We have:
4240: \begin{eqnarray}
4241: & & \inf_{\rho \geq 0: R^{(\gamma)} \leq \gamma
4242: H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma
4243: D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma
4244: )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})\} \nonumber\\
4245: &=&\gamma
4246: D(\pBar^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma
4247: )D(p^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})\label{eqn:condition1}
4248: \end{eqnarray}
4249: where $R^{(\gamma)} =\gamma
4250: H(\pBar^{\rho^*}_{\rvx|\rvy})+(1-\gamma)H(p^{\rho^*}_{\rvx\rvy})$.
4251: Applying the results in Lemma~\ref{LEMMAAPP4_SI} and
4252: Lemma~\ref{LEMMAAPP4}, we get:
4253: \begin{eqnarray}
4254: &&\inf_{\rho \geq 0: R^{(\gamma)} \geq \gamma H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma H(\pBar^\rho_{\rvx|\rvy})-(1-\gamma)H(p^\rho_{\rvx\rvy})\}\nonumber\\
4255: &&=\gamma D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma
4256: )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy}) +R^{(\gamma)}-\gamma
4257: H(\pBar^\rho_{\rvx|\rvy})-(1-\gamma)H(p^\rho_{\rvx\rvy})|_{\rho=\rho^*}\nonumber\\
4258: &&=\gamma D(\pBar^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma
4259: )D(p^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})\label{eqn:condition2}
4260: \end{eqnarray} This is true because for $\rho : R^{(\gamma)} \geq
4261: \gamma H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})$,
4262: we know $\rho\leq 1$ because of the range of $R^{(\gamma)}$:
4263: $R^{(\gamma)} < \gamma H(\pBar^1_{\rvx|\rvy}
4264: )+(1-\gamma)H(p^1_{\rvx\rvy} )$. Substituting (\ref{eqn:condition1})
4265: and (\ref{eqn:condition2}) into (\ref{eqn:big_two_conditions}), we
4266: get
4267: \begin{eqnarray}
4268: E^{UN}_x(R_x,R_y,\gamma)&=&\gamma
4269: D(\pBar^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma
4270: )D(p^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})\nonumber\\
4271: && \mbox{where } \ \ R^{(\gamma)} =\gamma
4272: H(\pBar^{\rho^*}_{\rvx|\rvy})+(1-\gamma)H(p^{\rho^*}_{\rvx\rvy})
4273: \end{eqnarray}
4274: So for $\gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})\leq
4275: R^{(\gamma)} \leq \gamma
4276: H(\pBar^1_{\rvx|\rvy})+(1-\gamma)H(p^1_{\rvx\rvy})$, from
4277: (\ref{eqn:ML_error_expression}) we have the desired property:
4278: $$E^{ML}_x(R_x,R_y,\gamma)=E^{UN}_x(R_x,R_y,\gamma)$$
4279:
4280:
4281: \subsection{case 2: $ R^{(\gamma)} \geq \gamma
4282: H(\pBar^1_{\rvx|\rvy})+(1-\gamma)H(p^1_{\rvx\rvy})$.}\label{case:2}
4283:
4284:
4285: In this case, for all $0\leq \rho\leq 1$
4286: $$\frac{\partial E^{ML}_x(R_x,R_y,\gamma,\rho)}{\partial
4287: \rho }=R^{(\gamma)}-\gamma H(\pBar^\rho_{\rvx|\rvy}
4288: )-(1-\gamma)H(p^\rho_{\rvx\rvy})\geq R^{(\gamma)}-\gamma
4289: H(\pBar^1_{\rvx|\rvy} )-(1-\gamma)H(p^1_{\rvx\rvy})\geq 0$$
4290:
4291: So $\rho$ takes value $1$ to maximize the error exponent
4292: $E^{ML}_x(R_x,R_y,\gamma,\rho)$, thus
4293: \begin{eqnarray}
4294: E^{ML}_x(R_x,R_y,\gamma)=R^{(\gamma)} -\gamma
4295: \log(\sum_{y}(\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{2}})^{2})-
4296: 2(1-\gamma)\log(\sum_{y}\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{2}})
4297: \end{eqnarray}
4298:
4299: Using the same convex optimization techniques as case \ref{case:1}, we
4300: notice the fact that $\rho^*\geq 1$ for $R^{(\gamma)}
4301: =\gamma
4302: H(\pBar^{\rho^*}_{\rvx|\rvy})+(1-\gamma)H(p^{\rho^*}_{\rvx\rvy})$.
4303: Then applying Lemma~\ref{LEMMAAPP4_SI} and Lemma~\ref{LEMMAAPP4}, we
4304: have:
4305: \begin{eqnarray}
&&\inf_{\rho\geq 0: R^{(\gamma)} \geq \gamma H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma H(\pBar^\rho_{\rvx|\rvy})-(1-\gamma)H(p^\rho_{\rvx\rvy})\}\nonumber\\
4307: &&=\gamma D(\pBar^1_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma
4308: )D(p^1_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma
4309: H(\pBar^{1}_{\rvx|\rvy})-(1-\gamma)H(p^1_{\rvx\rvy})\nonumber
4310: \end{eqnarray}
4311: And
4312: %%
4313: %%
4314: \begin{eqnarray}
&&\inf_{\rho \geq 0: R^{(\gamma)} \leq \gamma H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})\}\nonumber\\
4316: &&=\gamma D(\pBar^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma
4317: )D(p^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})\nonumber\\
4318: &&=\gamma D(\pBar^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma
4319: )D(p^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma
4320: H(\pBar^{\rho^*}_{\rvx|\rvy})-(1-\gamma)H(p^{\rho^*}_{\rvx\rvy})\nonumber\\
&&\geq \gamma D(\pBar^1_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma
4322: )D(p^1_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma
4323: H(\pBar^{1}_{\rvx|\rvy})-(1-\gamma)H(p^1_{\rvx\rvy})\nonumber
4324: \end{eqnarray}
4325:
4326: Finally:
4327: \begin{eqnarray}
4328: &&E^{UN}_x(R_x,R_y,\gamma)\nonumber\\
4329: &=&\inf_{b\geq \gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})} \{\inf_{q_{xy},o_{xy}:\gamma H(q_{x|y}) + (1-\gamma)H(o_{xy})=b} \{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})+\max(0,R^{(\gamma)}-b)\}\}\nonumber\\
4330: &=&\inf_{b\geq \gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})}\{\gamma D(\pBar^{\rho_b}_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^{\rho_b}_{\rvx\rvy}||p_{\rvx\rvy})+\max(0,R^{(\gamma)}-b)\}\nonumber\\
4331: &=&\min [\inf_{\rho\geq 0: R^{(\gamma)} \geq \gamma H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma H(\pBar^\rho_{\rvx|\rvy})-(1-\gamma)H(p^\rho_{\rvx\rvy})\},\nonumber\\
4332: &&\inf_{\rho \geq 0: R^{(\gamma)} \leq \gamma H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})\}]\nonumber\\
4333: &=&\gamma D(\pBar^1_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma
4334: )D(p^1_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma
4335: H(\pBar^1_{\rvx|\rvy})-(1-\gamma)H(p^1_{\rvx\rvy})\nonumber\\
4336: &=&R^{(\gamma)} -\gamma \log(\sum_{y }(\sum_{x
4337: }p_{\rvx\rvy}(x,y)^{\frac{1}{2}})^{2})- 2(1-\gamma)\log(\sum_{y
4338: }\sum_{x }p_{\rvx\rvy}(x,y)^{\frac{1}{2}})
4339: \end{eqnarray}
4340: The last equality is true by setting $\rho =1$ in
4341: Lemma~\ref{LEMMA_APP8} and Lemma~\ref{LEMMA_APP9}.
4342:
4343:
4344: Again, $E^{ML}_x(R_x,R_y,\gamma)=E^{UN}_x(R_x,R_y,\gamma)$, thus we finish the
4345: proof.\hfill$\blacksquare$\\
4346:
4347:
4348:
4349: %------------------------------------ Lemmas ------------------------------------------
4350: \subsection{Technical Lemmas}
4351: Some technical lemmas we used in the above proof of
4352: Theorem~\ref{THM:Universal_ML_SW} are now discussed:
4353:
4354: \begin{lemma}\label{LEMMAAPP2}
4355: $\frac{\partial H(p^\rho_{\rvx\rvy})}{\partial \rho}\geq0$
4356: \end{lemma}
4357: \pf From the definition of the tilted distribution we have the
4358: following observation:
4359:
4360: $\log(p^\rho_{\rvx\rvy}(x_1,y_1))-\log
4361: (p^\rho_{\rvx\rvy}(x_2,y_2))=\log(p_{\rvx\rvy}(x_1,y_1)^{\frac{1}{1+
4362: \rho}})-\log(p_{\rvx\rvy}(x_2,y_2)^{\frac{1}{1+
4363: \rho}})$\\ Using the above equality, we first derive the derivative
4364: of the tilted distribution, for all $x,y$
4365:
4366: \begin{eqnarray}
4367: \frac{\partial p^\rho_{\rvx\rvy}(x,y) }{\partial \rho}
4368: &=&\frac{-1}{(1+\rho)^2}
4369: \frac{ p_{\rvx\rvy}(x,y)^{\frac{1}{1+
4370: \rho}}\log(p_{\rvx\rvy}(x,y)) (\sum_t\sum_s
4371: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})}{(\sum_t\sum_s
4372: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})^2}
4373: \nonumber\\
4374: &&-\frac{-1}{(1+\rho)^2}
4375: \frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+
4376: \rho}} (\sum_t\sum_s
4377: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}}
4378: \log(p_{\rvx\rvy}(s,t)))}{(\sum_t\sum_s
4379: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})^2}
4380: \nonumber\\
4381: &=&\frac{-1}{1+\rho}
4382: p^\rho_{\rvx\rvy}(x,y)[ \log(p_{\rvx\rvy}(x,y)^{\frac{1}{1+
4383: \rho}})-\sum_t\sum_s
4384: p^\rho_{\rvx\rvy}(s,t)\log(p_{\rvx\rvy}(s,t)^{\frac{1}{1+
4385: \rho}})]
4386: \nonumber\\
4387: &=&\frac{-1}{1+\rho}
4388: p^\rho_{\rvx\rvy}(x,y)[ \log(p^\rho_{\rvx\rvy}(x,y))-\sum_t\sum_s
4389: p^\rho_{\rvx\rvy}(s,t)\log(p^\rho_{\rvx\rvy}(s,t))]\nonumber\\
4390: &=&-\frac{p^\rho_{\rvx\rvy}(x,y)}{1+\rho}[\log(p^\rho_{\rvx\rvy}(x,y))+H(p^\rho_{\rvx\rvy})]
4391: \end{eqnarray}
4392: Then:
4393:
4394: \begin{eqnarray}
4395: \frac{\partial H(p^\rho_{\rvx\rvy})}{\partial \rho}&=&-\frac{\partial \sum_{x,y} p^\rho_{\rvx\rvy}(x,y) \log( p^\rho_{\rvx\rvy}(x,y))}{\partial \rho}\nonumber\\
4396: &=&-\sum_{x,y} (1+\log(p^\rho_{\rvx\rvy}(x,y)))\frac{\partial p^\rho_{\rvx\rvy}(x,y)}{\partial \rho}\nonumber \\
4397: &=&\sum_{x,y} (1+\log(p^\rho_{\rvx\rvy}(x,y)))\frac{p^\rho_{\rvx\rvy}(x,y)}{1+\rho}(\log(p^\rho_{\rvx\rvy}(x,y))+H(p^\rho_{\rvx\rvy}))\nonumber\\
4398: &=&\frac{1}{1+\rho}\sum_{x,y} p^\rho_{\rvx\rvy}(x,y) \log(p^\rho_{\rvx\rvy}(x,y)) (\log(p^\rho_{\rvx\rvy}(x,y))+H(p^\rho_{\rvx\rvy}))\nonumber\\
4399: &=&\frac{1}{1+\rho}[\sum_{x,y}p^\rho_{\rvx\rvy}(x,y) (\log(p^\rho_{\rvx\rvy}(x,y)))^2-H(p^\rho_{\rvx\rvy})^2]\nonumber\\
4400: &=&\frac{1}{1+\rho}[\sum_{x,y} p^\rho_{\rvx\rvy}(x,y) (\log(p^\rho_{\rvx\rvy}(x,y)))^2\sum_{x,y} p^\rho_{\rvx\rvy}(x,y)-H(p^\rho_{\rvx\rvy})^2]\nonumber\\
4401: &\geq_{(a)}&\frac{1}{1+\rho}[(\sum_{x,y}p^\rho_{\rvx\rvy}(x,y) \log(p^\rho_{\rvx\rvy}(x,y)))^2-H(p^\rho_{\rvx\rvy})^2]\nonumber\\
4402: &=& 0
4403: \end{eqnarray}
where (a) is true by the Cauchy--Schwarz inequality. \hfill$\blacksquare$\\
4405:
4406:
4407:
4408: \begin{lemma}\label{LEMMAAPP3}
$\frac{\partial D(p^\rho_{\rvx\rvy}\|p_{\rvx\rvy})}{\partial
\rho}=\rho\frac{\partial H(p^\rho_{\rvx\rvy})}{\partial \rho} $
4411: \end{lemma}
4412: \pf As shown in Lemma~\ref{LEMMA_APP8} and Lemma~\ref{LEMMA_APP10}
4413: respectively:
4414: $$D(p^\rho_{\rvx\rvy}\|p_{\rvx\rvy})=\rho
4415: H(p^\rho_{\rvx\rvy})-(1+\rho) \log(\sum_{x,y
4416: }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})$$
4417: $$ H(p^\rho_{\rvx\rvy})=\frac{\partial (1+\rho)\log(\sum_{y}\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})}{\partial \rho} $$
4418:
4419: We have:
4420: \begin{eqnarray}
4421: \frac{\partial D(p^\rho_{\rvx\rvy}\|p_{\rvx\rvy})}{\partial \rho}&=&
4422: H(p^\rho_{\rvx\rvy}) +\rho\frac{\partial
4423: H(p^\rho_{\rvx\rvy})}{\partial \rho}-\frac{\partial (1+\rho)\log(\sum_{y}\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})}{\partial \rho}\nonumber\\
4424: &=& H(p^\rho_{\rvx\rvy}) +\rho\frac{\partial
4425: H(p^\rho_{\rvx\rvy})}{\partial \rho}-H(p^\rho_{\rvx\rvy}) \nonumber\\
4426: &=&\rho\frac{\partial H(p^\rho_{\rvx\rvy})}{\partial \rho}
4427: \end{eqnarray}
4428: \hfill$\blacksquare$\\
4429:
4430:
4431: \begin{lemma}\label{LEMMAAPP4}
$\operatorname{sign}\frac{\partial
[D(p^\rho_{\rvx\rvy}\|p_{\rvx\rvy})-H(p^\rho_{\rvx\rvy})]}{\partial
\rho}=\operatorname{sign}(\rho-1)$.
4435:
4436: \end{lemma}
\pf Combining the results of the previous two lemmas, we have:
\begin{eqnarray}
&&\frac{\partial
[D(p^\rho_{\rvx\rvy}\|p_{\rvx\rvy})-H(p^\rho_{\rvx\rvy})]}{\partial
\rho}=(\rho-1)\frac{\partial H(p^\rho_{\rvx\rvy})}{\partial
\rho}\nonumber
\end{eqnarray}
Since $\frac{\partial H(p^\rho_{\rvx\rvy})}{\partial \rho}\geq 0$ by
Lemma~\ref{LEMMAAPP2}, the sign of the derivative is that of
$\rho-1$, and we get the conclusion. \hfill$\blacksquare$\\
4444:
4445:
4446: \begin{lemma}\label{LEMMAAPP1_SI} Properties of
4447: $\frac{\partial A(y,\rho)}{\partial \rho}$, $\frac{\partial
4448: B(\rho)}{\partial \rho}$, $\frac{\partial C(x, y,\rho)}{\partial
4449: \rho}$, $\frac{\partial D(y,\rho)}{\partial \rho}$ and
4450: $\frac{\partial H(\pBar^\rho_{\rvx|\rvy=\svy})}{\partial \rho}$
4451: \end{lemma}
4452:
\pf First,
4454: \begin{eqnarray}
4455: \frac{\partial C(x, y,\rho)}{\partial \rho}&=&\frac{\partial p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}}}{\partial \rho}= -\frac{1}{1+\rho}p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}} \log(p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})\nonumber\\
4456: &=&-\frac{C(x,y,\rho)}{1+\rho}\log(C(x,y,\rho))\nonumber\\
4457: \frac{\partial D(y,\rho)}{\partial \rho}&=&\frac{\partial \sum_ s
4458: p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho}}}{\partial
4459: \rho}=-\frac{1}{1+\rho}\sum_s p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho}}
4460: \log(p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho}})\nonumber\\&=&-\frac{\sum_x
4461: C(x,y,\rho)\log(C(x,y,\rho))}{1+\rho}
4462: \end{eqnarray}
4463:
4464: For a differentiable function $f(\rho)$,
4465: $$\frac{\partial f(\rho)^{1+\rho}}{\partial \rho}=f(\rho)^{1+\rho}\log(f(\rho))+ (1+\rho)f(\rho)^\rho\frac{\partial f(\rho)}{\partial
4466: \rho}$$ So
4467: \begin{eqnarray}
4468: \frac{\partial A(y,\rho)}{\partial \rho}&=&\frac{\partial D(y,\rho)^{1+\rho}}{\partial \rho}= D(y,\rho)^{1+\rho}\log( D(y,\rho))+ (1+\rho) D(y,\rho)^\rho\frac{\partial D(y,\rho)}{\partial \rho}\nonumber\\
4469: &=&D(y,\rho)^{1+\rho}(\log(D(y,\rho))- \sum_x \frac{C(x,y,\rho)}{D(y,\rho)}\log(C(x,y,\rho)))\nonumber\\
&=&D(y,\rho)^{1+\rho} (- \sum_x \frac{C(x,y,\rho)}{D(y,\rho)}\log(\frac{C(x,y,\rho)}{D(y,\rho)}))\nonumber\\
4471: &=& A(y,\rho) H(\pBar^\rho_{\rvx|\rvy=\svy})\nonumber\\
4472: \frac{\partial B(\rho)}{\partial \rho}&=&\sum_y \frac{\partial
4473: A(y,\rho)}{\partial \rho}=\sum_y
4474: A(y,\rho)H(\pBar^\rho_{\rvx|\rvy=\svy})=B(\rho)\sum_y
4475: \frac{A(y,\rho)}{B(\rho)}H(\pBar^\rho_{\rvx|\rvy=\svy})
4476: =B(\rho)H(\pBar^\rho_{\rvx|\rvy})\nonumber
4477: \end{eqnarray}
4478: And last:
4479:
4480: \begin{eqnarray}
4481: & & \frac{\partial H(\pBar^\rho_{\rvx|\rvy=\svy})}{\partial
4482: \rho} \nonumber\\
4483: &=&-\sum_x [\frac{\frac{\partial C(x,y,\rho)}{\partial \rho}}{D(y,\rho)}-\frac{C(x,y,\rho)\frac{\partial D(y,\rho)}{\partial \rho}}{D(y,\rho)^2}][1+\log(\frac{C(x,y,\rho)}{D(y,\rho)})]\nonumber\\
4484: &=&-\sum_x
4485: [\frac{-\frac{C(x,y,\rho)}{1+\rho}\log(C(x,y,\rho))}{D(y,\rho)}+\frac{C(x,y,\rho)\frac{\sum_s
4486: C(s,y,\rho)\log(C(s,y,\rho))}{1+\rho}}{D(y,\rho)^2}][1+\log(\frac{C(x,y,\rho)}{D(y,\rho)})]\nonumber\\
4487: &=&\frac{1}{1+\rho}\sum_x
4488: [\pBar^\rho_{\rvx|\rvy}(x,y)\log(C(x,y,\rho))-\pBar^\rho_{\rvx|\rvy}(x,y) \sum_s
4489: \pBar^\rho_{\rvx|\rvy}(s,y)\log(C(s,y,\rho))][1+\log(\pBar^\rho_{\rvx|\rvy}(x,y))]\nonumber\\
4490: &=&\frac{1}{1+\rho}\sum_x
4491: \pBar^\rho_{\rvx|\rvy}(x,y)[\log(\pBar^\rho_{\rvx|\rvy}(x,y))- \sum_s
4492: \pBar^\rho_{\rvx|\rvy}(s,y)\log(\pBar^\rho_{\rvx|\rvy}(s,y))][1+\log(\pBar^\rho_{\rvx|\rvy}(x,y))]\nonumber\\
4493: &=&\frac{1}{1+\rho}\sum_x
4494: \pBar^\rho_{\rvx|\rvy}(x,y)\log(\pBar^\rho_{\rvx|\rvy}(x,y))[\log(\pBar^\rho_{\rvx|\rvy}(x,y))- \sum_s
4495: \pBar^\rho_{\rvx|\rvy}(s,y)\log(\pBar^\rho_{\rvx|\rvy}(s,y))]\nonumber\\
4496: &=&\frac{1}{1+\rho}\sum_x
4497: \pBar^\rho_{\rvx|\rvy}(x,y)\log(\pBar^\rho_{\rvx|\rvy}(x,y))\log(\pBar^\rho_{\rvx|\rvy}(x,y)) -\frac{1}{1+
4498: \rho} [\sum_x
4499: \pBar^\rho_{\rvx|\rvy}(x,y)\log(\pBar^\rho_{\rvx|\rvy}(x,y))]^2\nonumber\\
4500: &\geq& 0
4501: \end{eqnarray}
The inequality is true by the Cauchy--Schwarz inequality and by
noticing that $\sum_x \pBar^\rho_{\rvx|\rvy}(x,y)=1$. \hfill$\blacksquare$
4504:
4505: These properties will again be used in the proofs in the following
4506: lemmas.
4507:
4508: \begin{lemma}\label{LEMMAAPP2_SI}
4509: $\frac{\partial H(\pBar^\rho_{\rvx|\rvy})}{\partial \rho}\geq0$
4510: \end{lemma}
4511: \pf
4512: \begin{eqnarray}
4513: \frac{\partial \frac{A(y,\rho)}{B(\rho)}}{\partial \rho}&=&\frac{1}{B(\rho)^2}(\frac{\partial A(y,\rho)}{\partial \rho}B(\rho)-\frac{\partial B(\rho)}{\partial \rho}A(y, \rho))\nonumber\\
4514: &=&\frac{1}{B(\rho)^2}( A(y,\rho)H(\pBar^\rho_{\rvx|\rvy=\svy})B(\rho)- H(\pBar^\rho_{\rvx|\rvy})B(\rho)A(y,\rho))\nonumber\\
4515: &=&\frac{A(y,\rho)}{B(\rho)}( H(\pBar^\rho_{\rvx|\rvy=\svy})-
4516: H(\pBar^\rho_{\rvx|\rvy}))\nonumber
4517: \end{eqnarray}
4518: Now,
4519:
4520:
4521: \begin{eqnarray}
4522: \frac{\partial H(\pBar^\rho_{\rvx|\rvy})}{\partial\rho}&=& \frac{\partial}{\partial\rho}\sum_y\frac{A(y,\rho)}{B(\rho)}\sum_x \frac{C(x,y,\rho)}{D(y,\rho)}[-\log(\frac{C(x,y,\rho)}{D(y,\rho)})] \nonumber\\
4523: &=& \frac{\partial}{\partial\rho}\sum_y\frac{A(y,\rho)}{B(\rho)}H(\pBar^\rho_{\rvx|\rvy=\svy}) \nonumber\\
4524: &=& \sum_y\frac{A(y,\rho)}{B(\rho)}\frac{\partial H(\pBar^\rho_{\rvx|\rvy=\svy})}{\partial\rho} + \sum_y \frac{\partial \frac{A(y,\rho)}{B(\rho)}}{\partial\rho}H(\pBar^\rho_{\rvx|\rvy=\svy})\nonumber\\
4525: &\geq& \sum_y \frac{\partial \frac{A(y,\rho)}{B(\rho)}}{\partial\rho}H(\pBar^\rho_{\rvx|\rvy=\svy})\nonumber\\
4526: &=& \sum_y \frac{A(y,\rho)}{B(\rho)}( H(\pBar^\rho_{\rvx|\rvy=\svy})-H(\pBar^\rho_{\rvx|\rvy})) H(\pBar^\rho_{\rvx|\rvy=\svy})\nonumber\\
4527: &=& \sum_y \frac{A(y,\rho)}{B(\rho)} H(\pBar^\rho_{\rvx|\rvy=\svy})^2-H(\pBar^\rho_{\rvx|\rvy}) ^2\nonumber\\
4528: &=& (\sum_y \frac{A(y,\rho)}{B(\rho)} H(\pBar^\rho_{\rvx|\rvy=\svy})^2)(\sum_y \frac{A(y,\rho)}{B(\rho)})-H(\pBar^\rho_{\rvx|\rvy}) ^2\nonumber\\
4529: &\geq_{(a)}& (\sum_y \frac{A(y,\rho)}{B(\rho)} H(\pBar^\rho_{\rvx|\rvy=\svy}))^2-H(\pBar^\rho_{\rvx|\rvy}) ^2\nonumber\\
4530: &=&0
4531: \end{eqnarray}
where (a) is again true by the Cauchy--Schwarz inequality. \hfill$\blacksquare$
4533:
4534:
4535:
4536: \begin{lemma}\label{LEMMAAPP3_SI}
4537: $\frac{\partial D(\pBar^\rho_{\rvx\rvy}\|p_{\rvx\rvy})}{\partial
4538: \rho}=\rho \frac{\partial H(\pBar^\rho_{\rvx|\rvy})}{\partial \rho}$
4539: \end{lemma}
4540: \pf As shown in Lemma~\ref{LEMMA_APP9} and Lemma~\ref{LEMMA_APP11}
4541: respectively:
4542: $$D(\pBar^\rho_{\rvx\rvy}\|p_{\rvx\rvy})=\rho H(\pBar^\rho_{\rvx|\rvy})- \log(\sum_{y
4543: }(\sum_{x }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho})$$
4544: $$ H(\pBar^\rho_{\rvx|\rvy})=\frac{\partial \log(\sum_{y
4545: }(\sum_{x }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho})}{\partial
4546: \rho} $$
4547:
4548: We have:
4549: \begin{eqnarray}
4550: \frac{\partial D(\pBar^\rho_{\rvx\rvy}\|p_{\rvx\rvy})}{\partial
4551: \rho}&=& H(\pBar^\rho_{\rvx|\rvy}) +\rho\frac{\partial
4552: H(\pBar^\rho_{\rvx|\rvy})}{\partial \rho}-\frac{\partial
4553: \log(\sum_{y
4554: }(\sum_{x }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho})}{\partial \rho }\nonumber\\
4555: &=& H(\pBar^\rho_{\rvx|\rvy}) +\rho\frac{\partial
4556: H(\pBar^\rho_{\rvx|\rvy})}{\partial \rho}-H(\pBar^\rho_{\rvx|\rvy}) \nonumber\\
4557: &=&\rho\frac{\partial H(\pBar^\rho_{\rvx|\rvy})}{\partial \rho}
4558: \end{eqnarray} \hfill$\blacksquare$
4559:
4560:
4561: \begin{lemma}\label{LEMMAAPP4_SI}
$\operatorname{sign}\frac{\partial
[D(\pBar^\rho_{\rvx\rvy}\|p_{\rvx\rvy})-H(\pBar^\rho_{\rvx|\rvy})]}{\partial
\rho}=\operatorname{sign}(\rho-1)$.
4565: \end{lemma}
4566: \pf Using the previous lemma, we get:
4567: \begin{eqnarray}
4568: &&\frac{\partial
4569: D(\pBar^\rho_{\rvx\rvy}\|p_{\rvx\rvy})-H(\pBar^\rho_{\rvx|\rvy})}{\partial
4570: \rho}=(\rho-1)\frac{\partial H(\pBar^\rho_{\rvx|\rvy})}{\partial
4571: \rho}\nonumber
4572: \end{eqnarray}
4573: Then by Lemma~\ref{LEMMAAPP2_SI}, we get the
4574: conclusion.\hfill$\blacksquare$\\
4575:
4576:
4577:
4578:
4579:
4580:
4581:
4582: \begin{lemma}\label{LEMMA_APP8}
4583: $$\rho H(p^\rho_{\rvx\rvy})-(1+\rho)\log(\sum_{y}\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})=D(p^\rho_{\rvx\rvy}\|p_{\rvx\rvy})$$
4584: \end{lemma}
\pf Noticing that
$\log(p_{\rvx\rvy}(x,y))=(1+\rho)[\log(p^\rho_{\rvx\rvy}(x,y))+\log(\sum_{s,t}
p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})]$, we have:
4588: \begin{eqnarray}
4589: D(p^\rho_{\rvx\rvy}\|p_{\rvx\rvy})&=&-H(p^\rho_{\rvx\rvy})-\sum_{x,y}p^\rho_{\rvx\rvy}(x,y)\log(p_{\rvx\rvy}(x,y))\nonumber\\
4590: &=&-H(p^\rho_{\rvx\rvy})-\sum_{x,y}p^\rho_{\rvx\rvy}(x,y)(1+\rho)[\log(p^\rho_{\rvx\rvy}(x,y))+\log(\sum_{s,t}
4591: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})]\nonumber\\
4592: &=&-H(p^\rho_{\rvx\rvy})+(1+\rho)H(p^\rho_{\rvx\rvy})-(1+\rho)\sum_{x,y}p^\rho_{\rvx\rvy}(x,y)\log(\sum_{s,t}
4593: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})\nonumber\\
4594: &=&\rho
4595: H(p^\rho_{\rvx\rvy})-(1+\rho)\log(\sum_{s,t}p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})\end{eqnarray}
4596:
4597:
4598: \hfill$\blacksquare$
4599:
4600:
4601:
4602: \begin{lemma}\label{LEMMA_APP9}
4603: $$\rho H(\pBar^\rho_{\rvx|\rvy})- \log(\sum_{y
4604: }(\sum_{x
4605: }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho})=D(\pBar^\rho_{\rvx\rvy}\|p_{\rvx\rvy})$$
4606: \end{lemma}
4607: \pf
4608: \begin{eqnarray}
4609: D(\pBar^\rho_{\rvx\rvy}\|p_{\rvx\rvy})&=&\sum_y\sum_x \frac{A(y,\rho)}{B(\rho)}\frac{C(x,y,\rho)}{D(y,\rho)}\log(\frac{\frac{A(y,\rho)}{B(\rho)}\frac{C(x,y,\rho)}{D(y,\rho)}}{p_{\rvx\rvy}(x,y)})\nonumber\\
4610: &=& \sum_y\sum_x \frac{A(y,\rho)}{B(\rho)}\frac{C(x,y,\rho)}{D(y,\rho)}[\log(\frac{A(y,\rho)}{B(\rho)})+\log(\frac{C(x,y,\rho)}{D(y,\rho)})-\log(p_{\rvx\rvy}(x,y))]\nonumber\\
4611: &=& -\log(B(\rho)) - H(\pBar^\rho_{\rvx|\rvy}) + \sum_y\sum_x \frac{A(y,\rho)}{B(\rho)}\frac{C(x,y,\rho)}{D(y,\rho)}[\log(D(y,\rho)^{1+\rho})-\log(C(x,y,\rho)^{1+\rho})]\nonumber\\
4612: &=& -\log(B(\rho)) - H(\pBar^\rho_{\rvx|\rvy}) +(1+\rho) H(\pBar^\rho_{\rvx|\rvy})\nonumber\\
4613: &=& - \log(\sum_{y }(\sum_{x
4614: }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho}) + \rho
4615: H(\pBar^\rho_{\rvx|\rvy}) \nonumber
4616: \end{eqnarray}
4617: \hfill$\blacksquare$
4618:
4619: \begin{lemma}\label{LEMMA_APP10}
4620: $$ H(p^\rho_{\rvx\rvy})=\frac{\partial (1+\rho)\log(\sum_{y}\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})}{\partial \rho} $$
4621: \end{lemma}
4622: \pf
4623:
4624: \begin{eqnarray}
4625: & & \frac{\partial
4626: (1+\rho)\log(\sum_{y}\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})}{\partial
4627: \rho} \nonumber\\
4628: &=&\log(\sum_{t}\sum_{s}p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})-
4629: \sum_{y}\sum_{x}\frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}}}{\sum_{t}\sum_{s}p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}}}\log(p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})\nonumber\\
4630: &=& -
\sum_{y}\sum_{x}\frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}}}{\sum_{t}\sum_{s}p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}}}\log(\frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}}}{\sum_{t}\sum_{s}p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}}})\nonumber\\
4632: &=&H(p^\rho_{\rvx\rvy})
4633: \end{eqnarray}
4634:
4635:
4636:
4637:
4638: \hfill$\blacksquare$
4639: \begin{lemma}\label{LEMMA_APP11}
4640: $$ H(\pBar^\rho_{\rvx|\rvy})=\frac{\partial \log(\sum_{y
4641: }(\sum_{x }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho})}{\partial
\rho} $$
4643: \end{lemma}
4644: \pf Notice that $B(\rho)=\sum_{y }(\sum_{x
4645: }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho}$, and $
4646: \frac{\partial B(\rho)}{\partial \rho}
4647: =B(\rho)H(\pBar^\rho_{\rvx|\rvy})$ as shown in
4648: Lemma~\ref{LEMMAAPP1_SI}. It is clear that:
4649:
4650: \begin{eqnarray}
4651: \frac{\partial \log(\sum_{y }(\sum_{x
4652: }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho})}{\partial
4653: \rho}&=&\frac{\partial \log(B(\rho))}{\partial \rho}\nonumber\\
4654: &=& \frac{1}{B(\rho)}\frac{\partial B(\rho)}{\partial
4655: \rho}\nonumber\\
4656: &=&H(\pBar^\rho_{\rvx|\rvy})
4657: \end{eqnarray}
4658: \hfill$\blacksquare$
4659:
4660: %------------------------- End of Lemmas ------------------------------------------
4661:
4662: \bibliographystyle{IEEEtran}
4663: \bibliography{IEEEabrv,references}
4664:
4665: \end{document}
4666:
4667: % LocalWords: Lossless IEEE Symp Sahai Cheng Anant iid Slepian
4668: % LocalWords: lossless Slepian encodings cardinalities iid memoryless Lempel
4669: % LocalWords: atypicality blocklengths tradeoffs Ziv convolutional multi
4670: % LocalWords: Bolded Subsequences subsequences subsequence entropies
4671: % LocalWords: lclcl jR iid convolutional Slepian xy lll
4672: % LocalWords: Slepian BSC xy iff
4673: % LocalWords: Chernoff iid exponentials
4674: % LocalWords: Slepian Gallager's
4675: % LocalWords: achievability exponentials
4676: % LocalWords: memoryless Chernoff
4677: % LocalWords: Slepian multiuser
4678: % LocalWords: indices infimum lllll xy memoryless exponentials
4679: % LocalWords: entropies subsequences memoryless
4680: % LocalWords: Zixiang Xiong ITW ITR
4681: % LocalWords: xy Kullback Leibler