0610:cs0610144/all.tex

1: \documentclass[10pt, letterpaper, onecolumn, peerreview]{IEEEtran}

2: \usepackage{epsfig}

3: \usepackage{amssymb}

4: \usepackage[tbtags]{amsmath}

5: \usepackage{graphics,eepic,epic}

6: \usepackage{latexsym}

7: \usepackage{euscript}

8: %\usepackage{styles/preprint}

9: \usepackage{graphics,eepic,epic,psfrag}

10:

11:

12: % GWW DEFINITIONS AND ABBREVIATIONS

13:

14: % TeX Defs

15:

16: \usepackage[tbtags]{amsmath} % defines many math commands and

17: 			     % subequations environment, etc

18: \usepackage{amssymb}  % get, among others, blackboard bold fonts

19:                       % defines extra symbols like \gtreqless, etc

20: \usepackage{verbatim} % get comment environment, + new verbatim

21: % \usepackage{amsxtra}  % get, eg, \accentedsymbol

22:

23: \DeclareMathOperator*{\argmax}{arg\,max}

24: \DeclareMathOperator*{\argmin}{arg\,min}

25: \DeclareMathOperator*{\argsup}{arg\,sup}

26: \DeclareMathOperator*{\arginf}{arg\,inf}

27: \DeclareMathOperator{\erfc}{erfc}

28: \DeclareMathOperator{\diag}{diag}

29: \DeclareMathOperator{\cum}{cum}

30: \DeclareMathOperator{\sgn}{sgn}

31: \DeclareMathOperator{\tr}{tr}

32: \DeclareMathOperator{\spn}{span}

33: \DeclareMathOperator{\adj}{adj}

34: \DeclareMathOperator{\var}{var}

35: \DeclareMathOperator{\cov}{cov}

36: \DeclareMathOperator{\sech}{sech}

37: \DeclareMathOperator{\sinc}{sinc}

38: \DeclareMathOperator*{\lms}{l.i.m.\,}

39: \newcommand{\varop}[1]{\var\left[{#1}\right]}

40: \newcommand{\covop}[2]{\cov\left({#1},{#2}\right)}

41:

42: \newcommand{\p}{\partial}

43:

44: % LIST ENVIRONMENTS

45:

46: \newcounter{actr}

47: \newenvironment{alist}%

48: {\begin{list}{(\alph{actr})}{\usecounter{actr}}}{\end{list}}

49:

50: \newcounter{ictr}

51: \newenvironment{ilist}%

52: {\begin{list}{(\roman{ictr})}{\usecounter{ictr}}}{\end{list}}

53:

54: \iffalse

55:

56: % SPACING ENVIRONMENTS

57:

58: \newenvironment{singlespace}%

59: {\begin{spacing}{1}}{\end{spacing}}

60:

61: \newenvironment{onehalfspace}% for 11pt font

62: {\begin{spacing}{1.21}}{\end{spacing}}

63:

64: \newenvironment{doublespace}% for 11pt font

65: {\begin{spacing}{1.62}}{\end{spacing}}

66:

67: \fi

68:

69: % THEOREM ENVIRONMENTS

70:

71: \newtheorem{thm}{Theorem}

72: \newtheorem{lemma}{Lemma}

73: \newtheorem{claim}{Claim}

74: \newtheorem{corol}{Corollary}

75: \newtheorem{prop}{Proposition}

76: \newtheorem{conj}{Conjecture}

77: \newtheorem{defn}{Definition}

78: %\newenvironment{proof}%

79: %{\noindent{\em Proof: } \begin{singlespace} \small \noindent}%

80: %{\noindent\qed \end{singlespace}}

81: %\newenvironment{new-proof}[1]%

82: %{{\em Proof of #1: } \begin{singlespace} \small \noindent}%

83: %{\ \noindent\qed \end{singlespace}}

84:

85: \iffalse

86: \newenvironment{proof}%

87: {\noindent{\em Proof: } \small \noindent}%

88: {\noindent\qed }

89: \newenvironment{new-proof}[1]%

90: {{\em Proof of #1: } \small \noindent}%

91: {\ \noindent\qed }

92: \fi

93:

94: \newcommand{\abs}[1]{\left|#1\right|}

95: %\newcommand{\comb}[2]{{#1\choose#2}}

96: \newcommand{\comb}[2]{\binom{#1}{#2}}

97: \newcommand{\ie}{i.e.}

98: \newcommand{\eg}{e.g.}

99: \newcommand{\etc}{etc.}

100: \newcommand{\viz}{viz.}

101: \newcommand{\etal}{et al.}

102: \newcommand{\cf}{cf.}

103:

104: \newcommand{\vect}[3]{\begin{bmatrix} #1 & #2 & \cdots & #3 \end{bmatrix}^\T}

105:

106: \newcommand{\dsp}{.5\baselineskip}             % double space amount

107: \newcommand{\down}{\vspace{\dsp}}              % double space command

108: \newcommand{\ddown}{\vspace{\baselineskip}}    % quadruple space command

109: \newcommand{\spec}{\hspace*{1pt}}              % little bit of space

110: \newcommand{\ds}{\displaystyle}                % abbreviation

111: \newcommand{\ts}{\textstyle}                % abbreviation

112: \newcommand{\nin}{\noindent}                   % noindent abbreviation

113: \newcommand{\cvar}[1]{\mathrm{var_{#1}\,}}

114: \newcommand{\qed}{\rule[0.1ex]{1.4ex}{1.6ex}}

115: \newcommand{\mycap}[2]{\caption{\sl #2 \label{#1}}}

116: \newcommand{\subcap}[1]{{\begin{center}\sl #1\end{center}}}

117: \newcommand{\ditem}[1]{\item[#1 \hspace*{\fill}]}

118: \newcommand{\appfig}{\vspace*{1in}\begin{center} Figure appended to

119:                        end of manuscript. \end{center} \vspace*{1in}}

120: \newcommand{\psx}[1]{\centerline{\epsfxsize=6in \epsfbox{#1}}}

121: \newcommand{\psy}[1]{\centerline{\epsfysize=7in \epsfbox{#1}}}

122: \newcommand{\psxs}[2]{\centerline{\epsfxsize=#1in \epsfbox{#2}}}

123: \newcommand{\psxsbb}[3]{\centerline{\epsfxsize=#1in \epsfbox[#3]{#2}}}

124: \newcommand{\psys}[2]{\centerline{\epsfysize=#1in \epsfbox{#2}}}

125: \newcommand{\convsamp}[3]{\left.\left\{#1 \ast #2\right\}\right|_{#3}}

126: \newcommand{\gap}{\qquad}

127: \newcommand{\order}[1]{\mathcal{O}\left(#1\right)}

128: \newcommand{\arror}[3]{\begin{cases} #1 & #2 \\

129:                                      #3 & \text{otherwise} \end{cases}}

130: \newcommand{\arrorc}[3]{\begin{cases} #1 & #2 \\

131:                                      #3 & \text{otherwise,} \end{cases}}

132: \newcommand{\arrorp}[3]{\begin{cases} #1 & #2 \\

133:                                      #3 & \text{otherwise.} \end{cases}}

134: \newcommand{\darror}[4]{\begin{cases} #1 & #2 \\ #3 & #4 \end{cases}}

135: % \newcommand{\defeq}{\stackrel{\triangle}{=}}

136: \newcommand{\defeq}{\stackrel{\Delta}{=}}

137: \newcommand{\msconv}{\stackrel{\mathrm{m.s.}}{\longrightarrow}}

138: \newcommand{\pwaeconv}{\stackrel{\mathrm{p.w.a.e.}}{\longrightarrow}}

139: \newcommand{\peq}{\stackrel{\mathcal{P}}{=}}

140: % \newcommand{\glt}{ \begin{array}{c} \Hh=H_1 \\

141: %  \renewcommand{\arraystretch}{.3}

142: %  \begin{array}{c} > \\ < \end{array}

143: %  \renewcommand{\arraystretch}{1} \\ \Hh=H_0 \end{array}}

144:

145: \hyphenation{or-tho-nor-mal}

146: \hyphenation{wave-let wave-lets}

147:

148: \newcommand{\crb}{Cram\'{e}r-Rao}  % obsolete

149: \newcommand{\CR}{Cram\'{e}r-Rao}

150: \newcommand{\KL}{Karhunen-Lo\`{e}ve}

151: \newcommand{\sE}{\sqrt{E_0}}

152: \newcommand{\pe}{\Pr(\eps)}

153: \newcommand{\jw}{j\w}

154: \newcommand{\ejw}{e^{j\w}}

155: \newcommand{\ejv}{e^{j\nu}}

156: \newcommand{\wo}{{\w_0}}

157: \newcommand{\woh}{{\wh_0}}

158: \newcommand{\sumi}[1]{\sum_{#1=-\infty}^{+\infty}}

159: \newcommand{\inti}{\int_{-\infty}^{+\infty}}

160: \newcommand{\intp}{\int_{-\pi}^{\pi}}

161: \newcommand{\nintp}{\frac{1}{2\pi}\int_{-\pi}^{\pi}}

162: \newcommand{\inth}{\int_{0}^{\infty}}

163: \newcommand{\E}[1]{E\left[{#1}\right]}

164: \newcommand{\bigE}[1]{E\bigl[{#1}\bigr]}

165: \newcommand{\BigE}[1]{E\Bigl[{#1}\Bigr]}

166: \newcommand{\biggE}[1]{E\biggl[{#1}\biggr]}

167: \newcommand{\BiggE}[1]{E\Biggl[{#1}\Biggr]}

168: \newcommand{\Prob}[1]{\Pr\left[{#1}\right]}

169: \newcommand{\Pu}[1]{\Pr\left[{#1}\right]} % obsolete; same as \Prob now

170: \newcommand{\Pc}[2]{\Pr\left[{#1}\mid{#2}\right]}  % obsolete

171: \newcommand{\Pcb}[2]{\Pr\left[{#1}\Bigm|{#2}\right]} % obsolete

172: \newcommand{\Q}[1]{\mathcal{Q}\left({#1}\right)}

173: \newcommand{\FT}[1]{\mathcal{F}\left\{{#1}\right\}}

174: \newcommand{\LT}[1]{\mathcal{L}\left\{{#1}\right\}}

175: \newcommand{\ZT}[1]{\mathcal{Z}\left\{{#1}\right\}}

176: %\newcommand{\reals}{\mathbf{R}}

177: \newcommand{\reals}{\mathbb{R}}

178: %\newcommand{\ints}{\mathbf{Z}}

179: \newcommand{\ints}{\mathbb{Z}}

180: \newcommand{\compls}{\mathbb{C}}

181: \newcommand{\nats}{\mathbb{N}}

182: \newcommand{\rats}{\mathbb{Q}}

183: \newcommand{\ltwor}{L^2(\reals)}

184: \newcommand{\ltwoz}{\ell^2(\ints)}

185: \newcommand{\ltwow}{L^2(\Omega)}

186: % \newcommand{\ltwo}{\mathbf{L}^2}

187: % \newcommand{\ltwor}{\mathbf{L}^2 (\reals)}

188: % \newcommand{\ltwoz}{\mathbf{l}^2 (\ints)}

189: \newcommand{\sys}[1]{\mathcal{S}\left\{#1\right\}}

190: \newcommand{\nn}{\nonumber}

191:

192: \newcommand{\ip}[2]{\left\langle{#1},{#2}\right\rangle}

193: \newcommand{\di}[2]{d\left({#1},{#2}\right)}

194: \newcommand{\ceil}[1]{\lceil{#1}\rceil}

195: \newcommand{\floor}[1]{\lfloor{#1}\rfloor}

196: \newcommand{\phase}{\measuredangle}

197:

198: \newcommand{\Ht}{\mathrm{H}}

199: \newcommand{\T}{{\mathrm{T}}}

200: % \newcommand{\R}{\Re\mathit{e}}

201: % \newcommand{\I}{\Im\mathit{m}}

202: \DeclareMathOperator{\R}{Re}

203: \DeclareMathOperator{\I}{Im}

204:

205:

206:

207:

208: % ABBREVIATIONS FOR CHARACTERS IN VARIOUS FONTS

209:

210: % STANDARD CHARACTERS

211:

212: \newcommand{\ba}{{\mathbf{a}}}

213: \newcommand{\bah}{{\hat{\ba}}}

214: \newcommand{\ah}{{\hat{a}}}

215: \newcommand{\Ah}{{\hat{A}}}

216: \newcommand{\cA}{{\mathcal{A}}}

217: \newcommand{\at}{{\tilde{a}}}

218: \newcommand{\bat}{{\tilde{\ba}}}

219: \newcommand{\At}{{\tilde{A}}}

220: \newcommand{\bA}{{\mathbf{A}}}

221: \newcommand{\ac}{a^{\ast}}

222:

223: \newcommand{\bb}{{\mathbf{b}}}

224: \newcommand{\bbt}{{\tilde{\bb}}}

225: \newcommand{\cB}{{\mathcal{B}}}

226: \newcommand{\tb}{{\tilde{b}}}

227: \newcommand{\tB}{{\tilde{B}}}

228: \newcommand{\hb}{{\hat{b}}}

229: \newcommand{\hB}{{\hat{B}}}

230: \newcommand{\bB}{{\mathbf{B}}}

231:

232: \newcommand{\bc}{{\mathbf{c}}}

233: \newcommand{\bch}{{\hat{\mathbf{c}}}}

234: \newcommand{\bC}{{\mathbf{C}}}

235: \newcommand{\cC}{{\mathcal{C}}}

236: \newcommand{\ct}{{\tilde{c}}}

237: \newcommand{\Ct}{{\tilde{C}}}

238: \newcommand{\ctc}{\ct^{\ast}}

239:

240: \newcommand{\bd}{{\mathbf{d}}}

241: \newcommand{\bD}{{\mathbf{D}}}

242: \newcommand{\cD}{{\mathcal{D}}}

243: \newcommand{\hd}{{\hat{d}}}  % old: \dh

244: \newcommand{\dt}{{\tilde{d}}}

245: \newcommand{\bdt}{{\tilde{\bd}}}

246: \newcommand{\Dt}{{\tilde{D}}}

247: \newcommand{\dtc}{\dt^{\ast}}

248:

249: \newcommand{\et}{{\tilde{e}}}

250: \newcommand{\bfe}{{\mathbf{e}}}

251: \newcommand{\bE}{{\mathbf{E}}}

252: \newcommand{\cE}{{\mathcal{E}}}

253: \newcommand{\cEt}{{\tilde{\cE}}}

254: \newcommand{\cEb}{{\bar{\cE}}}

255: \newcommand{\bcE}{{\boldsymbol{\cE}}}  % bold calligraphic E; \mathbf has no effect on \mathcal, so \boldsymbol is used

256:

257: \newcommand{\bff}{{{\mathbf{f}}}}

258: \newcommand{\bF}{{\mathbf{F}}}

259: \newcommand{\cF}{{\mathcal{F}}}

260: \newcommand{\ft}{{\tilde{f}}}

261: \newcommand{\Ft}{{\tilde{F}}}

262: \newcommand{\Fh}{{\hat{F}}}

263: \newcommand{\ftc}{\ft^{\ast}}

264: \newcommand{\bft}{{\tilde{\bff}}}

265: \newcommand{\bFt}{{\tilde{\bF}}}

266: \newcommand{\fh}{{\hat{f}}}

267:

268: \newcommand{\bg}{{\mathbf{g}}}

269: \newcommand{\gt}{{\tilde{g}}}

270: \newcommand{\bgt}{{\tilde{\bg}}}

271: \newcommand{\bG}{{\mathbf{G}}}

272: \newcommand{\cG}{{\mathcal{G}}}

273: \newcommand{\Gt}{{\tilde{\bG}}}

274:

275: \newcommand{\hti}{{\tilde{h}}}

276: \newcommand{\Hti}{{\tilde{H}}}

277: \newcommand{\bh}{{\mathbf{h}}}

278: \newcommand{\bht}{{\tilde{\bh}}}

279: \newcommand{\Hh}{{\hat{H}}}

280: \newcommand{\bH}{{\mathbf{H}}}

281: \newcommand{\bHh}{{\hat{\mathbf{H}}}}

282:

283: \newcommand{\ih}{{\hat{\imath}}}

284: \newcommand{\bI}{{\mathbf{I}}}

285: \newcommand{\cI}{{\mathcal{I}}}

286:

287: \newcommand{\jh}{{\hat{\jmath}}}

288: \newcommand{\bJ}{{\mathbf{J}}}

289: \newcommand{\cJ}{{\mathcal{J}}}

290: \newcommand{\Jt}{{\tilde{J}}}

291:

292: \newcommand{\bk}{{\mathbf{k}}}

293: \newcommand{\bK}{{\mathbf{K}}}

294: \newcommand{\Kt}{{\tilde{K}}}

295: \newcommand{\Kh}{{\hat{K}}}

296: \newcommand{\cK}{{\mathcal{K}}}

297:

298: \newcommand{\cl}{\ell}

299: \newcommand{\bL}{{\mathbf{L}}}

300: \newcommand{\cL}{{\mathcal{L}}}

301:

302: \newcommand{\mb}{{\mathbf{m}}}

303: \newcommand{\mh}{{\hat{m}}}

304: \newcommand{\bM}{{\mathbf{M}}}

305: \newcommand{\cM}{{\mathcal{M}}}

306:

307:

308: \newcommand{\cN}{{\mathcal{N}}}

309: \newcommand{\Nt}{{\tilde{N}}}

310: \newcommand{\tN}{{\tilde{N}}}  % backward compatibility

311:

312: \newcommand{\bo}{{\mathbf{o}}}

313: \newcommand{\cO}{{\mathcal{O}}}

314:

315: \newcommand{\bp}{{\mathbf{p}}}

316: \newcommand{\bP}{{\mathbf{P}}}

317: \newcommand{\cP}{{\mathcal{P}}}

318: \newcommand{\ph}{{\hat{p}}}

319: \newcommand{\Ph}{{\hat{P}}}

320:

321: \newcommand{\bq}{{\mathbf{q}}}

322: \newcommand{\cQ}{{\mathcal{Q}}}

323: \newcommand{\bQ}{{\mathbf{Q}}}

324:

325: \newcommand{\br}{{\mathbf{r}}}

326: \newcommand{\bR}{{\mathbf{R}}}

327: \newcommand{\cR}{{\mathcal{R}}}

328: \newcommand{\Rt}{{\tilde{R}}}

329:

330: \newcommand{\sh}{{\hat{s}}}

331: \newcommand{\sck}{{\check{s}}}

332: \newcommand{\shh}{{\Hat{\Hat{s}}}}

333: \newcommand{\bs}{{\mathbf{s}}}

334: \newcommand{\bsh}{{\hat{\mathbf{s}}}}

335: \newcommand{\bsc}{{\check{\mathbf{s}}}}

336: \newcommand{\bshh}{{\Hat{\Hat{\mathbf{s}}}}}

337: \newcommand{\bS}{{\mathbf{S}}}

338: \newcommand{\cS}{{\mathcal{S}}}

339: \newcommand{\st}{{\tilde{s}}}

340:

341: \newcommand{\bT}{{\mathbf{T}}}

342: \newcommand{\cT}{{\mathcal{T}}}

343:

344: \newcommand{\bu}{{\mathbf{u}}}

345: \newcommand{\bU}{{\mathbf{U}}}

346: \newcommand{\bUt}{{\tilde{\bU}}}

347: \newcommand{\ut}{{\tilde{u}}}

348: \newcommand{\cU}{{\mathcal{U}}}

349:

350: \newcommand{\vh}{{\hat{v}}}

351: \newcommand{\bv}{{\mathbf{v}}}

352: \newcommand{\bV}{{\mathbf{V}}}

353: \newcommand{\cV}{{\mathcal{V}}}

354:

355: \newcommand{\bw}{{\mathbf{w}}}

356: \newcommand{\bW}{{\mathbf{W}}}

357: \newcommand{\cW}{{\mathcal{W}}}

358:

359: \newcommand{\bx}{{\mathbf{x}}}

360: \newcommand{\bxt}{{\tilde{\bx}}}

361: \newcommand{\xt}{{\tilde{x}}}

362: \newcommand{\Xt}{{\tilde{X}}}

363: \newcommand{\bX}{{\mathbf{X}}}

364: \newcommand{\cX}{{\mathcal{X}}}

365: \newcommand{\bXt}{{\tilde{\bX}}}

366: \newcommand{\xh}{{\hat{x}}}

367: \newcommand{\xc}{{\check{x}}}

368: \newcommand{\xhh}{{\Hat{\Hat{x}}}}

369: \newcommand{\bxh}{{\hat{\bx}}}

370: \newcommand{\bxc}{{\check{\bx}}}

371: \newcommand{\bxhh}{{\Hat{\Hat{\bx}}}} % double hat on bold x; same \Hat{\Hat{..}} form as \shh, \bshh, \xhh

372:

373: \newcommand{\cY}{{\mathcal{Y}}}

374: \newcommand{\by}{{\mathbf{y}}}

375: \newcommand{\byt}{{\tilde{\by}}}

376: \newcommand{\bY}{{\mathbf{Y}}}

377: \newcommand{\Yt}{{\tilde{Y}}}

378: \newcommand{\yt}{{\tilde{y}}}

379: \newcommand{\yh}{{\hat{y}}}

380:

381: \newcommand{\zt}{{\tilde{z}}}

382: \newcommand{\zh}{{\hat{z}}}

383: \newcommand{\bz}{{\mathbf{z}}}

384: \newcommand{\bZ}{{\mathbf{Z}}}

385: \newcommand{\cZ}{{\mathcal{Z}}}

386:

387: % GREEK CHARACTERS

388:

389: \newcommand{\al}{\alpha}

390: \newcommand{\bal}{{\boldsymbol{\al}}}

391: \newcommand{\balh}{{\hat{\boldsymbol{\al}}}}

392: \newcommand{\alh}{{\hat{\al}}}

393:

394: \newcommand{\bt}{\beta}

395: \newcommand{\btt}{{\tilde{\bt}}}

396: \newcommand{\btht}{{\hat{\bt}}}

397:

398: \newcommand{\g}{\gamma}

399: \newcommand{\G}{\Gamma}

400: \newcommand{\bGa}{{\boldsymbol{\Gamma}}}

401: \newcommand{\gh}{{\hat{\g}}}

402:

403: \newcommand{\de}{\delta}

404: \newcommand{\De}{\Delta}

405: \newcommand{\Deh}{{\hat{\Delta}}}

406: \newcommand{\bde}{{\boldsymbol{\de}}}

407: \newcommand{\bDe}{{\boldsymbol{\De}}}

408:

409: \newcommand{\e}{\epsilon}

410: \newcommand{\eps}{\varepsilon}

411:

412: \newcommand{\etah}{{\hat{\eta}}}

413: \newcommand{\bpi}{{\boldsymbol{\pi}}}

414:

415: \newcommand{\pht}{{\tilde{\phi}}}

416: \newcommand{\Pht}{{\tilde{\Phi}}}

417:

418: \newcommand{\pst}{{\tilde{\psi}}}

419: \newcommand{\Pst}{{\tilde{\Psi}}}

420:

421: \newcommand{\s}{\sigma}

422: \newcommand{\sih}{\hat{\sigma}}

423:

424: \newcommand{\z}{\zeta}

425: \newcommand{\ztt}{{\tilde{\z}}}

426: \newcommand{\ztb}{{\bar{\z}}}

427:

428: % \newcommand{\th}{\theta} % symbol name used by other latex package

429: \newcommand{\thh}{{\hat{\theta}}}

430: \newcommand{\Thh}{{\hat{\Theta}}}

431: \newcommand{\Th}{\Theta}

432: \newcommand{\bth}{{\boldsymbol{\theta}}}

433: \newcommand{\bTh}{{\boldsymbol{\Theta}}}

434: \newcommand{\bThh}{{\hat{\bTh}}}

435: \newcommand{\Tht}{{\tilde{\Theta}}}

436:

437: \newcommand{\la}{\lambda}

438: %\newcommand{\La}{\Lambda}

439: \newcommand{\lam}{\lambda}  % backward compatibility

440: \newcommand{\Lam}{\Lambda}  % backward compatibility

441: \newcommand{\bLa}{{\boldsymbol{\Lambda}}}  % bold capital Lambda; \La is not defined (its definition is commented out), so \Lambda is named directly

442: \newcommand{\lah}{{\hat{\lam}}}

443:

444: \newcommand{\bmu}{{\boldsymbol{\mu}}}

445:

446: \newcommand{\bXi}{{\boldsymbol{\Xi}}}

447:

448: \newcommand{\bPi}{{\boldsymbol{\Pi}}}

449:

450: \newcommand{\rht}{{\tilde{\rho}}}

451: \newcommand{\rhc}{{\check{\rho}}}

452:

453: \newcommand{\bSi}{{\boldsymbol{\Sigma}}}

454:

455: \newcommand{\ups}{\upsilon}

456: \newcommand{\Ups}{\Upsilon}

457: \newcommand{\bUp}{{\boldsymbol{\Ups}}}

458:

459: \newcommand{\bPs}{{\boldsymbol{\Psi}}}

460:

461: \newcommand{\w}{\omega}

462: \newcommand{\wh}{{\hat{\omega}}}

463: \newcommand{\W}{\Omega}

464:

465:

466: \newcounter{psctr} % first number printed in the "Problem a.b" heading

467: \newcounter{probctr}[psctr] % second number in the heading; reset whenever psctr is stepped

468: %\renewcommand{\theprobctr}{\arabic{psctr}.\arabic{probctr}}

469: \newcommand{\problem}[1]{% \problem{<text>}: bold numbered heading, then <text> as an unindented paragraph

470: \addtocounter{probctr}{1} % advance the problem number before it is printed

471: \vspace{.15in}

472:

473: \noindent\textbf{Problem \thepsctr.\theprobctr}\nopagebreak

474:

475: \noindent{#1}

476:

477: }

478: \newcommand{\extraproblem}[1]{% same layout as \problem, but the heading is tagged "(practice)"

479: \addtocounter{probctr}{1} % shares the probctr counter with \problem

480: \vspace{.15in}

481:

482: \noindent\textbf{Problem \thepsctr.\theprobctr\ (practice)}\nopagebreak

483:

484: \noindent{#1}

485:

486: }

487:

488: \DeclareMathAlphabet{\mathbsf}{OT1}{cmss}{bx}{n}% bold sans serif

489: \DeclareMathAlphabet{\mathssf}{OT1}{cmss}{m}{sl}% slanted sans serif

490:

491: % define some useful uppercase Greek letters in regular and bold sf

492: \DeclareSymbolFont{bsfletters}{OT1}{cmss}{bx}{n}

493: \DeclareSymbolFont{ssfletters}{OT1}{cmss}{m}{n}

494: \DeclareMathSymbol{\bsfGamma}{0}{bsfletters}{'000}

495: \DeclareMathSymbol{\ssfGamma}{0}{ssfletters}{'000}

496: \DeclareMathSymbol{\bsfDelta}{0}{bsfletters}{'001}

497: \DeclareMathSymbol{\ssfDelta}{0}{ssfletters}{'001}

498: \DeclareMathSymbol{\bsfTheta}{0}{bsfletters}{'002}

499: \DeclareMathSymbol{\ssfTheta}{0}{ssfletters}{'002}

500: \DeclareMathSymbol{\bsfLambda}{0}{bsfletters}{'003}

501: \DeclareMathSymbol{\ssfLambda}{0}{ssfletters}{'003}

502: \DeclareMathSymbol{\bsfXi}{0}{bsfletters}{'004}

503: \DeclareMathSymbol{\ssfXi}{0}{ssfletters}{'004}

504: \DeclareMathSymbol{\bsfPi}{0}{bsfletters}{'005}

505: \DeclareMathSymbol{\ssfPi}{0}{ssfletters}{'005}

506: \DeclareMathSymbol{\bsfSigma}{0}{bsfletters}{'006}

507: \DeclareMathSymbol{\ssfSigma}{0}{ssfletters}{'006}

508: \DeclareMathSymbol{\bsfUpsilon}{0}{bsfletters}{'007}

509: \DeclareMathSymbol{\ssfUpsilon}{0}{ssfletters}{'007}

510: \DeclareMathSymbol{\bsfPhi}{0}{bsfletters}{'010}

511: \DeclareMathSymbol{\ssfPhi}{0}{ssfletters}{'010}

512: \DeclareMathSymbol{\bsfPsi}{0}{bsfletters}{'011}

513: \DeclareMathSymbol{\ssfPsi}{0}{ssfletters}{'011}

514: \DeclareMathSymbol{\bsfOmega}{0}{bsfletters}{'012}

515: \DeclareMathSymbol{\ssfOmega}{0}{ssfletters}{'012}

516:

517: \newcommand{\fxfm}{\stackrel{\mathcal{F}}{\longleftrightarrow}}

518: \newcommand{\lxfm}{\stackrel{\mathcal{L}}{\longleftrightarrow}}

519: \newcommand{\zxfm}{\stackrel{\mathcal{Z}}{\longleftrightarrow}}

520:

521: \DeclareMathOperator*{\gltop}{\gtreqless}

522: \newcommand{\glt}{\;\gltop^{\Hh=\svH_1}_{\Hh=\svH_0}\;}

523: \newcommand{\glty}{\;\gltop^{\Hh(\svy)=\svH_1}_{\Hh(\svy)=\svH_0}\;}

524: \newcommand{\gltby}{\;\gltop^{\Hh(\svby)=\svH_1}_{\Hh(\svby)=\svH_0}\;}

525: \DeclareMathOperator*{\geltop}{\genfrac{}{}{0pt}{}{\ge}{<}}

526: \newcommand{\gelty}{\;\geltop^{\Hh(\svy)=\svH_1}_{\Hh(\svy)=\svH_0}\;}

527: \newcommand{\geltby}{\;\geltop^{\Hh(\svby)=\svH_1}_{\Hh(\svby)=\svH_0}\;}

528: \renewcommand{\pe}{\Pr(e)}

529: \renewcommand{\defeq}{\triangleq}

530: \newcommand{\like}{\svlike}

531: \newcommand{\rvlike}{\mathssf{L}}

532: \newcommand{\sst}{\cl}

533: \newcommand{\svlike}{L}

534: \newcommand{\llike}{\rvllike}

535: \newcommand{\rvllike}{\cl}

536: \newcommand{\svllike}{l}

537: \newcommand{\bllike}{\rvbllike}

538: \newcommand{\rvbllike}{\boldsymbol{\cl}}

539: \newcommand{\svbllike}{\mathbf{l}}

540: \newcommand{\Qb}{\overline{Q}}

541: \renewcommand{\comb}[2]{\binom{#1}{#2}}

542:

543:

544: %% Random/sample variable/vector declarations.  Please add in alphabetical

545: %% order.  First section is for capitals.  Second for lower case.

546: % Capitals

547: \newcommand{\rvA}{{\mathssf{A}}}	% A

548: \newcommand{\svA}{A}

549: \newcommand{\rvbA}{{\mathbsf{A}}}

550: \newcommand{\svbA}{{\mathbf{A}}}

551: \newcommand{\rvC}{{\mathssf{C}}}	% C

552: \newcommand{\svC}{C}

553: \newcommand{\rvbC}{{\mathbsf{C}}}

554: \newcommand{\svbC}{{\mathbf{C}}}

555: \newcommand{\rvD}{{\mathssf{D}}}	% D

556: \newcommand{\svD}{D}

557: \newcommand{\rvbD}{{\mathbsf{D}}}

558: \newcommand{\svbD}{{\mathbf{D}}}

559: \newcommand{\rvFh}{{\hat{\mathssf{F}}}}	% F

560: \newcommand{\rvF}{{\mathssf{F}}}

561: \newcommand{\rvHh}{{\hat{\mathssf{H}}}}	% H

562: \newcommand{\rvH}{{\mathssf{H}}}

563: \newcommand{\svH}{H}

564: \newcommand{\rvI}{{\mathssf{I}}}        % I

565: \newcommand{\svHh}{{\hat{\svH}}}

566: \newcommand{\rvL}{{\mathssf{L}}}	% L

567: \newcommand{\svL}{L}

568: \newcommand{\rvN}{{\mathssf{N}}}	% N

569: \newcommand{\rvP}{{\mathssf{P}}}	% P

570: \newcommand{\rvR}{{\mathssf{R}}}	% R

571: \newcommand{\rvRh}{{\hat{\rvR}}}

572: \newcommand{\rvS}{{\mathssf{S}}}	% S

573: \newcommand{\rvSh}{{\hat{\rvS}}}

574: \newcommand{\rvW}{{\mathssf{W}}}	% W

575: \newcommand{\rvX}{{\mathssf{X}}}  	% X, random variable

576: \newcommand{\svX}{X}

577: \newcommand{\rvXt}{{\tilde{\rvX}}}

578: \newcommand{\rvY}{{\mathssf{Y}}}	% Y

579: \newcommand{\rvZ}{{\mathssf{Z}}}	% Z

580:

581: \newcommand{\rva}{{\mathssf{a}}}	% a

582: \newcommand{\rvah}{{\hat{\rva}}}

583: \newcommand{\sva}{a}

584: \newcommand{\svah}{{\hat{\sva}}}

585: \newcommand{\rvba}{{\mathbsf{a}}}

586: \newcommand{\svba}{{\mathbf{a}}}

587: \newcommand{\rvb}{{\mathssf{b}}}	% b

588: \newcommand{\rvc}{{\mathssf{c}}}	% c

589: \newcommand{\rvch}{{\hat{\rvc}}}

590: \newcommand{\svc}{c}

591: \newcommand{\svch}{{\hat{\svc}}}

592: \newcommand{\rvbc}{{\mathbsf{c}}}

593: \newcommand{\svbc}{{\mathbf{c}}}

594:

595: \newcommand{\rvd}{{\mathssf{d}}}	% d

596: \newcommand{\rvdh}{{\hat{\rvd}}}

597: \newcommand{\svd}{d}

598: \newcommand{\svdh}{{\hat{\svd}}}

599: \newcommand{\rvbd}{{\mathbsf{d}}}

600: \newcommand{\svbd}{{\mathbf{d}}}

601:

602:

603:

604: \newcommand{\rve}{{\mathssf{e}}}	% e

605: \newcommand{\sve}{e}

606: \newcommand{\rvbe}{{\mathbsf{e}}}

607: \newcommand{\svbe}{{\mathbf{e}}}

608: \newcommand{\rvf}{{\mathssf{f}}}	% f

609: \newcommand{\svf}{f}

610: \newcommand{\rvbf}{{\mathbsf{f}}}

611: \newcommand{\svbf}{{\mathbf{f}}}

612: \newcommand{\rvg}{{\mathssf{g}}}	% g

613: \newcommand{\svg}{g}

614: \newcommand{\rvbg}{{\mathbsf{g}}}

615: \newcommand{\svbg}{{\mathbf{g}}}

616: \newcommand{\rvh}{{\mathssf{h}}}	% h

617: \newcommand{\svh}{h}

618: \newcommand{\rvbh}{{\mathbsf{h}}}

619: \newcommand{\svbh}{{\mathbf{h}}}

620: \newcommand{\rvk}{{\mathssf{k}}}	% k

621: \newcommand{\svk}{k}

622: \newcommand{\rvm}{{\mathssf{m}}}	% m

623: \newcommand{\svm}{m}

624: \newcommand{\rvbm}{{\mathbsf{m}}}

625: \newcommand{\svbm}{{\mathbf{m}}}

626: \newcommand{\rvn}{{\mathssf{n}}}	% n

627: \newcommand{\svn}{n}

628: \newcommand{\rvbn}{{\mathbsf{n}}}

629: \newcommand{\svbn}{{\mathbf{n}}}

630: \newcommand{\rvp}{{\mathssf{p}}}	% p

631: \newcommand{\svp}{p}

632: \newcommand{\rvq}{{\mathssf{q}}}	% q

633: \newcommand{\svq}{q}

634: \newcommand{\rvr}{{\mathssf{r}}}	% r

635: \newcommand{\rvbr}{{\mathbsf{r}}}

636: \newcommand{\svr}{r}

637: \newcommand{\rvs}{{\mathssf{s}}}	% s

638: \newcommand{\rvbs}{{\mathbsf{s}}}

639: \newcommand{\svs}{s}

640: \newcommand{\svbs}{{\mathbf{s}}}

641: \newcommand{\rvt}{{\mathssf{t}}}	% t

642: \newcommand{\rvbt}{{\mathbsf{t}}}

643: \newcommand{\svt}{t}

644: \newcommand{\svbt}{{\mathbf{t}}}

645: \newcommand{\rvu}{{\mathssf{u}}}	% u

646: \newcommand{\svu}{u}

647: \newcommand{\svuh}{{\hat{\svu}}}

648: \newcommand{\rvbu}{{\mathbsf{u}}}

649: \newcommand{\svbu}{{\mathbf{u}}}

650: \newcommand{\rvv}{{\mathssf{v}}}	% v

651: \newcommand{\svv}{v}

652: \newcommand{\svvh}{{\hat{\svv}}}

653: \newcommand{\rvbv}{{\mathbsf{v}}}

654: \newcommand{\svbv}{{\mathbf{v}}}

655: \newcommand{\rvvh}{{\hat{\rvv}}}

656: \newcommand{\rvw}{{\mathssf{w}}}	% w

657: \newcommand{\svw}{w}

658: \newcommand{\rvwh}{{\hat{\rvw}}}

659: \newcommand{\svwh}{{\hat{\svw}}}

660: \newcommand{\rvbw}{{\mathbsf{w}}}

661: \newcommand{\svbw}{{\mathbf{w}}}

662: \newcommand{\rvx}{{\mathssf{x}}}	% x, random variable

663: \newcommand{\rvxh}{{\hat{\rvx}}}

664: \newcommand{\rvxt}{{\tilde{\rvx}}}

665: \newcommand{\svx}{x}			% sample value

666: \newcommand{\svxh}{{\hat{\svx}}}

667: \newcommand{\svxt}{{\tilde{\svx}}}

668: \newcommand{\rvbx}{{\mathbsf{x}}}

669: \newcommand{\rvbxh}{{\hat{\rvbx}}}

670: \newcommand{\rvbxt}{{\tilde{\rvbx}}}

671: \newcommand{\svbx}{{\mathbf{\svx}}}

672: \newcommand{\svbxt}{{\tilde{\svbx}}}

673: \newcommand{\svbxh}{{\hat{\mathbf{x}}}}

674: \newcommand{\rvy}{{\mathssf{y}}}	% y

675: \newcommand{\rvyh}{{\hat{\mathssf{y}}}}

676: \newcommand{\svy}{y}

677: \newcommand{\rvyt}{{\tilde{\rvy}}}

678: \newcommand{\svyt}{{\tilde{\svy}}}

679: \newcommand{\svyh}{{\hat{\svy}}}

680: \newcommand{\rvby}{{\mathbsf{y}}}

681: \newcommand{\rvbyt}{{\tilde{\rvby}}}

682: \newcommand{\svby}{{\mathbf{y}}}

683: \newcommand{\svbyt}{{\tilde{\svby}}}

684: \newcommand{\rvz}{{\mathssf{z}}}	% z

685: \newcommand{\rvzh}{{\hat{\rvz}}}

686: \newcommand{\rvzt}{{\tilde{\rvz}}}

687: \newcommand{\svz}{z}

688: \newcommand{\svzh}{{\hat{\svz}}}

689: \newcommand{\rvbz}{{\mathbsf{z}}}

690: \newcommand{\svbz}{{\mathbf{z}}}

691:

692: % Handle uppercase Greek differently

693: \newcommand{\rvTh}{\ssfTheta}

694: \newcommand{\svTh}{\Theta}

695: \newcommand{\rvbTh}{\bsfTheta}

696: \newcommand{\svbTh}{\boldsymbol{\Theta}}

697: \newcommand{\rvPh}{\ssfPhi}

698: \newcommand{\svPh}{\Phi}

699: \newcommand{\rvbPh}{\bsfPhi}

700: \newcommand{\svbPh}{\boldsymbol{\Phi}}

701:

702: \newcommand{\ddx}{\frac{\p}{\p \svx}}

703: \newcommand{\ddbx}{\frac{\p}{\p\svbx}}

704:

705: \usepackage{fullpage}

706:

707:

708: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

709: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

710: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

711: %

712: %   NEW COMMANDS

713: %

714: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

715: %

716: % RANDOM VARIABLES AND VECTORS

717: %

718: % Random variable: \rvx

719: % Random vector (assume length-n): \rvbx

720: % Sample variable: \svx

721: % Sample vector: \svbx

722: %

723: % Tilde (e.g., \svbxtil), Bar, and BBar variants

724: % are used to denote other sequences; \svbx and \svby

725: % are reserved for the actual source sequences.

726: %

727: % Hat used for estimate

728: %

729:

730: \newcommand{\rvxtil}{\tilde{\rvx}}

731: \newcommand{\svxtil}{\tilde{\svx}}

732: \newcommand{\rvxhat}{\hat{\rvx}}

733: \newcommand{\svxhat}{\hat{\svx}}

734: \newcommand{\rvxBar}{\bar{\rvx}}

735: \newcommand{\svxBar}{\bar{\svx}}

736: \newcommand{\rvxBBar}{\bar{\bar{\rvx}}}

737: \newcommand{\svxBBar}{\bar{\bar{\svx}}}

738:

739: \newcommand{\rvbxtil}{\tilde{\rvbx}}

740: \newcommand{\svbxtil}{\tilde{\svbx}}

741: \newcommand{\rvbxhat}{\hat{\rvbx}}

742: \newcommand{\svbxhat}{\hat{\svbx}}

743: \newcommand{\rvbxBar}{\bar{\rvbx}}

744: \newcommand{\svbxBar}{\bar{\svbx}}

745: \newcommand{\rvbxBBar}{\bar{\bar{\rvbx}}}

746: \newcommand{\svbxBBar}{\bar{\bar{\svbx}}}

747:

748: \newcommand{\rvytil}{\tilde{\rvy}}

749: \newcommand{\svytil}{\tilde{\svy}}

750: \newcommand{\rvyhat}{\hat{\rvy}}

751: \newcommand{\svyhat}{\hat{\svy}}

752: \newcommand{\rvyBar}{\bar{\rvy}}

753: \newcommand{\svyBar}{\bar{\svy}}

754: \newcommand{\rvyBBar}{\bar{\bar{\rvy}}}

755: \newcommand{\svyBBar}{\bar{\bar{\svy}}}

756:

757: \newcommand{\rvbytil}{\tilde{\rvby}}

758: \newcommand{\svbytil}{\tilde{\svby}}

759: \newcommand{\rvbyhat}{\hat{\rvby}}

760: \newcommand{\svbyhat}{\hat{\svby}}

761: \newcommand{\rvbyBar}{\bar{\rvby}}

762: \newcommand{\svbyBar}{\bar{\svby}}

763: \newcommand{\rvbyBBar}{\bar{\bar{\rvby}}}

764: \newcommand{\svbyBBar}{\bar{\bar{\svby}}}

765:

766: % The sample variable that occurs if the

767: % suffixes are not a-typical

768: \newcommand{\sveBar}{\bar{\sve}}

769:

770: % A random constant

771: \newcommand{\rvK}{{\mathssf{K}}}

772: \newcommand{\svK}{{K}}

773:

774:

775: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

776: %

777: % ACTUAL SOURCE DISTRIBUTIONS (NOT NECESSARILY TYPES)

778: %

779: \newcommand{\PxyRV}{p_{\rvx \rvy}}

780: \newcommand{\PxCondyRV}{p_{\rvx|\rvy}}

781: \newcommand{\PyCondxRV}{p_{\rvy|\rvx}}

782: \newcommand{\PxRV}{p_{\rvx}}

783: \newcommand{\PyRV}{p_{\rvy}}

784:

785: \newcommand{\pxySV}{p_{\svx \svy}}

786: \newcommand{\pxCondySV}{p_{\svx | \svy}}

787:

788: \newcommand{\qxySV}{q_{\svx \svy}}

789: \newcommand{\qxyCondSV}{q_{\svx | \svy}}

790: \newcommand{\qyxCondSV}{q_{\svy | \svx}}

791: \newcommand{\qySV}{q_{\svy}}

792:

793: \newcommand{\qxyBarSV}{\bar{q}_{\svx \svy}}

794: \newcommand{\qxyBarCondSV}{\bar{q}_{\svx | \svy}}

795: \newcommand{\qyxBarCondSV}{\bar{q}_{\svy | \svx}}

796: \newcommand{\qyBarSV}{\bar{q}_{\svy}}

797: \newcommand{\qxSV}{q_{\svx}}

798: \newcommand{\qxBarSV}{\bar{q}_{\svx}}

799:

800: \newcommand{\PSVast}{p^{\ast}}

801: \newcommand{\PxSVast}{p_{\svx}^{\ast}}

802: \newcommand{\PySVast}{p_{\svy}^{\ast}}

803: \newcommand{\PxySVast}{p_{\svx \svy}^{\ast}}

804: \newcommand{\PxySVastast}{p_{\svx \svy}^{\ast \ast}}

805: \newcommand{\PxCondySVast}{p_{\svx|\svy}^{\ast}}

806: \newcommand{\PyCondxSVastast}{p_{\svy|\svx}^{\ast \ast}}

807:

808: % a conditional probability that comes up often

809: \newcommand{\srcCond}{\; p_{\rvbx|\rve_1}(\svbx|\sveBar)}

810: \newcommand{\srcProb}{\; p_{\rvbx}(\svbx)}

811:

812: \newcommand{\src}[2]{\; p_{\rvx_{#1}^{#2}}(\svx_{#1}^{#2})}

813:

814:

815: \newcommand{\srcL}{\; p_{\rvx^l}(\svx^l)}

816: \newcommand{\srcKL}{\; p_{\rvx_{l+1}^k}(\svx_{l+1}^k)}

817: \newcommand{\srcNK}{\; p_{\rvx_{k+1}^n}(\svx_{k+1}^n)}

818: \newcommand{\srcNL}{\; p_{\rvx_{l+1}^n}(\svx_{l+1}^n)}

819:

820: \newcommand{\srcYKL}{\; p_{\rvy_{l+1}^k}(\svy_{l+1}^k)}

821: \newcommand{\srcYNK}{\; p_{\rvy_{k+1}^n}(\svy_{k+1}^n)}

822:

823: % a conditional probability that comes up often

824: \newcommand{\jointSrcCond}{\; p_{\rvbx,\rvby|\rve_1}(\svbx,\svby|\sveBar)}

825: \newcommand{\jointSrcProb}{\; p_{\rvbx,\rvby}(\svbx,\svby)}

826: \newcommand{\jointSrcL}{\; p_{\rvx^l,\rvy^l}(\svx^l,\svy^l)}

827: \newcommand{\jointSrcKL}{\; p_{\rvx_{l+1}^k,\rvy_{l+1}^k}

828:     (\svx_{l+1}^k,\svy_{l+1}^k)}

829: \newcommand{\jointSrcNK}{\; p_{\rvx_{k+1}^n,\rvy_{k+1}^n}

830:     (\svx_{k+1}^n,\svy_{k+1}^n)}

831:

832: \newcommand{\condSrcNL}{\; p_{\rvx_{l+1}^n | \rvy_{l+1}^n}

833:     (\svx_{l+1}^n|\svy_{l+1}^n)}

834:

835: \newcommand{\jointSource}[4]{\; p_{\rvx_{#1}^{#2}, \rvy_{#3}^{#4}}

836:     (\svx_{#1}^{#2}, \svy_{#3}^{#4})}

837: \newcommand{\jointsource}[4]{\; p_{\rvx_{#1}^{#2}, \rvy_{#3}^{#4}}}

838:

839: %%%%%%%%%%%%%%%%%%%%%%%%%%%%

840: %

841: % TYPE DEFINITIONS

842: %

843: % Type class

844: \newcommand{\tclass}{\mathcal{T}}

845:

846: %

847: % Always use capitals to denote types

848: %

849: % Use these to denote types of length-n

850: %

851: \newcommand{\PxySV}{P_{\svx \svy}}

852: \newcommand{\PxCondySV}{P_{\svx|\svy}}

853: \newcommand{\PyCondxSV}{P_{\svy|\svx}}

854: \newcommand{\PxSV}{P_{\svx}}

855: \newcommand{\PySV}{P_{\svy}}

856:

857: \newcommand{\Px}{\PxSV}

858: %

859: % Use these to indicate the (joint, conditional) type

860: % of some subsequence, which one should be understood

861: % from the context

862: %

863: \newcommand{\PxySVNL}{P_{\svx \svy}^{n-l}}

864: \newcommand{\PxCondySVNL}{P_{\svx|\svy}^{n-l}}

865: \newcommand{\PyCondxSVNL}{P_{\svy|\svx}^{n-l}}

866: \newcommand{\PxSVNL}{P_{\svx}^{n-l}}

867: \newcommand{\PySVNL}{P_{\svy}^{n-l}}

868:

869: \newcommand{\Ptil}{\tilde{P}}

870: \newcommand{\Wtil}{\tilde{W}}

871:

872: \newcommand{\PL}{P^{l}} % P and V symbols whose superscript records a subsequence length (see the Notation section); here superscript l

873: \newcommand{\PKL}{P^{k-l}} % P, superscript k-l

874: \newcommand{\PNL}{P^{n-l}} % P, superscript n-l

875: \newcommand{\PNK}{P^{n-k}} % P, superscript n-k

876: \newcommand{\VL}{V^{l}} % V, superscript l

877: \newcommand{\VKL}{V^{k-l}} % V, superscript k-l

878: \newcommand{\VNL}{V^{n-l}} % V, superscript n-l

879: \newcommand{\VNK}{V^{n-k}} % V, superscript n-k

880:

881: %

882: %  Type definitions for the suffixes of possibly

883: %  misleading sequences

884: %

885: \newcommand{\PtilL}{\tilde{P}^{l}} % Tilde-accented counterparts of \PL, \PKL, \PNL, \PNK and \VL, \VNL, \VNK, \VKL; here tilde P, superscript l

886: \newcommand{\PtilKL}{\tilde{P}^{k-l}} % tilde P, superscript k-l

887: \newcommand{\PtilNL}{\tilde{P}^{n-l}} % tilde P, superscript n-l

888: \newcommand{\PtilNK}{\tilde{P}^{n-k}} % tilde P, superscript n-k

889: \newcommand{\VtilL}{\tilde{V}^{l}} % tilde V, superscript l

890: \newcommand{\VtilNL}{\tilde{V}^{n-l}} % tilde V, superscript n-l

891: \newcommand{\VtilNK}{\tilde{V}^{n-k}} % tilde V, superscript n-k

892: \newcommand{\VtilKL}{\tilde{V}^{k-l}} % tilde V, superscript k-l (listed after the n-k variant, unlike the P group)

893:

894: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

895: %

896: % ASSORTED OTHER USEFUL MACROS

897: %

898: %

899: % Decoding delay

900: \newcommand{\delay}{\Delta} % decoding-delay symbol: capital Delta

901:

902: % Error exponents (subscripts are typeset as plain math letters; ML/UN presumably stand for maximum-likelihood/universal decoding and SI for side information -- confirm against the body text)

903: \newcommand{\expML}{E_{ML}}

904: \newcommand{\expUniv}{E_{UN}}

905: \newcommand{\expMLSI}{E_{ML, SI}}

906: \newcommand{\expUnivSI}{E_{UN, SI}}

907:

908: % Prefix function (typeset as the Greek letter pi)

909: \newcommand{\prefix}{\pi}

910:

911: % Binning functions (\cB is defined outside this group; the x/y variants only add a subscript)

912: \newcommand{\bin}{\cB}

913: \newcommand{\binX}{\cB_x}

914: \newcommand{\binY}{\cB_y}

915:

916: % Rates

917: \newcommand{\Rx}{R_x} % rate symbol with subscript x

918: \newcommand{\Ry}{R_y} % rate symbol with subscript y

919: \newcommand{\Rent}{\Rx} % alias: expands to \Rx, so it typesets identically to R_x

920:

921: % shorthand for a weighted-averaging function

922: \newcommand{\avg}{\mbox{avg}} % the word "avg" set in an \mbox, i.e. as text rather than as a product of math letters

923:

924: % Set of suffix-typical sequences (uses a literal \Delta rather than the \delay macro; \cA is defined outside this group)

925: \newcommand{\typSet}{\cA_{n, \Delta}}

926:

927: % Scoring functions: \score is the bare symbol S, the macros below apply it to fixed argument lists

928: \newcommand{\score}{S}

929: \newcommand{\scoreXYpre}{\score(\svbx, \svby| \svbxtil, \svbytil)} % S of (x, y) given the tilde pair

930: \newcommand{\scoreXYtilpre}{\score(\svbxtil, \svbytil|\svbx, \svby)} % S of the tilde pair given (x, y)

931: \newcommand{\scorePV}{\score(\PNL, \PNK, \VNL)} % S of the types P^{n-l}, P^{n-k}, V^{n-l}

932: \newcommand{\scorePVtil}{\score(\PtilNL, \PtilNK, \VtilNL)} % tilde counterpart of \scorePV

933:

934: \newcommand{\minEnt}{\score(\PNK, \PKL, \VNK, \VKL)} % four-argument form of S

935: \newcommand{\minEntTil}{\score(\PtilNK, \PKL, \VtilNK, \VtilKL)} % NOTE(review): second argument is the un-tilded \PKL while the other three are tilded -- confirm this is intentional

936:

937: % Indicator function

938: \newcommand{\ind}{\emph{1}} % indicator symbol: the digit 1 wrapped in \emph

939:

940: % Other

941: \newcommand{\kast}{k^{\ast}(l)} % k-star written as a function of l

942: \newcommand{\BL}{N} %% block-length for block coding

943: \newcommand{\pf}{{\em Proof: }} % emphasized "Proof: " lead-in; the extra braces keep \em local

944:

945: % Puts in a header with info for conference / submission info

946: %

947: %\newcommand{\status}{\centerline{Submitted to {\em IEEE Int.\ Symp\

948: %Inform.\ Theory}}}

949: %

950: \newcommand{\status}{\centerline{Submitted to IT Transactions}} % one centered line of submission-status text; the commented-out lines above hold an earlier conference variant

951:

952: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

953: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

954: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

955:

956:

957:

958:

959: \begin{document}

960:

961: % paper title

962: \title{Lossless coding for distributed streaming sources\footnote{This

963:     material was presented in part at the IEEE Int Symp Inform Theory,

964:     Adelaide, Australia, Sept 2005.}}

965:

966: \author{Cheng Chang\footnote{Department of Electrical Engineering and

967:     Computer Science, University of California Berkeley, Berkeley, CA

968:     94720}, Stark C.~Draper\footnote{Mitsubishi Electric Research Labs

969:     in Cambridge, MA. This work was performed while he was a postdoc

970:     at

971:     Wireless Foundations in the University of California Berkeley.}, and Anant Sahai\footnote{Wireless Foundations, Department of Electrical Engineering and Computer Science, University of California Berkeley, Berkeley, CA 94720} \\

972:   {\small \texttt{cchang@eecs.berkeley.edu, sdraper@eecs.berkeley.edu,

973:     sahai@eecs.berkeley.edu}} }

974:

975: \maketitle

976:

977: \begin{abstract} Distributed source coding is traditionally viewed in

978:   the block coding context --- all the source symbols are known in

979:   advance at the encoders. This paper instead considers a streaming

980:   setting in which iid source symbol pairs are revealed to the

981:   separate encoders in real time and need to be reconstructed at the

982:   decoder with some tolerable end-to-end delay using finite rate

983:   noiseless channels. A sequential random binning argument is used to

984:   derive a lower bound on the error exponent with delay and show that

985:   both ML decoding and universal decoding achieve the same positive

986:   error exponents inside the traditional Slepian-Wolf rate region. The

987:   error events are different from the block-coding error events and

988:   give rise to slightly different exponents. Because the sequential

989:   random binning scheme is also universal over delays, the resulting

990:   code eventually reconstructs every source symbol correctly with

991:   probability $1$.

992: \end{abstract}

993:

994: \IEEEpeerreviewmaketitle

995:

996:

997: \section{Introduction}

998:

999: Traditionally, ``lossless'' coding is considered using two distinct

1000: paradigms: fixed block coding and variable-length

1001: coding\footnote{There are actually four different traditional cases:

1002:   fixed to fixed, fixed to variable, variable to fixed, and variable

1003:   to variable. However, the last three all achieve a probability of

1004:   error of zero and so we consider them together.}. As classically

1005: understood, both consider that the source-symbols are known in advance

1006: at the encoder and that they must be mapped into a string of bits

1007: decoded by the receiver. Fixed-block coding accepts a small

1008: probability of error and constrains the length of the bit-string,

1009: while variable-length encoding constrains only the {\em expected}

1010: length of the bit-string in exchange for keeping the probability of

1011: error at zero. In the point-to-point setting, both paradigms apply

1012: generically. In contrast, distributed source coding has traditionally

1013: been explored within the fixed block context. In

1014: \cite{slepianWolf:73}, Slepian and Wolf even asked:

1015: \begin{quotation}

1016: What is the theory of variable-length encodings for

1017: correlated sources?

1018: \end{quotation}

1019:

1020: In the classical context of source realizations known entirely in

1021: advance, the answer is simple: there is no nontrivial sense of

1022: variable-length encoding that applies generically while still being

1023: interesting.\footnote{At least at sum rates close to the joint source

1024:   entropy rate.  If the rates of communication are high enough, e.g.,

1025:   equaling the log of the cardinalities of the source alphabets,

1026:   zero-error communication is possible.} This is easiest to see by

1027: example (illustrated in Figure~\ref{fig.SWcoding} and revisited as

1028: Example 2 in Section~\ref{sec.numerical}).  Suppose that the first

1029: encoder observes the random vector $\rvbx$, which consists of a

1030: sequence of $\BL$ iid uniform binary random variables.  Suppose

1031: further that the second encoder observes $\rvby$ which is related to

1032: $\rvbx$ via a memoryless binary symmetric channel with crossover

1033: probability $\rho < 0.5$. The Slepian-Wolf sum-rate bound is $H(\rvx,

1034: \rvy) = 1 + H(\rho) < 2 = H(\rvx) + H(\rvy)$.  But since the

1035: individual encoders only see uniformly distributed binary sources,

1036: they do not know when the sources are behaving jointly atypically.

1037: Therefore, they have no basis on which to adjust their encoding rates

1038: to combat joint atypicality. Since all pairs are possible when finite

1039: blocklengths are considered, the individual encoders must use distinct

1040: bit-strings for each of them. Since the expected length depends only

1041: on the uniform marginal distributions, this means that the expected

1042: length must be at least $N$. Thus, variable-length approaches do not,

1043: in general\footnote{One should note that, in analogy to zero-error

1044:   channel coding, there are special (non-generic) cases where

1045:   zero-error Slepian-Wolf coding is possible~\cite{koulgiEtAl:03}

1046:   since certain symbol pairs cannot occur.}, lead to zero-error

1047: Slepian-Wolf codes for interesting rate-points.

1048:

1049: \setlength{\unitlength}{1mm}

1050: \begin{figure}

1051: \begin{picture}(140,40)

1052: \put(50,0){\line(1,0){20}} \put(50,10){\line(1,0){20}}

1053: \put(50,0){\line(0,1){10}} \put(70, 0){\line(0,1){10}}

1054: \put(52,4){Encoder $\svy$}

1055: \put(50,30){\line(1,0){20}} \put(50,40){\line(1,0){20}}

1056: \put(50,30){\line(0,1){10}} \put(70, 30){\line(0,1){10}}

1057: \put(52,34){Encoder $\svx$}

1058: \put(90,15){\line(1,0){20}} \put(90,25){\line(1,0){20}}

1059: \put(90,15){\line(0,1){10}} \put(110, 15){\line(0,1){10}}

1060: \put(94,19){Decoder }

1061: \put(70, 5){\vector(3,2){20}}

1062: \put(77, 5){$R_y$}

1063: \put(70, 35){\vector(3,-2){20}}

1064: \put(77, 35){$R_x$}

1065: \put(40, 5){\vector(1,0){10}} \put(40, 35){\vector(1,0){10}}

1066: \put(110, 20){\vector(1,0){10}}

1067: \put(125,24){ $\hat{\rvx}_1,\hat{\rvx}_2, \ldots, \hat{\rvx}_{\BL}$}

1068: \put(125,16){$\hat{\rvy}_1,\hat{\rvy}_2, \ldots, \hat{\rvy}_{\BL}$}

1069: \put(18, 34){ ${\rvx}_1,{\rvx}_2, \ldots, \rvx_{\BL}$}

1070: \put(18,4){ ${\rvy}_1,{\rvy}_2,\ldots, \rvy_{\BL}$}

1071: \put(15, 20){ $(\rvx_i,\rvy_i)\sim p_{\rvx\rvy}$}

1072: \put(27,24){\vector(0,1){8}} \put(27,16){\vector(0,-1){8}}

1073: \end{picture}

1074: \caption[]{Slepian-Wolf distributed encoding and joint decoding of a pair of correlated sources.}

1075:      \label{fig.SWcoding}

1076: \end{figure}

1077:

1078: Another view of variable-length coding is as a tool that enables us to

1079: achieve meaningful compression despite not knowing the underlying

1080: probability distribution\footnote{In the point-to-point case, this is

1081:   very closely related to achieving a zero-error probability. The same

1082:   string can be an atypical realization of one source model while

1083:   being a typical realization of another source. Encoding all the

1084:   typical sequences correctly without knowing the underlying model

1085:   requires getting all the possible sequences correctly for any

1086:   specific model.} and allowing the rate used to adapt to the source.

1087: If there is a low-rate, but reliable\footnote{It is clear that our

1088:   techniques from \cite{sahaiSimsek:04, draperSahai:06} can also be

1089:   adapted to make the system of \cite{draperAllerton:04} work using

1090:   only noisy feedback channels.}, feedback link available from the

1091: decoder to the two separate encoders, then this sense of

1092: variable-length Slepian-Wolf coding is possible.

1093: \cite{draperAllerton:04} gives a fixed-to-variable scheme in which the

1094: stopping-time is chosen at the decoder and communicated back to the

1095: encoders over a low-rate feedback link. The goal of

1096: \cite{draperAllerton:04} is not achieving a truly zero probability of

1097: error --- rather it is willing to accept a very small probability of

1098: error in exchange for using a rate that is as small as possible.

1099:

1100: To answer the question posed by Slepian and Wolf in the more classical

1101: sense, we instead want to aim for a probability of error that goes to

1102: zero for every source symbol, but at the cost of a variable delay.  To

1103: do this, we propose stepping back and eliminating the modeling

1104: assumption of encoders having access to the entire source realization

1105: in advance. We argue that a ``streaming setting'' is required to

1106: discern the system-level analog to variable-length source coding in

1107: the distributed context. The streaming setting abstracts sources that

1108: are embedded in time as well as the fact that all physically

1109: realizable encoders/decoders must obey some form of causality. Thus

1110: ``rate'' is not just measured in bits per source symbol but in both

1111: source symbols per second and bits per second. The source-rate

1112: (symbols per second) is specified as a part of the problem while the

1113: bit-rate (bits per second) is something that we get to choose. From an

1114: engineering perspective, three desirable qualities\footnote{Of course,

1115:   ``implementation complexity'' forms a fourth and very important

1116:   consideration, but we will be ignoring that aspect of the problem.}

1117: are:

1118: \begin{itemize}

1119:  \item Using a low rate bit-pipe(s)

1120:  \item Low end-to-end latency

1121:  \item Low probability of error

1122: \end{itemize}

1123: The theory of source-coding should tell us the tradeoffs between these

1124: three desiderata. In addition, we will be interested in to what extent

1125: a streaming code can be made ``universal'' over a class of probability

1126: distributions.

1127:

1128: In the point-to-point streaming setting, regardless of whether block

1129: or variable-length compression is used, the traditional initial step

1130: is the same: group symbols into source blocks.  To compress the data

1131: blocks, either use a fixed-rate block code, or a variable-length code.

1132: The resulting encoding is then enqueued for transmission across the

1133: bit-pipe.  As long as the source entropy rate is below the data-rate,

1134: the queue will remain stable.  When block coding is used for

1135: compression, there is a constant delay through the system, and

1136: atypical source blocks are received in error. The probability of error

1137: is fixed at the system's design-time and so is the end-to-end delay.

1138:

1139: In contrast, variable-length coding induces a variable system delay.

1140: The more unlikely the source blocks, the longer the delay experienced

1141: at run-time. Thus, while {\em asymptotically} there are no errors when

1142: variable-length source codes are used (assuming an infinite buffer

1143: size), the delay till a given symbol can be decoded depends on the

1144: random source realization. Because atypical source realizations are

1145: large deviation events, the probability that some source symbol cannot

1146: be reconstructed $\delay$ samples after it enters the encoder decays

1147: exponentially\footnote{In \cite{Chang:06}, we show that variable

1148:   length codes used in this manner actually achieve the best possible

1149:   error exponent with delay. This is also related to the analysis of

1150:   \cite{jelinek:68}.} in $\delay$. The choice of acceptable end-to-end

1151: delay is left to the receiver/application.

1152:

1153: We show that this type of reliability {\em can} be achieved in a

1154: generic distributed coding context --- the probability of error goes

1155: to zero with end-to-end delay and the choice of the acceptable delay

1156: is entirely up to the decoder. Essentially, every source symbol is

1157: recovered correctly eventually with probability\footnote{The secret

1158:   here is that we are considering a probability measure over infinite

1159:   sequences. While all pairs of finite strings may be possible, most

1160:   pairs of infinite strings collectively have probability zero.} $1$.

1161: The only difference is that unlike the point-to-point case, the

1162: decoder does not necessarily know when the estimate for the symbol has

1163: converged to its final value.  Furthermore, just as in the

1164: point-to-point setting\footnote{Sliding-window Lempel-Ziv compression

1165:   is one example where data is naturally encoded sequentially. It is

1166:   also universal over sources.}, both the encoding and decoding can be

1167: made universal.

1168:

1169: In this paper, we formally define a streaming Slepian-Wolf code, and

1170: develop coding strategies both for situations when source statistics

1171: are known and when they are not. The new tool is a sequential binning

1172: argument that parallels the tree-coding arguments used to study

1173: convolutional codes. We characterize the performance of

1174: the streaming schemes through an error exponent analysis and

1175: demonstrate that the exponents are equal regardless of whether the

1176: system is informed of the source statistics (in which case we use

1177: maximum likelihood decoding) or not (in which case we use universal

1178: decoding). The universal decoder we design for the streaming problem

1179: is somewhat different from those familiar from the block coding

1180: literature, as is the nature of the error exponents.

1181:

1182: \subsection{Potential applications and practical motivation}

1183:

1184: In addition to our core interest in answering some basic questions

1185: about Slepian-Wolf coding, our formulation is also motivated by the

1186: diverse emerging application areas for distributed source coding.

1187: Media (e.g. video-conference) sources naturally have a streaming

1188: character.  Consequently, we are motivated to explore what sort of

1189: streaming Slepian-Wolf technique matches naturally to such

1190: situations.\footnote{A secondary aspect in some multimedia settings is

1191:   a natural multi-scale nature to the source --- the high order bits

1192:   are more important than the low order bits. To the extent that the

1193:   high order bits can be made ``early'' and the low-order bits can be

1194:   made ``late'', our constructions also naturally give more protection

1195:   to the early bits as compared to the later ones. While this

1196:   interpretation might eventually be important in practice, it is a

1197:   bit questionable within the simplified model this paper considers.}

1198:

1199: \subsection{Outline}

1200:

1201: Section~\ref{sec.notation} summarizes the notation used in the paper.

1202: Section~\ref{sec.mainresults} reviews the classical block-coding error

1203: exponent results for Slepian-Wolf source coding and then states the

1204: main results of this paper: sequential error exponents for

1205: Slepian-Wolf source coding. Section~\ref{sec.numerical} presents a

1206: numeric study of two example sources. We observe that the sequential

1207: error exponent is often the same as the block coding error exponent.

1208: Sections \ref{sec.entropy}, \ref{sec.incDecSI} and \ref{sec.SW} prove

1209: the theorems in Section~\ref{sec.mainresults}. We start with

1210: sequential source coding for single sources in Section~\ref{sec.entropy}.

1211: This is the simplest case but it provides insight into the nature of

1212: the sequential source coding problem and sequential error events. We show

1213: that the sequential error exponent is the same as the random block

1214: source coding error exponent. Section~\ref{sec.incDecSI} moves on to

1215: the case with decoder side-information. Finally, Section~\ref{sec.SW}

1216: presents the proof of the main result of the paper. We derive the

1217: sequential error exponent of distributed source coding for correlated

1218: sources. This error exponent is strictly positive everywhere inside the

1219: achievable rate region of \cite{slepianWolf:73}. For all these three

1220: scenarios in Sections \ref{sec.entropy}, \ref{sec.incDecSI} and

1221: \ref{sec.SW}, both ML and universal decoding rules are studied.  The

1222: appendix shows that the resulting error exponents are indeed the same.

1223:

1224:

1225:

1226: \section{Notation}\label{sec.notation}

1227:

1228:

1229: We use serifed-fonts, e.g., $\svx$ to indicate sample values, and

1230: sans-serif, e.g., $\rvx$, to indicate random variables.  Bolded fonts

1231: are reserved to indicate sample or random vectors, e.g., $\svbx =

1232: \svx^n$ and $\rvbx = \rvx^n$, respectively, where the vector length

1233: ($n$ here) is understood from the context.  Subsequences, e.g.,

1234: $\svx_l, \svx_{l+1}, \ldots, \svx_{n}$ are denoted as $\svx_l^n$ where

1235: $\svx_i^j \defeq \emptyset$ if $i>j$.  Distributions are indicated

1236: with lower-case $p$, e.g., $\rvx$ is distributed according to

1237: $p_{\rvx}(\svx)$.  Sets and their elements are denoted as, e.g., $\svx

1238: \in \cX$, and their cardinality by $|\cX|$. We use calligraphic font

1239: to denote sets, $\mathcal{X}$, $\mathcal{F}$, $\mathcal{W}$ etc, and

1240: reserve $\mathcal{E}$ and $\mathcal{D}$ to denote encoding and

1241: decoding functions, respectively.  We use standard notation for types,

1242: see, e.g., \cite{csiszarKorner}.  Let $N(a; \svbx)$ denote the number

1243: of symbols in the length-$n$ vector $\svbx$ that take on value $a$.

1244: Then, $\svbx$ is of type $P$ if $P(a) = N(a; \svbx)/n$.  The

1245: type-class, or set of length-$n$ vectors of type $P$ is denoted

1246: $\tclass_{P}$.  A sequence $\svby$ has conditional type $V$ given

1247: $\svbx$ if $N(a,b; \svbx, \svby) = N(a; \svbx) V(b|a) = n P(a) V(b|a)$

1248: for every $a, b$. The set of sequences $\svby$ having conditional type

1249: $V$ with respect to $\svbx$ is called the $V$-shell of $\svbx$ and is

1250: denoted by $\tclass_{V}(\svbx)$.  When considered together, the pair

1251: $(\svbx, \svby)$ is said to have joint type $V \times P$.  We always

1252: use upper-case, e.g., $P$ and $V$, to denote length-$n$ types and

1253: conditional types.  As we often discuss the types of subsequences we

1254: add a superscript notation to remind the reader of the length of the

1255: subsequence in question.  If, for instance, the subsequence under

1256: consideration is $\svx_{l}^n$ we write $\svx_{l}^n \in

1257: \tclass_{\PNL}$.  Similarly we use $\VNL$ for the conditional type of

1258: length-$(n-l+1)$, and $\VNL \times \PNL$ for the joint type.

1259:

1260: Given a joint type $V \times P$, entropies and conditional entropies

1261: are denoted as $H(P)$ and $H(V|P)$, respectively. The KL divergence

1262: between two distributions $q$ and $p$ is denoted by $D(q \| p)$.

1263:

1264: \section{Main Results}\label{sec.mainresults}

1265:

1266: In this section, we begin by reviewing classical results on the error

1267: exponents of distributed block coding.  We then present the main

1268: results of the paper: error exponents for streaming Slepian-Wolf

1269: coding and its special cases: point-to-point coding and source coding

1270: with decoder side information.  We analyze both maximum likelihood and

1271: universal decoding and show that the achieved exponents are equal.

1272: Leaving numerical examples and proofs for later sections, we here

1273: compare the form of the streaming exponents with their block coding

1274: counterparts.

1275:

1276:

1277: %%%%%%%%%%%%%%%%%%%%%%%

1278: \subsection{Block source coding and error exponents}

1279:

1280: In the classic block-coding Slepian-Wolf paradigm, full length-$\BL$

1281: vectors $\rvbx$ and $\rvby$ are observed by their respective encoders

1282: before communication commences.  In this situation a rate-$(\Rx, \Ry)$

1283: length-$\BL$ block source code consists of an encoder-decoder triplet

1284: $(\mathcal{E}^x_{\BL},\mathcal{E}^y_{\BL}, \mathcal{D}_{\BL})$, as we

1285: will define shortly. For the rate-region considerations, the general

1286: case of distributed encoders can be considered by using time-sharing

1287: among codes that alternate between sending at rates close to the

1288: marginal entropy and those that correspond to perfectly known

1289: side-information. However, it is easy to see that this results in a

1290: substantial loss of error-exponent even in the block-coding case. To

1291: get good exponents, something else is required:

1292:

1293: \begin{defn}\label{def.SWblockCode}

1294:   A randomized length-$\BL$ rate-$(\Rx, \Ry)$ block encoder-decoder

1295:   triplet  $(\mathcal{E}^x_{\BL},\mathcal{E}^y_{\BL},\mathcal{D}_{\BL})$ is a

1296:   set of maps

1297: %

1298: \begin{eqnarray*}

1299: \begin{array}{lclcl}

1300: \mathcal{E}^x_{\BL} &: & \mathcal{X}^{\BL} \rightarrow \{0,1\}^{ \BL R_x},

1301: & \mbox{e.g.,} & \mathcal{E}^x_{\BL}(x^{\BL})=a^{ \BL R_x}\\

1302: %

1303: \mathcal{E}^y_{\BL} &: & \mathcal{Y}^{\BL} \rightarrow \{0,1\}^{ \BL R_y},

1304: & \mbox{e.g.,} & \mathcal{E}^y_{\BL}(y^{\BL})=b^{ \BL R_y}\\

1305: %

1306: \mathcal{D}_{\BL} &: & \{0,1 \}^{ \BL R_x }\times \{0,1 \}^{ \BL R_y }

1307: \rightarrow \mathcal{X}^{\BL}\times \mathcal{Y}^{\BL}, & \mbox{e.g.,}

1308: &\mathcal{D}_{\BL}(a^{ \BL R_x }, b^{ \BL R_y })=(\hat{x}^{\BL},\hat{y}^{\BL})

1309: \end{array}

1310: \end{eqnarray*}

1311: %

1312: where common randomness, shared between the encoders and the decoder

1313: is assumed.  This allows us to randomize the mappings independently of

1314: the source sequences.

1315: \end{defn}

1316:

1317: The error probability typically considered in Slepian-Wolf coding is

1318: the joint error probability, $\Pr[(\rvx^{\BL}, \rvy^{\BL})\neq

1319: (\hat{\rvx}^{\BL},\hat{\rvy}^{\BL})]=\Pr[(\rvx^{\BL},\rvy^{\BL})\neq

1320: \mathcal{D}_{\BL}(\mathcal{E}^x_{\BL}(\rvx^{\BL}),

1321: \mathcal{E}^y_{\BL}(\rvy^{\BL}))]$.  This probability is taken over

1322: the random source vectors as well as the randomized mappings.  An

1323: error exponent $E$ is said to be achievable if there exists a family

1324: of rate-$(\Rx, \Ry)$ encoders and decoders

1325: $\{(\mathcal{E}^x_{\BL},\mathcal{E}^y_{\BL},\mathcal{D}_{\BL})\}$,

1326: indexed by $\BL$,

1327: such that %\footnote{We use nats and $\log$ in this paper.}

1328: %

1329: \begin{equation}

1330: \lim_{\BL \rightarrow \infty}-\frac{1}{\BL}\log

1331: \Pr[(\rvx^{\BL}, \rvy^{\BL})\neq

1332: (\hat{\rvx}^{\BL},\hat{\rvy}^{\BL})] \geq E. \label{eq.SWblockErrExp}

1333: \end{equation}

1334:

1335: In this paper, we study random source vectors $(\rvbx, \rvby)$ that

1336: are iid across time but may have dependencies at any given time:

1337: \begin{equation*}

1338: p_{\rvbx,\rvby}(\svbx,\svby)=\prod_{i=1}^{\BL}p_{\rvx,\rvy}(\svx_i,\svy_i).

1339: \end{equation*}

1340:

1341: For such iid sources, upper and lower bounds on the achievable error

1342: exponents are derived in~\cite{gallagerTech:76,csiszarKorner}.  These

1343: results are summarized by the following theorem.

1344:

1345: \begin{thm}\label{THM.INTRO}

1346:   (Lower bound) Given a rate pair $(\Rx, \Ry)$ such that $\Rx >

1347:   H(\rvx|\rvy)$, $\Ry > H(\rvy|\rvx)$, and $\Rx + \Ry > H(\rvx, \rvy)$,

1348:   for all

1349: \begin{equation}

1350: E < \min_{\rvxBar,\rvyBar} D(p_{\rvxBar,\rvyBar}\|p_{\rvx\rvy})+ \big|

1351: \min[R_x+R_y-H(\rvxBar,\rvyBar), R_x-H(\rvxBar|\rvyBar),

1352:  R_y-H(\rvyBar|\rvxBar) ]\big|^{+} \label{eq.SWblockLowBnd}

1353: \end{equation}

1354: there exists a family of randomized encoder-decoder mappings as

1355: defined in Definition~\ref{def.SWblockCode} such

1356: that~(\ref{eq.SWblockErrExp}) is satisfied.

1357: In~(\ref{eq.SWblockLowBnd}) the function $|z|^{+} = z$ if $z \geq 0$

1358: and $|z|^{+} = 0$ if $z < 0$.

1359:

1360: (Upper bound) Given a rate pair $(\Rx, \Ry)$ such that $\Rx >

1361:   H(\rvx|\rvy)$, $\Ry > H(\rvy|\rvx)$, and $\Rx + \Ry > H(\rvx, \rvy)$,

1362:   for all

1363: %

1364: \begin{equation}

1365: E >\min \left\{

1366: \min_{ \rvxBar,\rvyBar: R_x<H(\rvxBar|\rvyBar)}

1367: D(p_{\rvxBar,\rvyBar}\|p_{\rvx\rvy}) ,

1368: \min_{ \rvxBar,\rvyBar: R_y<H(\rvyBar|\rvxBar)}

1369: D(p_{\rvxBar,\rvyBar}\|p_{\rvx\rvy}),

1370: \min_{ \rvxBar,\rvyBar: R_x+R_y<H(\rvxBar,\rvyBar)}

1371: D(p_{\rvxBar,\rvyBar}\|p_{\rvx\rvy}) \right\}

1372: \label{eq.SWblockUpBnd}

1373: \end{equation}

1374: %

1375: there does not exist a randomized encoder-decoder mapping as defined

1376: in Definition~\ref{def.SWblockCode} such that~(\ref{eq.SWblockErrExp}) is

1377: satisfied.

1378:

1379: In both bounds $(\rvxBar,\rvyBar)$ are dummy random variables

1380: with joint distribution $p_{\rvxBar,\rvyBar}$.

1381: \end{thm}

1382:

1383:

1384: {\em Remark:} As long as $(R_x,R_y)$ is in the interior of the

1385: achievable region, i.e., $R_x> H(\rvx|\rvy)$, $R_y> H(\rvy|\rvx)$ and

1386: $R_x+R_y> H(\rvx, \rvy)$ then the lower-bound~(\ref{eq.SWblockLowBnd})

1387: is positive.  The achievable region is illustrated in

1388: Fig.~\ref{fig.SW_region_intro}.  As shown in \cite{csiszarKorner},

1389: the upper and lower bounds~(\ref{eq.SWblockUpBnd})

1390: and~(\ref{eq.SWblockLowBnd}) match when the rate pair $(R_x,R_y)$ is

1391: achievable and close to the boundary of the region. This is analogous

1392: to the high rate regime in channel coding where the random coding

1393: bound (analogous to~(\ref{eq.SWblockLowBnd})) and the sphere packing

1394: bound (analogous to~(\ref{eq.SWblockUpBnd})) agree.

1395:

1396: Theorem~\ref{THM.INTRO} can also be used to generate bounds on the

1397: exponent for source coding with decoder side information (i.e.,

1398: $\rvby$ observed at the decoder), and for source coding without side

1399: information (i.e., $\rvby$ is a constant).  These corollaries will

1400: prove useful as a basis for comparison as we build up to the complete

1401: solution for streaming Slepian-Wolf coding.

1402:

1403:

1404: \begin{corol}\label{thm.blockSI}

1405:   (Source coding with decoder side information) Consider a

1406:   Slepian-Wolf problem where $\rvby$ is known by the decoder. Given a

1407:   rate $\Rx$ such that $\Rx > H(\rvx|\rvy)$, then for all

1408: %

1409: \begin{equation}

1410: E < \min_{\rvxBar,\rvyBar} D(p_{\rvxBar,\rvyBar}\|p_{\rvx\rvy}) +

1411: |R_x-H(\rvxBar|\rvyBar)|^{+}, \label{eq.SIblockLowBnd}

1412: \end{equation}

1413: %

1414: there exists a family of randomized encoder-decoder mappings as

1415: defined in Definition~\ref{def.SWblockCode} such

1416: that~(\ref{eq.SWblockErrExp}) is satisfied.

1417: \end{corol}

1418:

1419: The proof of Corollary~\ref{thm.blockSI} follows from

1420: Theorem~\ref{THM.INTRO} by letting $\Ry$ be arbitrarily large.

1421: Similarly, by letting $\rvby$ be deterministic so that $H(\rvx|\rvy)

1422: = H(\rvx)$ and $H(\rvy) = 0$, we get the following random-coding

1423: bound for the point-to-point case of a single source $\rvbx$.

1424:

1425: \begin{corol}\label{thm.blockEnt} (point-to-point)

1426:   Consider a Slepian-Wolf problem where $\rvby$ is deterministic,

1427:   i.e., $\rvby = \svby$.  Given a rate $\Rx$ such that $\Rx >

1428:   H(\rvx)$, for all

1429: %

1430: \begin{equation}

1431: E < \min_{\rvxBar} D(p_{\rvxBar}\|p_{\rvx})+ |R_x-H(\rvxBar)|^{+}

1432: =E_x(R_x) \label{eq.EntblockLowBnd}

1433: \end{equation}

1434: %

1435: there exists a family of randomized encoder-decoder triplets as defined

1436: in Definition~\ref{def.SWblockCode} such that~(\ref{eq.SWblockErrExp}) is

1437: satisfied.

1438: \end{corol}

1439:

1440:

1441:

1442: \setlength{\unitlength}{1mm}

1443:  \begin{figure}[htbp]

1444:    \begin{center}

1445:      \leavevmode

1446:  \begin{picture}(130,80)

1447:

1448: \put(40, 10){\vector(1,0){55}}

1449:  \put(40, 10){\vector(0,1){55}}

1450:

1451: \put(40,70){$R_y$} \put(100,10){$R_x$}

1452:

1453: \put(28,40){$H(\rvy)$} \put(28,31){$H(\rvy| \rvx)$}

1454:

1455: \put(68,5){$H(\rvx)$} \put(56, 5){$H(\rvx| \rvy)$}

1456:

1457: \put(28,60){$\log|\mathcal{Y}|$}   \put(87,5){$\log|\mathcal{X}|$}

1458:

1459:

1460:  \put(90,60){\line(0,-1){29}}  \put(90,31){\line(-1,0){20}}

1461:   \put(70,31){\line(-1, 1){9}}

1462:    \put(90,60){\line(-1,0){29}}  \put(61,60){\line(0,-1){20}}

1463:

1464:  \put(64, 52){Achievable} \put(66, 48){Region}

1465:

1466:  \put(47,

1467: 25){$R_x+R_y=H(\rvx,\rvy)$} \put(60, 28){\vector(1,1){6.5}}

1468:

1469: \end{picture}

1470: \caption{ Achievable region for Slepian-Wolf source coding }

1471: \label{fig.SW_region_intro}

1472: \end{center}

1473: \end{figure}

1474:

1475: \subsection{Sequential Distributed Source Coding}

1476:

1477: We now state our main results for streaming encoding, and contrast

1478: them with the block-coding results of the last section.  To begin, we

1479: define a streaming encoder.

1480:

1481: \begin{defn}

1482: \label{def.seqn_coding}

1483: A randomized sequential encoder-decoder triplet

1484: $\mathcal{E}^x,\mathcal{E}^y,\mathcal{D}$ is a sequence of mappings,

1485: $\{\mathcal{E}^x_j\},j=1,2,...$, $\{\mathcal{E}^y_j\},j=1,2,...$ and

1486: $\{\mathcal{D}_j\},j=1,2,...$:

1487:

1488: \begin{equation}

1489: \begin{array}{lclcl}

1490: \mathcal{E}^x_j & : & \mathcal{X}^{j} \longrightarrow

1491:     \{0,1\}^{ R_x }, & \mbox{e.g.,} & \mathcal{E}^x_j(x^j)=a_{ (j-1)R_x +1}^{

1492:       jR_x }, \\

1493: %

1494: \mathcal{E}^y_j & : & \mathcal{Y}^{j}

1495:     \longrightarrow \{0,1\}^{ R_y }, & \mbox{e.g.,} & \mathcal{E}^y_j(y^j)=b_{

1496:       (j-1)R_y +1}^{ jR_y }.

1497: \end{array}

1498: \label{eq.xEnc}

1499: \end{equation}

1500: %

1501: Common randomness, shared between encoders and decoder, is assumed.

1502: This allows us to randomize the mappings independently of the source

1503: sequence.

1504: \end{defn}

1505:

1506: In this paper, the sequential encoding maps will always work by

1507: assigning random ``parity bits'' in a causal manner to the observed

1508: source sequence.  That is, the $\Rx$ (or $\Ry$) bits generated at each

1509: time in~(\ref{eq.xEnc}), are iid Bernoulli-$(0.5)$.\footnote{We assume

1510:   that $\Rx$ and $\Ry$ are integer.  To justify this assumption note

1511:   that we can always group sets of $\alpha$ successive symbols into

1512:   super-symbols.  These larger symbols can be encoded at an average

1513:   rate $\alpha \Rent$.  Generally, if we group $\alpha$ symbols

1514:   together, and transmit $\beta$ bits per super-symbol, we can realize

1515:   an average rate $\beta/ \alpha$, i.e., a rational rate.  If desired,

1516:   non-integer average rates are easily implemented by a time-varying

1517:   transmission rate.  For example, say we want to implement an average

1518:   encoding rate of $5/4$ bits per source symbol.  Say we generate one

1519:   new parity bit per symbol for each symbol observed except for the

1520:   fourth symbol, eighth symbol, etc, when we generate two.  The

1521:   average encoding rate is $5/4$.  As long as the decoding delay

1522:   $\delay$ we target is long enough so that the decoder received an

1523:   ``average'' number of encoded bits -- $\delay \Rent$ -- before we

1524:   must make an estimate (e.g., if $\delay \gg 1 / \Rent$), these

1525:   small-scale issues even out.  In particular, they do not affect the

1526:   exponents.}  Since parity bits are assigned causally, if two source

1527: sequences share the same length-$l$ prefix, then their first $l

1528: {\Rent}$ parity bits must match.  Subsequent parities are drawn

1529: independently. Such a sequential coding strategy is the source-coding

1530: parallel to tree and convolutional codes used for channel coding

1531: \cite{Forney:74}. In fact, we call these ``parity bits'' as they can

1532: be generated using an infinite constraint-length time-varying random

1533: convolutional code.

1534:

1535: \begin{defn}

1536: The decoder mapping

1537: %

1538: \begin{eqnarray}

1539:   &&\mathcal{D}_j: \{0,1 \}^{ jR_x }\times\{0,1 \}^{ jR_y }

1540:   \longrightarrow \mathcal{X}^j \times \mathcal{Y}^j \nonumber\\

1541: %

1542:   &&\mathcal{D}_j(a^{ jR_x },b^{ jR_y

1543:   })=(\svxhat_{1}^{j}(j),\svyhat_{1}^{j}(j))\nonumber

1544: \end{eqnarray}

1545: %

1546: At each time $j$ the decoder $\mathcal{D}_j$ outputs estimates of all

1547: the source symbols that have entered the encoder by time $j$.

1548: \end{defn}

1549:

1550: {\em Remark:} While we state Definition~\ref{def.seqn_coding} only for

1551: Slepian-Wolf coding, it immediately specializes to source coding with

1552: decoder side information (dropping the $\mathcal{E}^y$ and revealing

1553: $\rvy^n$ to the decoder), and source coding without side information

1554: (dropping the $\mathcal{E}^y$).  We present results for both these

1555: situations as well.

1556:

1557: In this paper we study two error probabilities.  We define the pair of

1558: source estimates at time $n$ as $(\hat{\rvx}^n, \hat{\rvy}^n) =

1559: \mathcal{D}_n(\prod_{j=1}^n \mathcal{E}^x_j, \prod_{j=1}^n

1560: \mathcal{E}^y_j)$, where $\prod_{j=1}^n \mathcal{E}^x_j$ indicates the

1561: full $n \Rx$ bit stream from encoder $x$ up to time $n$.  We use

1562: $(\hat{\rvx}^{n - \delay}, \hat{\rvy}^{n - \delay})$ to indicate the

1563: first $n - \delay$ symbols of each estimate, where for conciseness of

1564: notation both the estimate time, $n$, and the decoding delay,

1565: $\delay$, are indicated in the superscript.  With these definitions

1566: the two error probabilities we study are

1567: %

1568: \begin{align}

1569: \Pr[\rvxhat^{n - \delay} \neq \rvx^{n - \delay}] \;\; \mbox{and} \;\;

1570: \Pr[\rvyhat^{n - \delay} \neq \rvy^{n - \delay}]. \nonumber

1571: \end{align}

1572: %

1573: A pair of exponents $E_x > 0$ and $E_y > 0$ is said to be achievable

1574: if there exists a family of rate-$(\Rx, \Ry)$ encoders and decoders

1575: $\{(\mathcal{E}_j^x, \mathcal{E}_j^y, \mathcal{D}_j)\}$ such that

1576: %

1577: \begin{align}

1578: \lim_{\delay \rightarrow \infty} \lim_{n \rightarrow \infty}

1579: - \frac{1}{\delay} \log \Pr[\hat{\rvx}^{n - \delay} \neq \rvx^{n - \delay}]

1580: &\geq E_x \label{eq.errExpX}\\

1581: %

1582: \lim_{\delay \rightarrow \infty} \lim_{n \rightarrow \infty}

1583: - \frac{1}{\delay} \log \Pr[\hat{\rvy}^{n - \delay} \neq \rvy^{n - \delay}]

1584: &\geq E_y \label{eq.errExpY}

1585: \end{align}

1586:

1587: {\em Remarks:} In contrast to~(\ref{eq.SWblockErrExp}) the error

1588: exponent we look at is in the delay, $\delay$, rather than total

1589: observation time, $n$. The order of the limits is important since the

1590: total time-period $n$ is allowed to go to infinity faster than the

1591: delay $\delay$. While the definitions

1592: of~(\ref{eq.errExpX})--(\ref{eq.errExpY}) and

1593: of~(\ref{eq.SWblockErrExp}) are asymptotic in nature, the results hold

1594: for finite block-lengths and delays as well.  Finally, we note that

1595: while in~(\ref{eq.SWblockErrExp}) the error exponent of a joint error

1596: event on either $\rvbx$ or $\rvby$ is considered, we provide a refined

1597: analysis specifying potentially different exponents on either

1598: decision.  The results for joint errors are found by taking the

1599: minimum of the individual exponents, i.e.,

1600: %

1601: \begin{equation*}

1602: \lim_{\delay \rightarrow \infty} \lim_{n \rightarrow \infty}

1603: - \frac{1}{\delay} \log \Pr[(\hat{\rvx}^{n-\delay}, \hat{\rvy}^{n - \delay})

1604: \neq (\rvx^{n-\delay}, \rvy^{n - \delay})]  \geq

1605: \min\{E_x, E_y\}.

1606: \end{equation*}

1607:

1608:

1609: \subsection{Streaming source coding}

1610:

1611: Our first results concern streaming coding in the point-to-point

1612: setting.  The first theorem we state gives random coding error

1613: exponents for maximum likelihood decoding where the source statistics

1614: are known, and the second exponents for universal decoding, where they

1615: are not.

1616: %

1617: \begin{thm} \label{thm.entCodeML}

1618:   Given a rate $\Rent > H(\PxRV)$, there exists a randomized streaming

1619:   encoder and maximum likelihood decoder pair (per

1620:   Definition~\ref{def.seqn_coding}) such that for all $E < \expML(\Rent)$

1621:   there is a constant $K > 0$ such that $\Pr[\rvxhat^{n - \delay} \neq

1622:   \rvx^{n - \delay}] \leq K \exp\{- \delay E\}$ for all

1623:   $n, \delay \geq 0$ where

1624: %

1625: \begin{equation}

1626: \expML(\Rent) = \sup_{0 \leq \rho \leq 1} \rho \Rent - (1 + \rho) \log

1627: \left( \sum_{\svx} \PxRV(\svx)^{\frac{1}{1 + \rho}} \right).

1628: \label{eq.errExpML}

1629: \end{equation}

1630: \end{thm}

1631:

1632:

1633: \begin{thm} \label{thm.entCodeUniv}

1634:   Given a rate $\Rent > H(\PxRV)$, there exists a randomized streaming

1635:   encoder and universal decoder pair (per Definition~\ref{def.seqn_coding})

1636:   such that for all $E < \expUniv(\Rent)$ there is a constant $K > 0$

1637:   such that $\Pr[\rvxhat^{n - \delay} \neq \rvx^{n - \delay}] \leq K

1638:   \exp\{- \delay E\}$ for all $n, \delay \geq 0$ where

1639: %

1640: \begin{equation}

1641: \expUniv(\Rent) = \inf_q D(q \| \PxRV) + |\Rent - H(q)|^{+},

1642: \label{eq.errExpUniv}

1643: \end{equation}

1644: %

1645: where $q$ is an arbitrary probability distribution on $\cX$ and where

1646: $|z|^{+} = z$ if $z \geq 0$ and $|z|^{+} = 0$ if $z < 0$.

1647: \end{thm}

1648:

1649: {\em Remark:} The error exponents of Theorems~\ref{thm.entCodeML}

1650: and~\ref{thm.entCodeUniv} both equal their respective random

1651: block-coding exponents for ML and universal decoders.

1652: For example, compare~(\ref{eq.errExpUniv})

1653: with~(\ref{eq.EntblockLowBnd}).  The main difference in the

1654: formulation is that the error probability decays with delay $\delay$

1655: rather than block length $\BL$.  Furthermore, it is known

1656: that~(\ref{eq.errExpML}) and~(\ref{eq.errExpUniv}) are equal --- see

1657: \cite{csiszarKorner} exercise $13$ on page $44$.  Such equality is

1658: required by the formal definition of a universal scheme, i.e., for the

1659: same source statistics and coding rates, the universal decoder should

1660: asymptotically achieve the same error exponent as the maximum

1661: likelihood decoder.  See~\cite{lapidothNarayan:98} for a detailed

1662: discussion of universal versus maximum likelihood decoding in the

1663: context of channel coding.

1664:

1665:

1666: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

1667: \subsection{Streaming distributed source coding with decoder side information}

1668:

1669: This section summarizes our results for distributed streaming source

1670: coding when the side information is observed at the decoder, but not

1671: the encoder:

1672:

1673: \begin{thm} \label{thm.decSIML}

1674:   Given a rate $\Rent > H(\rvx|\rvy)$, there exists a randomized

1675:   encoder decoder pair (per Definition~\ref{def.seqn_coding}) such that for

1676:   all $E < \expMLSI(\Rent)$ there is a constant $K > 0$ such that

1677:   $\Pr[\rvxhat^{n-\delay} \neq \rvx^{n-\delay}] \leq K \exp\{- \delay

1678:   E\}$ for all $n, \delay \geq 0$ where

1679: %

1680: \begin{equation}

1681: \expMLSI(\Rent) = \sup_{0 \leq \rho \leq 1} \rho \Rent - \log \Big[

1682: \sum_{\svy} \Big[ \sum_{\svx}

1683: p_{\rvx\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big]^{1+\rho} \Big].

1684: \label{eq.errExpMLSI}

1685: \end{equation}

1686: \end{thm}

1687:

1688: \begin{thm} \label{thm.decSIUniv}

1689:   Given a rate $\Rent > H(\rvx|\rvy)$, there exists a randomized

1690:   encoder decoder pair (per Definition~\ref{def.seqn_coding} ) such that for

1691:   all $E < \expUnivSI(\Rent)$ there is a constant $K > 0$ such that

1692:   $\Pr[\rvxhat^{n-\delay} \neq \rvx^{n-\delay}] \leq K \exp\{- \delay

1693:   E\}$ for all $n, \delay \geq 0$ where

1694: %

1695: \begin{equation}

1696: \expUnivSI(\Rent)

1697: =\inf_{\rvxtil, \rvytil} D(p_{\rvxtil, \rvytil} \| \PxyRV) +

1698: |\Rent - H(\rvxtil | \rvytil)|^{+}, \label{eq.errExpUnivSI}

1699: \end{equation}

1700: %

1701: and $(\rvxtil, \rvytil)$ are random variables with joint distribution

1702: $p_{\rvxtil, \rvytil}$, $H(\rvxtil | \rvytil)$ is their conditional

1703: entropy, and where $|z|^{+} = z$ if $z \geq 0$ and $|z|^{+} = 0$ if $z

1704: < 0$.

1705: \end{thm}

1706:

1707:

1708: {\em Remark:} Similar to the point-to-point case, the error exponents

1709: of Theorems~\ref{thm.decSIML} and~\ref{thm.decSIUniv} both equal their

1710: respective random block-coding exponents.  For example,

1711: compare~(\ref{eq.errExpUnivSI}) with~(\ref{eq.SIblockLowBnd}).

1712: Similarly, (\ref{eq.errExpMLSI}) and~(\ref{eq.errExpUnivSI}) can be

1713: shown to be equal.

1714:

1715:

1716: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

1717: \subsection{Streaming Slepian-Wolf coding}

1718:

1719: In contrast to streaming point-to-point coding and streaming source

1720: coding with decoder side information, the general case of streaming

1721: Slepian-Wolf coding with two distributed encoders results in error

1722: exponents that differ from their block coding counterparts.  In the

1723: streaming setting, fundamentally different error events dominate as

1724: compared to the block setting.

1725:

1726:

1727: \begin{thm} \label{thm.jointCodeML}

1728:

1729:   Let $(\Rx, \Ry)$ be a rate pair such that $\Rx > H(\rvx|\rvy)$, $\Ry

1730:   > H(\rvy|\rvx)$, $\Rx + \Ry > H(\rvx, \rvy)$.  Then, there exists a

1731:   randomized encoder pair and maximum likelihood decoder triplet (per

1732:   Definition~\ref{def.seqn_coding}) that satisfies the following three

1733:   decoding criteria.

1734:

1735:   (i) For all $E < E_{ML,SW,x}(\Rx, \Ry)$, there is a constant $K > 0$

1736:   such that

1737: $\Pr[ \rvxhat^{n-\delay} \neq \rvx^{n-\delay}] \leq K

1738:   \exp\{- \delay E\}$ for all $n, \delay \geq 0$ where

1739: %

1740: \begin{equation}

1741: E_{ML,SW, x}(\Rx,  \Ry) = \min \Bigg\{ \inf_{\gamma \in [0,1]}

1742: E_x^{ML}(\Rx, \Ry, \gamma), \inf_{\gamma \in [0,1]}

1743: \frac{1}{1-\gamma} E_y^{ML}(\Rx, \Ry, \gamma) \Bigg\}.\nonumber

1744: \end{equation}

1745:

1746:

1747: (ii) For all $E < E_{ML,SW,y}(\Rx, \Ry)$ there is a constant $K > 0$

1748: such that $\Pr[\rvyhat^{n-\delay} \neq \rvy^{n-\delay}] \ \leq K

1749: \exp\{- \delay E\}$ for all $n, \delay \geq 0$ where

1750: %

1751: \begin{equation}

1752: E_{ML,SW, y}(\Rx,  \Ry) = \min \Bigg\{ \inf_{\gamma \in [0,1]}

1753: \frac{1}{1-\gamma} E_x^{ML}(\Rx, \Ry, \gamma), \inf_{\gamma \in

1754: [0,1]} E_y^{ML}(\Rx, \Ry, \gamma) \Bigg\}.\nonumber

1755: \end{equation}

1756:

1757: (iii) For all $E < E_{ML,SW,xy}(\Rx, \Ry)$ there is a constant $K >

1758: 0$ such that $\Pr[(\rvxhat^{n-\delay}, \rvyhat^{n-\delay}) \neq

1759: (\rvx^{n-\delay}, \rvy^{n-\delay})] \ \leq K \exp\{- \delay E\}$ for

1760: all $n, \delay \geq 0$ where

1761: %

1762: \begin{equation}

1763: E_{ML,SW, xy}(\Rx,  \Ry) = \min \Bigg\{ \inf_{\gamma \in [0,1]}

1764: E_x^{ML}(\Rx, \Ry, \gamma), \inf_{\gamma \in [0,1]} E_y^{ML}(\Rx,

1765: \Ry, \gamma) \Bigg\}.\nonumber

1766: \end{equation}

1767:

1768:

1769: In definitions (i)--(iii),

1770: %

1771: \begin{equation}

1772: \begin{array}{lll}

1773: E_x^{ML}(\Rx, \Ry, \gamma) & = & \sup_{\rho \in [0,1]} [ \gamma

1774: E_{x|y}(\Rx, \rho) + (1-\gamma) E_{xy}(\Rx, \Ry, \rho)]

1775: \vspace{1ex} \\

1776: %

1777: E_y^{ML}(\Rx, \Ry, \gamma) & = & \sup_{\rho \in [0,1]} [ \gamma

1778: E_{y|x}(\Ry, \rho) + (1-\gamma) E_{xy}(\Rx, \Ry, \rho)]

1779: \end{array} \label{eq.compoundExp}

1780: \end{equation}

1781: %

1782: and

1783: %

1784: \begin{equation}

1785: \begin{array}{lll}

1786: E_{xy}(\Rx, \Ry, \rho) & = & \rho (\Rx + \Ry) - \log \Big[ \sum_{\svx, \svy}

1787: p_{\rvx\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big]^{1 + \rho} \vspace{1ex}\\

1788: %

1789: E_{x|y}(\Rx, \rho) & =  & \rho \Rx - \log \Big[ \sum_{\svy}

1790: \Big[ \sum_{\svx}

1791: p_{\rvx\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big]^{1+\rho} \Big]\vspace{1ex}\\

1792: %

1793: E_{y|x}(\Ry, \rho) & = & \rho \Ry - \log \Big[ \sum_{\svx} \Big[

1794: \sum_{\svy} p_{\rvx\rvy}(\svx,\svy)^{\frac{1}{1+\rho}}

1795: \Big]^{1+\rho} \Big] \vspace{1ex}

1796: \end{array}\label{eq.defBasicExp}

1797: \end{equation}

1798: %

1799: \end{thm}

1800:

1801: \begin{thm} \label{thm.jointCode}

1802:

1803:   Let $(\Rx, \Ry)$ be a rate pair such that $\Rx > H(\rvx|\rvy)$, $\Ry

1804:   > H(\rvy|\rvx)$, $\Rx + \Ry > H(\rvx, \rvy)$.  Then, there exists a

1805:   randomized encoder pair and universal decoder triplet (per

1806:   Definition~\ref{def.seqn_coding}) that satisfies the following three

1807:   decoding criteria.

1808:

1809:   (i) For all $E < E_{UN,SW,x}(\Rx, \Ry)$, there is a constant $K >

1810:   0$ such that $\Pr[ \rvxhat^{n-\delay} \neq \rvx^{n-\delay}] \leq K

1811:   \exp\{- \delay E\}$ for all $n, \delay \geq 0$ where

1812: %

1813: \begin{equation}

1814: E_{UN,SW, x}(\Rx,  \Ry) = \min \Bigg\{ \inf_{\gamma \in [0,1]}

1815: E_x^{UN}(\Rx, \Ry, \gamma), \inf_{\gamma \in [0,1]}

1816: \frac{1}{1-\gamma} E_y^{UN}(\Rx, \Ry, \gamma) \Bigg\}.

1817: \end{equation}

1818:

1819:

1820: (ii) For all $E < E_{UN,SW,y}(\Rx, \Ry)$, there is a constant $K >

1821: 0$ such that $\Pr[ \rvyhat^{n-\delay} \neq \rvy^{n-\delay}] \leq K

1822: \exp\{- \delay E\}$ for all $n, \delay \geq 0$ where

1823: %

1824: \begin{equation}

1825: E_{UN,SW, y}(\Rx,  \Ry) = \min \Bigg\{ \inf_{\gamma \in [0,1]}

1826: \frac{1}{1-\gamma} E_x^{UN}(\Rx, \Ry, \gamma), \inf_{\gamma \in

1827: [0,1]} E_y^{UN}(\Rx, \Ry, \gamma) \Bigg\}.

1828: \end{equation}

1829:

1830: (iii) For all $E < E_{UN,SW,xy}(\Rx, \Ry)$, there is a constant $K >

1831: 0$ such that $\Pr[ (\rvxhat^{n-\delay}, \rvyhat^{n-\delay}) \neq

1832: (\rvx^{n-\delay}, \rvy^{n-\delay})] \leq K \exp\{- \delay E\}$ for

1833: all $n, \delay \geq 0$ where

1834: %

1835: \begin{equation}

1836: E_{UN,SW, xy}(\Rx,  \Ry) = \min \Bigg\{ \inf_{\gamma \in [0,1]}

1837:  E_x^{UN}(\Rx, \Ry, \gamma), \inf_{\gamma \in

1838: [0,1]} E_y^{UN}(\Rx, \Ry, \gamma) \Bigg\}.

1839: \label{eq.expSWUnivJoint}

1840: \end{equation}

1841:

1842: In definitions (i)--(iii),

1843:  %%

1844: \begin{align}

1845: & E_{x}^{UN}(\Rx, \Ry, \gamma) \nonumber \\

1846: & = \inf_{\tiny \rvxtil, \rvytil,

1847: \rvxBar, \rvyBar} \gamma D(p_{\rvxtil, \rvytil} \| \PxyRV) +

1848: (1-\gamma) D(p_{\rvxBar, \rvyBar} \| \PxyRV)

1849: %

1850: + \left|\gamma [\Rx - H(\rvxtil | \rvytil )]

1851: + (1-\gamma)  [\Rx + \Ry - H(\rvxBar, \rvyBar)]\right|^{+}

1852: \nonumber \\

1853: %

1854: & E_{y}^{UN}(\Rx, \Ry, \gamma) \nonumber \\

1855: & = \inf_{\tiny \rvxtil, \rvytil,

1856: \rvxBar, \rvyBar} \gamma D(p_{\rvxtil, \rvytil}\| \PxyRV) +

1857: (1-\gamma) D(p_{\rvxBar, \rvyBar}  \| \PxyRV)

1858: %

1859: + \left|\gamma [\Ry - H(\rvytil|\rvxtil)]

1860: + (1-\gamma) [\Rx + \Ry - H(\rvxBar, \rvyBar)]\right|^{+}

1861: \end{align}

1862: %

1863: where the random variables $(\rvxtil, \rvytil)$ and $(\rvxBar,

1864: \rvyBar)$ have joint distributions $p_{\rvxtil, \rvytil}$ and

1865: $p_{\rvxBar, \rvyBar}$, respectively.  The function $|z|^{+} = z$ if

1866: $z \geq 0$ and $|z|^{+} = 0$ if $z < 0$.

1867: \end{thm}

1868:

1869:

1870: {\em Remark:} Definitions (i) and (ii) in

1871: Theorems~\ref{thm.jointCodeML} and~\ref{thm.jointCode} concern

1872: individual decoding error events which might be useful in applications

1873: where the $\rvbx$ and $\rvby$ streams are decoded jointly, but

1874: utilized individually.  The more standard joint error event is given

1875: by (iii).

1876:

1877: {\em Remark:} We can compare the joint error event for block and

1878: streaming Slepian-Wolf coding, cf.~(\ref{eq.expSWUnivJoint})

1879: with~(\ref{eq.SWblockLowBnd}).  The streaming exponent differs by the

1880: extra parameter $\gamma$ that must be minimized over. If the

1881: minimizing $\gamma = 1$, then the block and streaming exponents are

1882: the same. The minimization over $\gamma$ results from a fundamental

1883: difference in the types of error-causing events that can occur in

1884: streaming Slepian-Wolf as compared to block Slepian-Wolf.

1885:

1886: {\em Remark:} The error exponents of maximum likelihood and universal

1887: decoding in Theorems~\ref{thm.jointCodeML} and~\ref{thm.jointCode} are

1888: the same.  However, because there are new classes of error events

1889: possible in streaming, this needs proof. The equivalence is

1890: summarized in the following theorem.

1891:

1892: \begin{thm}  \label{THM:Universal_ML_SW}

1893:   Let $(\Rx, \Ry)$ be a rate pair such that $\Rx > H(\rvx|\rvy)$, $\Ry

1894:   > H(\rvy|\rvx)$, and $\Rx + \Ry > H(\rvx, \rvy)$.  Then,

1895: %

1896: \begin{equation}

1897: E_{ML, SW, x}(\Rx, \Ry) = E_{UN, SW, x}(\Rx, \Ry),

1898: \end{equation}

1899: %

1900: and

1901: %

1902: \begin{equation}

1903: E_{ML, SW, y}(\Rx, \Ry) = E_{UN, SW, y}(\Rx, \Ry).

1904: \end{equation}

1905: \end{thm}

1906:

1907: Theorem~\ref{THM:Universal_ML_SW} follows directly from the

1908: following lemma, shown in the  appendix.

1909: %

1910: \begin{lemma}

1911: For all $\gamma \in [0,1]$

1912: %

1913: \begin{equation}

1914: E^{ML}_x(R_x,R_y,\gamma)=E^{UN}_x(R_x,R_y,\gamma),

1915: \end{equation}

1916: %

1917: and

1918: %

1919: \begin{equation}

1920: E^{ML}_y(R_x,R_y,\gamma)=E^{UN}_y(R_x,R_y,\gamma).

1921: \end{equation}

1922: \end{lemma}

1923:

1924: {\em Remark:} This theorem allows us to simplify notation.  For

1925: example, we can define $E_x(R_x,R_y,\gamma)$ as

1926: $E_x(R_x,R_y,\gamma)=E^{ML}_x(R_x,R_y,\gamma)=E^{UN}_x(R_x,R_y,\gamma)$,

1927: and can similarly define $E_y(R_x,R_y,\gamma)$.  Further, since the

1928: ML and universal exponents are the same for the whole rate region we

1929: can define $E_{SW,x}(\Rx, \Ry)$ as $E_{SW,x}(\Rx, \Ry) =

1930: E_{ML,SW,x}(\Rx, \Ry) = E_{UN,SW,x}(\Rx, \Ry)$, and can similarly

1931: define $E_{SW,y}(R_x,R_y)$.

1932:

1933:

1934: \section{Numerical Results}\label{sec.numerical} To build insight

1935: into the differences between the sequential error exponents of

1936: Theorems~\ref{thm.entCodeML}--\ref{THM:Universal_ML_SW} and block-coding error

1937: exponents, we give some examples of the exponents for binary sources.

1938:

1939: For the point-to-point case, the error exponents of random sequential

1940: and block source coding are identical everywhere in the achievable

1941: rate region as can be seen by comparing Theorem~\ref{thm.entCodeUniv}

1942: and Corollary~\ref{thm.blockEnt}. The same is true for source coding

1943: with decoder side information (cf.~Theorem~\ref{thm.decSIUniv} and

1944: Corollary~\ref{thm.blockSI}). For distributed Slepian-Wolf source

1945: coding however, the sequential and block error exponents can be

1946: different. The reason for the discrepancy is that a new type of error

1947: event can be dominant in Slepian-Wolf source coding. This is reflected

1948: in Theorem~\ref{thm.jointCodeML} by the minimization over $\gamma$.

1949: Example $2$ illustrates the impact of this $\gamma$ term.

1950:

1951: For Slepian-Wolf source coding at very high rates, where $\Rx >

1952: H(\rvx)$, the decoder can ignore any information from encoder $y$ and

1953: still decode $x$ with a positive error exponent.  However, the

1954: decoder could also choose to decode source $x$ and $y$ jointly.

1955: Fig~\ref{fig.numerical1}.a and \ref{fig.numerical1}.b illustrate that

1956: joint decoding may or surprisingly {\em may not} help decoding source

1957: $x$. This is seen by comparing the error exponent when the decoder

1958: ignores the side information from encoder $y$ (the dotted curves) to

1959: the joint error exponent (the lower solid curves). It seems that when

1960: the rate for source $y$ is low, atypical behaviors of source $y$ can

1961: cause joint decoding errors that end up corrupting $x$ estimates.

1962: This holds for both block and sequential coding.

1963:

1964:

1965: \subsection{Example 1: symmetric source with uniform marginals}

1966:

1967:

1968: \begin{figure}

1969: \begin{center}

1970: \begin{picture}(100,70)

1971: \put(40, 10){\vector(1,0){55}} \put(40, 10){\vector(0,1){55}}

1972: \put(40,70){$\Ry$} \put(97,10){$\Rx$}

1973: %\put(29,31){$H(\rvy| \rvx)$}

1974: %\put(56, 5){$H(\rvx| \rvy)$} \put(30,60){$\log(2)$}

1975: %\put(87,5){$\log(2)$}

1976: \put(90,60){\line(0,-1){27}} \put(90,33){\line(-1,1){27}}

1977: \put(90,60){\line(-1,0){27}} \multiput(40,45)(2,0){25}{$.$}

1978: \put(30,45 ){$0.49$   } \multiput(40,59)(2,0){25}{$.$}  \put(30,59

1979: ){$0.67$ } \put(72, 52){Achievable } \put(78, 48){Region } \put(57,

1980: 35){$\Rx+\Ry=H(\rvx,\rvy)$} \put(72, 37){\vector(1,1){6.5}}

1981: \end{picture}

1982: \caption{Rate region for the example 1 source, we focus on the error

1983: exponent on source $x$ for fixed encoder $y$ rates: $R_y=0.49$

1984: and $R_y=0.67$ } \label{fig.SWregion1}

1985: \end{center}

1986: \end{figure}

1987:

1988: Consider a symmetric source where $|\mathcal{X}|=|\mathcal{Y}|=2$,

1989: $p_{\rvx\rvy}(0,0)=0.45$, $p_{\rvx\rvy}(0,1)= p_{\rvx\rvy}(1,0)=0.05$

1990: and $p_{\rvx\rvy}(1,1)=0.45$. This is a marginally-uniform source:

1991: $\rvx$ is Bernoulli(1/2), $\rvy$ is the output from a BSC with input

1992: $\rvx$, thus $\rvy$ is Bernoulli(1/2) as well. For this source

1993: $H(\rvx)=H(\rvy)=\log(2)$, $H(\rvx|\rvy)=H(\rvy|\rvx)=0.32$,

1994: $H(\rvx,\rvy)=1.02$. The achievable rate region is the triangle shown

1995: in Figure~\ref{fig.SWregion1}.

1996:

1997:

1998: For this source, as will be shown later, the dominant sequential

1999: error event is on the diagonal line in Fig~\ref{fig.twoD2}. This is

2000: to say that:

2001: %

2002: \begin{equation}

2003: E_{SW,x}(\Rx, \Ry)= E_{SW,x}^{BLOCK}(\Rx, \Ry)=  E^{ML}_x(\Rx, \Ry,

2004: 0) = \sup_{\rho \in [0,1]} [ E_{xy}(\Rx, \Ry, \rho)].

2005: \end{equation}

2006:

2007: Where $E_{SW,x}^{BLOCK}(\Rx, \Ry)=\min\{E^{ML}_x(\Rx, \Ry,

2008: 0),E^{ML}_x(\Rx, \Ry, 1)\} $ as shown in \cite{gallagerTech:76}.

2009:

2010: Similarly for source $y$:

2011: %

2012: \begin{equation}

2013: E_{SW,y}(\Rx, \Ry)= E_{SW,y}^{BLOCK}(\Rx, \Ry)=  E^{ML}_y(\Rx, \Ry,

2014: 0) = \sup_{\rho \in [0,1]} [ E_{xy}(\Rx, \Ry, \rho)].

2015: \end{equation}

2016:

2017: We   first show that for this source $\forall \rho\geq 0$, $

2018: E_{x|y}(\Rx, \rho) \geq  E_{xy}(\Rx, \Ry, \rho)$. By definition:

2019: %

2020: \begin{eqnarray}

2021: E_{x|y}(\Rx, \rho)- E_{xy}(\Rx, \Ry, \rho) & = & \rho \Rx - \log

2022: \Big[ \sum_{\svy} \Big[ \sum_{\svx}

2023: p_{\rvx\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big]^{1+\rho}

2024: \Big]\nonumber\\

2025: %

2026: %

2027: %

2028: &&-\Big(\rho (\Rx + \Ry) - \log \Big[ \sum_{\svx, \svy}

2029: p_{\rvx\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big]^{1 + \rho}

2030: \Big)\nonumber\\

2031: %

2032: %

2033: & = & -\rho \Ry - \log \Big[ 2 \Big[ \sum_{\svx}

2034: p_{\rvx\rvy}(\svx,0)^{\frac{1}{1+\rho}} \Big]^{1+\rho} \Big] + \log

2035: \Big[ 2\sum_{\svx }

2036: p_{\rvx\rvy}(\svx,0)^{\frac{1}{1+\rho}} \Big]^{1 + \rho}  \nonumber\\

2037: %

2038: %

2039: %

2040: & = & -\rho \Ry - \log \Big[ 2   \Big] + \log \Big[ 2 \Big]^{1 + \rho}  \nonumber\\

2041: %

2042: %

2043: & =& \rho (\log [2] -\Ry)\nonumber\\

2044: & \geq & 0\nonumber

2045: \end{eqnarray}

2046:

2047: The last inequality is true because we only consider the problem when

2048: $\Ry \leq \log|\mathcal{Y}|$. Otherwise, $y$ is better viewed as

2049: perfectly known side-information. Now

2050:

2051: \begin{eqnarray}

2052: E^{ML}_x(\Rx, \Ry, \gamma) &=& \sup_{\rho \in [0,1]} [ \gamma

2053: E_{x|y}(\Rx, \rho) + (1-\gamma) E_{xy}(\Rx, \Ry, \rho)]\nonumber\\

2054: %

2055: &\geq & \sup_{\rho \in [0,1]} [   E_{xy}(\Rx, \Ry, \rho)]\nonumber\\

2056: %

2057: &=&   E^{ML}_x(\Rx, \Ry, 0) \nonumber

2058: \end{eqnarray}

2059:

2060: Similarly $E^{ML}_y(\Rx, \Ry, \gamma) \geq   E^{ML}_y(\Rx, \Ry, 0)=

2061: E^{ML}_x(\Rx, \Ry, 0)$. Finally,

2062: %

2063: \begin{eqnarray}

2064: E_{SW, x}(\Rx,  \Ry)& =& \min \Bigg\{ \inf_{\gamma \in [0,1]}

2065: E_x(\Rx, \Ry, \gamma), \inf_{\gamma \in [0,1]}

2066: \frac{1}{1-\gamma} E_y(\Rx, \Ry, \gamma) \Bigg\}\nonumber\\

2067: &=& E^{ML}_x(\Rx, \Ry, 0)\nonumber

2068: \end{eqnarray}

2069:

2070: Particularly $E_x(\Rx, \Ry, 1) \geq E_x(\Rx, \Ry, 0)$, so

2071: %

2072: \begin{eqnarray}

2073: E_{SW,x}^{BLOCK}(\Rx, \Ry) &=& \min\{E^{ML}_x(\Rx, \Ry,

2074: 0),E^{ML}_x(\Rx, \Ry, 1)\}\nonumber\\

2075: &=& E^{ML}_x(\Rx, \Ry, 0)\nonumber

2076: \end{eqnarray}

2077: %

2078: The same proof holds for source $y$.

2079:

2080:

2081:

2082: In Fig~\ref{fig.numerical3} we plot the joint sequential/block coding

2083: error exponents $E_{SW,x}(\Rx, \Ry)=E_{SW,x}^{BLOCK}(\Rx, \Ry)$, the

2084: error exponents are positive iff $\Rx> H(\rvx\rvy)-\Ry=1.02-\Ry$.

2085:

2086:

2087: \begin{figure}[htbp]

2088: \begin{center}

2089: \leavevmode

2090: \includegraphics[width=100mm]{simu3}

2091: \caption[]{ Error exponents plot:  $E_{SW,x}(\Rx, \Ry)$ plotted for $R_y=0.49$ and $R_y=0.67$\\

2092: $E_{SW,x}(\Rx, \Ry)= E_{SW ,x}^{BLOCK}(\Rx,

2093:   \Ry)=E_{SW,y}(\Rx, \Ry)= E_{SW,y}^{BLOCK}(\Rx, \Ry)$ and

2094:   $E_{x}(\Rx)=0$  }

2095: \label{fig.numerical3}

2096: \end{center}

2097: \end{figure}

2098:

2099:

2100: \subsection{Example 2: non-symmetric source}

2101:

2102: Consider a non-symmetric source where $|\mathcal{X}|=|\mathcal{Y}|=2$,

2103: $p_{\rvx\rvy}(0,0)=0.1$, $p_{\rvx\rvy}(0,1)= p_{\rvx\rvy}(1,0)=0.05$

2104: and $p_{\rvx\rvy}(1,1)=0.8$.  For this source $H(\rvx)=H(\rvy)=0.42$,

2105: $H(\rvx|\rvy)=H(\rvy|\rvx)=0.29$ and $H(\rvx,\rvy)=0.71$. The

2106: achievable rate region is shown in Fig~\ref{fig.SWregion}.  In

2107: Fig~\ref{fig.numerical1}.a, \ref{fig.numerical1}.b,

2108: \ref{fig.numerical1}.c and \ref{fig.numerical1}.d, we compare the

2109: joint sequential error exponent $E_{SW,x}(\Rx, \Ry)$, the joint block

2110: coding error exponent $E_{SW,x}^{BLOCK}(\Rx, \Ry)=\min\{E_x(\Rx, \Ry,

2111: 0),E_x(\Rx, \Ry, 1)\} $ as shown in \cite{gallagerTech:76} and the

2112: individual error exponent for source $X$, $E_{x}(\Rx)$ as shown in

2113: Corollary~\ref{thm.blockEnt}. Notice that $E_{x}(\Rx)>0$ only if $\Rx>

2114: H(\rvx)$. In Fig~\ref{fig.numerical2}, we compare the sequential error

2115: exponent for source $y$: $E_{SW,y}(\Rx, \Ry)$ and the block coding

2116: error exponent for source $y$: $E_{SW,y}^{BLOCK}(\Rx, \Ry)

2117: =\min\{E_y(\Rx, \Ry, 0),E_y(\Rx, \Ry, 1)\}$ and $E_{y}(\Ry)$ which is

2118: a constant since we fix $\Ry$.

2119:

2120: For $\Ry=0.35$ as shown in Fig~\ref{fig.numerical1}.a.b and

2121: \ref{fig.numerical2}.a.b, the difference between the block coding and

2122: sequential coding error exponents is very small for both source $x$

2123: and $y$. More interestingly, as shown in Fig~\ref{fig.numerical1}.a,

2124: because the rate of source $y$ is low, it is more likely to get a

2125: decoding error due to the atypical behavior of source $y$. So as $\Rx$

2126: increases, it is sometimes better to ignore source $y$ and decode $x$

2127: individually. This is evident as the dotted curve is above the solid

2128: curves.

2129:

2130:

2131: For $\Ry=0.49$ as shown in Fig~\ref{fig.numerical1}.c.d and

2132: \ref{fig.numerical2}.c.d, since the rate for source $y$ is high

2133: enough, source $y$ can be decoded with a positive error exponent

2134: individually as shown in Fig~\ref{fig.numerical2}.c. But as the rate

2135: of source $x$ increases, joint decoding gives a better error exponent.

2136: When $\Rx$ is very high, then we observe the saturation of the error

2137: exponent on $y$ as if source $x$ is known perfectly to the decoder!

2138: This is illustrated by the flat part of the solid curves in

2139: Fig~\ref{fig.numerical2}.c.

2140:

2141: \begin{figure}

2142: \begin{center}

2143: \begin{picture}(100,70)

2144: \put(40, 10){\vector(1,0){55}} \put(40, 10){\vector(0,1){55}}

2145: \put(40,70){$\Ry$} \put(97,10){$\Rx$}

2146: %\put(30,40){$H(\rvy)$}

2147: %\put(29,31){$H(\rvy| \rvx)$} \put(68,5){$H(\rvx)$}

2148: % \put(56,5){$H(\rvx| \rvy)$} \put(30,60){$\log(2)$} \put(87,5){$\log(2)$}

2149: \put(90,60){\line(0,-1){29}} \put(90,31){\line(-1,0){20}}

2150: \put(70,31){\line(-1, 1){9}} \put(90,60){\line(-1,0){29}}

2151: \put(61,60){\line(0,-1){20}}

2152: % \put(40,32 ){\line(1,0){50}} (5,0){12}{\multiput(0,0)(0,5){12}{\circle*{1.5}}}

2153: \multiput(40,35)(2,0){25}{$.$}

2154: % \put(40,35 ){\line(1,0){50}}

2155: %  \put(40,42 ){\line(1,0){50}}

2156: %    \put(40,45 ){\line(1,0){50}}

2157: \multiput(40,45)(2,0){25}{$.$}

2158: %   \put(40,55 ){\line(1,0){50}}

2159: %\put(92,32 ){$\Ry=0.44$, Fig. \ref{fig.numerical1}}

2160: \put(30,35 ){$0.35$   }

2161: % \put(92,42 ){$\Ry=0.64$, Fig. \ref{fig.numerical3}}

2162: \put(30,45 ){$0.49$ }

2163: %  \put(92,55 ){$\Ry=0.90$, Fig. \ref{fig.numerical5}}

2164: \put(64, 52){Achievable} \put(66, 48){Region}

2165:

2166: \put(47, 25){$\Rx+\Ry=H(\rvx,\rvy)$} \put(60, 28){\vector(1,1){6.5}}

2167: \end{picture}

2168: \caption{ Rate region for the example 2 source. We focus on the error

2169: exponent on source $x$ for  fixed encoder $y$ rates: $R_y=0.35$

2170: and $R_y=0.49$  } \label{fig.SWregion}

2171: \end{center}

2172: \end{figure}

2173:

2174:

2175:

2176: \begin{figure}[htbp]

2177: \begin{center}

2178: \leavevmode

2179: \includegraphics[width=140mm]{simu1}

2180: \caption[]{ Error exponents plot for source $x$ for fixed $\Ry$ as $\Rx$ varies:\\

2181: $\Ry=0.35$:\\(a) Solid curve: $E_{SW,x}(\Rx, \Ry)$, dashed curve

2182:   $ E_{SW,x}^{BLOCK}(\Rx, \Ry)$ and dotted

2183:   curve: $E_{x}(\Rx)$,  notice that $E_{SW,x}(\Rx, \Ry)\leq

2184:   E_{SW,x}^{BLOCK}(\Rx, \Ry)$ but the difference is  small.\\(b) $10

2185:   \log_{10}(\frac{E_{SW,x}^{BLOCK}(\Rx, \Ry)}{E_{SW,x}(\Rx, \Ry)})$. This shows the difference is there at high rates.\\

2186:   $\Ry=0.49$:\\(c) Solid curve $E_{SW,x}(\Rx, \Ry)$, dashed

2187:   curve $ E_{SW,x}^{BLOCK}(\Rx, \Ry)$ and

2188:   dotted curve: $E_{x}(\Rx)$, again $E_{SW,x}(\Rx, \Ry)\leq

2189:   E_{SW,x}^{BLOCK}(\Rx, \Ry)$ but the difference is extremely small.\\(d) $10

2190:   \log_{10}(\frac{E_{SW,x}^{BLOCK}(\Rx, \Ry)}{E_{SW,x}(\Rx, \Ry)})$. This shows the difference is there at intermediate low rates. }

2191: \label{fig.numerical1}

2192: \end{center}

2193: \end{figure}

2194:

2195:

2196:

2197: \begin{figure}[htbp]

2198: \begin{center}

2199: \leavevmode

2200: \includegraphics[width=140mm]{simu2}

2201: \caption[]{ Error exponents plot for source $y$ for fixed $\Ry$ as $\Rx$ varies:\\

2202: $\Ry=0.35$:

2203:

2204: (a) Solid curve:  $E_{SW,y}(\Rx, \Ry)$

2205:   and dashed curve $E_{SW,y}^{BLOCK}(\Rx, \Ry)$, $E_{SW,y}(\Rx, \Ry)\leq

2206:   E_{SW,y}^{BLOCK}(\Rx, \Ry)$, the difference is extremely small.

2207:   $E_{y}(\Ry)$ is $0$ because $R_y=0.35< H(\rvy)$. (b) $ 10 \log_{10}(\frac{E_{SW,y}^{BLOCK}(\Rx,

2208:     \Ry)}{E_{SW,y}(\Rx, \Ry)})$. This shows the two exponents are not identical everywhere. \\$\Ry=0.49$:\\(c) Solid curves:

2209:   $E_{SW,y}(\Rx, \Ry)$, dashed curve $ E_{SW,y}^{BLOCK}(\Rx, \Ry)$ and  $E_{SW,y}(\Rx,

2210:   \Ry)\leq E_{SW,y}^{BLOCK}(\Rx, \Ry)$ and $E_{y}(\Ry)$ is constant

2211:   shown in a dotted line.\\(d) $ 10 \log_{10}(\frac{E_{SW,y}^{BLOCK}(\Rx,

2212:     \Ry)}{E_{SW,y}(\Rx, \Ry)})$. Notice how the gap goes to infinity when we leave the Slepian-Wolf region. }

2213: \label{fig.numerical2}

2214: \end{center}

2215: \end{figure}

2216:

2217:

2218: %%%%%%%%%%%%%%%%%%%%%

2219:

2220: \section{Streaming point-to-point coding via sequential random binning}

2221: \label{sec.entropy}

2222:

2223:

2224: In this section we prove Theorems~\ref{thm.entCodeML}

2225: and~\ref{thm.entCodeUniv}.  While the emphasis of the paper is on

2226: distributed source coding, the basic causal random binning ideas and

2227: analysis techniques can be more easily developed in the point-to-point

2228: context.

2229:

2230: %%%%%

2231: \subsection{Maximum-likelihood decoding}

2232: \label{sec.MLent}

2233:

2234: To show Theorems~\ref{thm.entCodeML} and~\ref{thm.entCodeUniv}, we

2235: first develop the common core of the proof in the context of ML

2236: decoding.  The proof strategy is as follows.  A decoding error can

2237: only occur if there is some spurious source sequence $\svxtil^n$ that

2238: satisfies three conditions: (i) it must be in the same bin (share the

2239: same parities) as $\svx^n$, i.e., $\svxtil^n \in \binX(\svx^n)$, (ii)

2240: it must be more likely than the true sequence, i.e.,

2241: $p_{\rvbx}(\svxtil^n) > p_{\rvbx}(\svx^n)$, and (iii) $\svxtil_{l}

2242: \neq \svx_{l}$ for some $l \leq n - \delay$.

2243:

2244: The error probability is

2245: %

2246: \begin{align}

2247: \Pr [ \rvxhat^{n-\delay} \neq \rvx^{n-\delay}] %%

2248: = &  \sum_{\svx^n} \Pr [\rvxhat^{n-\delay} \neq \svx^{n-\delay} |

2249: \rvx^n = \svx^n]

2250: p_\rvbx(\svx^n) \label{eq.condSS} \\

2251: %%

2252: %%

2253: %%

2254: = & \sum_{\svx^n} \sum_{l=1}^{n- \delay} \Pr \big[ \exists \;

2255: \svxtil^n \in \mathcal{B}_x(\svx^n)\cap \mathcal{F}_n(l, \svx^n) \;

2256: \mbox{s.t.} \; p_{\rvbx} (\svxtil^n) \geq p_{\rvbx}(\svx^n)  \big]

2257: p_\rvbx(\svx^n) \label{eq.decomp}\displaybreak[2]\\

2258: %%

2259: %%

2260: %%

2261: = & \sum_{l=1}^{n- \delay} \Big\{ \sum_{\svx^n}  \Pr \big[ \exists

2262: \; \svxtil^n \in \mathcal{B}_x(\svx^n)\cap \mathcal{F}_n(l, \svx^n)

2263: \; \mbox{s.t.} \; p_{\rvbx} (\svxtil^n) \geq p_{\rvbx}(\svx^n)  \big]

2264: p_\rvbx(\svx^n) \Big\} \nonumber \\

2265: %%

2266: %%

2267:  =&\sum_{l=1}^{n- \delay} p_n(l).

2268: \label{eq.sufDec}

2269: \end{align}

2270: %

2271: After conditioning on the realized source sequence

2272: in~(\ref{eq.condSS}), the remaining randomness is only in the binning.

2273: In~(\ref{eq.decomp}) we decompose the error event into a number of

2274: mutually exclusive events (see Fig~\ref{fig.oneD1}) by partitioning

2275: all source sequences $\svxtil^n$ into sets $\mathcal{F}_n(l,\svx^n)$

2276: defined by the time $l$ of the first sample in which they differ from

2277: the realized source $\svx^n$,

2278: %

2279: \begin{equation}

2280: \mathcal{F}_n(l,\svx^n) =\{\svxtil^n\in

2281: \mathcal{X}^n|\svxtil^{l-1} = \svx^{l-1}, \svxtil_{l} \neq

2282: \svx_{l}\}, \label{eq.partition}

2283: \end{equation}

2284: %

2285: and define $\mathcal{F}_n(n+1,\svx^n)=\{\svx^n\}$. Finally,

2286: in~(\ref{eq.sufDec}) we define

2287: %

2288: \begin{equation}

2289: p_n(l)= \sum_{\svx^n}  \Pr \big[ \exists

2290: \; \svxtil^n \in \mathcal{B}_x(\svx^n)\cap \mathcal{F}_n(l, \svx^n)

2291: \; \mbox{s.t.} \; p_{\rvbx} (\svxtil^n) \geq p_{\rvbx}(\svx^n)  \big]

2292: p_\rvbx(\svx^n).

2293: \label{eq.errTimeL}

2294: \end{equation}

2295:

2296: %

2297: \begin{figure}

2298: \setlength{\unitlength}{1mm}

2299:  \begin{picture}(100,20)

2300: \multiput(30,10)(5,0){14}{\circle*{1.5}}

2301: \multiput(30,10)(5,0){9}{\oval(2,3)}

2302:  \thicklines% \linethickness{0.5mm}

2303:  \put(75,10){\oval(2,3)}

2304:  \thinlines

2305:  \put(25,10){\vector(1,0){85}}

2306:  \put(115,9){$l$}

2307:  \put(29,5){$1$}

2308:  \put(94,5){$n$}  \put(70,5){$n-\Delta$}

2309:   \end{picture}

2310: \caption{Decoding error probability at $n-\delay$ can be union

2311: bounded by the sum of probabilities of first decoding error at $l$,

2312: $1\leq l\leq n-\delay$. The dominant error event $p_n(n-\delay)$ is

2313: the one in the highlighted oval (shortest delay).} \label{fig.oneD1}

2314: \end{figure}

2315:

2316:

2317: We now upper bound $p_n(l)$ using a Chernoff bound argument similar to

2318: \cite{gallagerTech:76}.

2319: %

2320: \begin{lemma}\label{Lemma.indivupperbound}

2321: %

2322: $p_n(l)\leq \exp\{-(n-l+1)\expML(\Rent)\}$.

2323: \end{lemma}

2324:

2325: \pf

2326: %

2327: \begin{align}

2328: p_n(l) =&\sum_{\svx^n} \Pr \big[ \exists \; \svxtil^n \in

2329: \mathcal{B}_x(\svx^n)\cap \mathcal{F}_n(l, \svx^n) \; \mbox{s.t.} \;

2330: p_{\rvbx} (\svxtil^n) \geq p_{\rvbx}(\svx^n)  \big]

2331: p_\rvbx(\svx^n)  \nonumber\\

2332: %%

2333: \leq &  \sum_{\svx^n} \min \Big[1, \hspace{-1.5em} \sum_{\tiny

2334: \begin{array}{c} \svxtil^n\in \mathcal{F}_n(l,\svx^n) \mbox{s.t.}  \\ p_{\rvbx}

2335: (\svx^n)\leq p_{\rvbx} (\svxtil^n) \end{array}} \hspace{-1.5em} \Pr[

2336: \svxtil^n \in \binX(\svx^n)] \Big] p_\rvbx(\svx^n)

2337: \label{eq.setBoundViaScore} \displaybreak[2]\\

2338: %%

2339: %%

2340: = &  \sum_{\svx^{l-1}, \svx_{l}^n}

2341: %

2342: \min \Big[1, \hspace{-1em}

2343: \sum_{\tiny \begin{array}{c} \svxtil_{l}^n \; \mbox{s.t.}  \\

2344: p_{\rvx }(\svx_{l}^n) < p_{\rvx }(\svxtil_{l}^n)

2345: \end{array}}

2346: %

2347: \exp\{-(n-l + 1) \Rent\} \Big] p_\rvbx(\svx^{l-1}) p_\rvbx(\svx_l^{n})

2348: \label{eq.randBin} \\

2349: %

2350: %

2351: = &  \sum_{\svx_{l}^n}

2352: %

2353: \min \Big[1, \hspace{-1em}

2354: \sum_{\tiny \begin{array}{c} \svxtil_{l}^n \; \mbox{s.t.}  \\

2355: p_{\rvx }(\svx_{l}^n) < p_{\rvx }(\svxtil_{l}^n)

2356: \end{array}}

2357: %\hspace{-2em} \ldots \nonumber\\

2358: \exp\{-(n-l +1) \Rent\} \Big]

2359:  p_\rvbx(\svx_l^{n})\nonumber\\

2360:  %%

2361:  %%

2362: = &

2363: %

2364: \sum_{\svx_{l}^n}

2365: %

2366: \min \Big[ 1, \sum_{\tiny \svxtil_{l}^n } \ind[ p_{\rvx

2367: }(\svxtil_{l}^n) >  p_\rvbx(\svx_l^{n})] \exp\{-(n-l+1) \Rent\} \Big]

2368:  p_\rvbx(\svx_l^{n}) \label{eq.indicator}\\

2369: %

2370: %

2371: %

2372: \leq &  \sum_{\svx_{l}^n}

2373: %

2374: \min \left[1, \sum_{\tiny \svxtil_{l}^n } \min \left[ 1,

2375: \frac{p_{\rvx }(\svxtil_{l}^n)}{p_{\rvx }(\svx_{l}^n)} \right]

2376: \exp\{-(n-l +1) \Rent\} \right]

2377:  p_\rvbx(\svx_l^{n}) \nonumber \displaybreak[2]\\

2378: %

2379: %

2380: %

2381: \leq &  \sum_{\svx_{l}^n}

2382: %

2383: \left[ \sum_{\tiny \svxtil_{l}^n } \left[ \frac{p_{\rvx

2384: }(\svxtil_{l}^n)}{p_{\rvx }(\svx_{l}^n)} \right]^{\frac{1}{1+\rho}}

2385: \exp\{-(n-l+1) \Rent\} \right]^{\rho}

2386:  p_\rvbx(\svx_l^{n}) \label{eq.limOnRho} \displaybreak[2]\\

2387: %

2388: %

2389: %

2390: = &  \sum_{\svx_{l}^n}  p_\rvbx(\svx_l^{n})^{\frac{1}{1+\rho}}

2391: %

2392: \left[ \sum_{\tiny \svxtil_{l}^n } \left[ p_{\rvx }(\svxtil_{l}^n)

2393: \right]^{\frac{1}{1+\rho}}\right]^{\rho}

2394: \exp\{-(n-l +1) \rho \Rent\} \nonumber \displaybreak[2] \\

2395: %

2396: %

2397: %

2398: = &  \left[\sum_{\svx} \PxRV(\svx)^{\frac{1}{1+\rho}}\right]^{(n-l+1)}

2399: %

2400: \left[

2401: \sum_{\svx} \PxRV(\svx)^{\frac{1}{1+\rho}}\right]^{(n-l+1)\rho}

2402: \exp\{-(n-l +1) \rho \Rent\} \label{eq.iid} \displaybreak[2]\\

2403: %

2404: %

2405: %

2406: = &  \left[ \sum_{\svx}

2407: \PxRV(\svx)^{\frac{1}{1+\rho}}\right]^{(n-l+1)(1+\rho)}

2408: \exp\{-(n-l+1) \rho \Rent\} \nonumber\\

2409: %

2410: %

2411: = & \exp\left\{-(n-l+1) \left[\rho \Rent - (1+\rho) \ln

2412: \left(\sum_{\svx} \PxRV(\svx)^{\frac{1}{1+\rho}}\right) \right]

2413: \right\}. \label{eq.rhoBnd}

2414: \end{align}

2415:

2416: In~(\ref{eq.setBoundViaScore}) the union bound is applied.

2417: In~(\ref{eq.randBin}) we use the fact that after the first symbol in

2418: which two sequences differ, the remaining parity bits are independent,

2419: and the fact that only the likelihood of the differing suffixes

2420: matters.  That is, if $\svx^{l-1} = \svxtil^{l-1}$, then $p_{\rvbx}

2421: (\svx^n)< p_{\rvbx} (\svxtil^n)$ if and only if $p_{\rvbx}(\svx_{l}^n)

2422: < p_{\rvbx}(\svxtil_{l}^n)$.  In~(\ref{eq.indicator}) $\ind(\cdot)$ is

2423: the indicator function, taking the value one if the argument is true,

2424: and zero if it is false. We get~(\ref{eq.limOnRho}) by limiting $\rho$

2425: to the range $0 \leq \rho \leq 1$ since the arguments of the

2426: minimization are both positive and upper-bounded by one. We use the

2427: iid property of the source, exchanging sums and products to

2428: get~(\ref{eq.iid}).  The bound in~(\ref{eq.rhoBnd}) is true for all

2429: $\rho$ in the range $0 \leq \rho \leq 1$. Maximizing the exponent in~(\ref{eq.rhoBnd})

2430: over $\rho$ gives $p_n(l)\leq \exp\{-(n-l+1)\expML(\Rent)\}$ where

2431: $\expML(\Rent)$ is defined in Theorem~\ref{thm.entCodeML}, in

2432: particular~(\ref{eq.errExpML}). \hfill$\blacksquare$

2433: %

2434:

2435: Using Lemma~\ref{Lemma.indivupperbound} in~(\ref{eq.sufDec}) gives

2436: %

2437: \begin{align}

2438: \Pr [ \rvxhat^{n-\delay} \neq \rvx^{n-\delay}] %%

2439: \leq & \sum_{l=1}^{n-\delay} \exp\{- (n-l+1) E_{ML}(\Rent)\}

2440: \label{eq.delayTerm}\\

2441: %

2442: = & \sum_{l=1}^{n-\delay} \exp\{- (n-l+1-\delay) E_{ML}(\Rent)\}

2443: \exp\{- \delay E_{ML}(\Rent)\} \nonumber \\

2444: %

2445: \leq & K_0 \exp\{- \delay E_{ML}(\Rent)\} \label{eq.pullOutExp}

2446: \end{align}

2447: %

2448: In~(\ref{eq.pullOutExp}) we pull out the exponent in $\delay$.  The

2449: remaining summation is a sum over decaying exponentials, and thus

2450: can be bounded by some constant $K_0$. This proves Theorem~\ref{thm.entCodeML}.

2451:

2452:

2453: \subsection{Error events and sequential decoding}

2454: \label{sec.entMLseq}

2455:

2456:

2457: To better understand the dominant error event in the

2458: sum~(\ref{eq.delayTerm}), consider constructing the ML estimate in a

2459: symbol-by-symbol sequential manner.  The decoder starts by first

2460: identifying as candidates those sequences whose parities match the

2461: received bit stream up to time $n$.  If the encoder observes the

2462: length-$n$ sequence $\rvbx = \svbx$, this is $\{ \svbxBar \;

2463: \mbox{s.t.} \; \svbxBar \in \binX(\svbx)\}$.  The $l$th symbol of the

2464: estimate, $\rvxhat_l$, is defined as

2465: %

2466: \begin{equation}

2467: \svxhat_l = \svw_l \;\;\; \mbox{where} \;\;\;

2468: \svbw = \argmax_{\svbxBar \in \binX(\svbx) \;\; \mbox{s.t.} \;\;

2469: \svxBar^{l-1} = \svxhat^{l-1}} p_{\rvx_{l}^n}(\svxBar_{l}^n).

2470: \label{eq.defSeqDec}

2471: \end{equation}

2472: %

2473: The estimate thus produced is the maximum likelihood estimate because

2474: the decision regarding which of a pair of sequences is more likely depends

2475: only on which one's suffix is more likely.

2476:

2477: This is a decision-directed decoder. Semi-hard\footnote{Decisions are

2478:   only ``hard'' for computational time. As soon as the next set of

2479:   parities arrive and real-time advances, all the computations are

2480:   done again.} estimates are made sequentially for each symbol.  These

2481: estimates are then fixed, and taken as true when estimating subsequent

2482: symbols.  Each such hard-decision is analogous to a classic

2483: block-coding Slepian-Wolf problem. This is because we only need to

2484: decide between sequences that start to differ in the symbol we are

2485: trying to estimate---previous symbols have been fixed, and subsequent

2486: symbols are not yet in question.  Thus, all sequences that could lead

2487: to different estimates of symbol $l$ are binned independently for the

2488: remainder of the block.  This is why the error exponent we derive

2489: in~(\ref{eq.pullOutExp}) equals Gallager's block coding

2490: exponent~\cite{gallagerTech:76}.  Since the error exponent for each

2491: block-decoding problem is the same, the dominant error event is the

2492: hard-decision with the shortest block-length.  This symbol is the last

2493: symbol we need to estimate.  Its block-length equals the estimation

2494: delay $\delay$. We revisit this story in Section~\ref{sec.SW} when we

2495: consider Slepian-Wolf coding.  In that context the dominant error

2496: event has some features that do not arise in block coding.

2497:

2498:

2499: %%%%%%%%%%%

2500: \subsection{Universal decoding}

2501: \label{sec.univEnt}

2502:

2503: In this section we prove Theorem~\ref{thm.entCodeUniv}.  We use the

2504: sequential decoder introduced in Section~\ref{sec.entMLseq}, but with

2505: minimum-entropy, rather than maximum-likelihood, decoding.  That is,

2506: %

2507: \begin{equation}

2508: \svxhat_l = \svw_l[l] \;\;\; \mbox{where} \;\;\; \svw^n[l] =

2509: \argmin_{\svxBar^n \in \binX(\svx^n) \;\; \mbox{s.t.} \;\;

2510: \svxBar^{l-1} = \svxhat^{l-1}} H(\svxBar_{l}^n).

2511: \label{eq.defSeqUniv}

2512: \end{equation}

2513: %

2514: We term this a minimum suffix-entropy decoder.  The reason for using

2515: this decoder instead of the standard minimum block-entropy decoder is

2516: that the block-entropy decoder has a polynomial term in $n$ (resulting

2517: from summing over the type classes) that multiplies the exponential

2518: decay in $\delay$.  For $n$ large, this polynomial can dominate.

2519: Using the minimum suffix-entropy decoder results in a polynomial term

2520: in $\delay$.

2521:

2522: With this decoder, errors can only occur if there is some sequence

2523: $\svxtil^n$ such that (i) $\svxtil^n \in \binX(\svx^n)$, (ii)

2524: $\rvxtil^{l-1} = \rvx^{l-1}$, and $\rvxtil_l \neq \rvx_l$, for some $l

2525: \leq n-\delay$, and (iii) the empirical suffix entropy of

2526: $\svxtil_l^n$ is such that $H(\svxtil_{l}^n) < H(\svx_l^n)$.  Building

2527: on the common core of the

2528: achievability~(\ref{eq.condSS})--(\ref{eq.sufDec}) with the

2529: substitution of universal decoding in the place of maximum likelihood

2530: results in the following definition of $p_n(l)$ (cf.~(\ref{eq.pnUniv})

2531: with~(\ref{eq.errTimeL})),

2532:

2533: \begin{align}

2534: p_n(l)=\sum_{\svx^n}  \Pr \big[ \exists \; \svxtil^n \in

2535: \mathcal{B}_x(\svx^n)\cap \mathcal{F}_n(l, \svx^n) \; \mbox{s.t.} \;

2536:  H(\svxtil_{l}^n) \leq H(\svx_{l}^n)  \big]

2537: p_\rvbx(\svx^n) \label{eq.pnUniv}

2538: \end{align}

2539:

2540:

2541: The following lemma gives a bound on $p_n(l)$.

2542: %

2543: \begin{lemma}\label{Lemma.indivUniv}

2544:   For minimum suffix-entropy decoding, $p_n(l)\leq (n-l+2)^{2|\cX|}

2545:   \exp\{-(n - l +1) E_{UN}(\Rent)\}.$

2546: \end{lemma}

2547:

2548: \pf We define $\PNL$ to be the type of length-$(n-l+1)$ sequence

2549: $x_{l}^n$, and $\tclass_{\PNL}$ to be the corresponding type class so

2550: that $x_{l}^n \in \tclass_{\PNL}$. Analogous definitions hold for

2551: $\PtilNL$ and $\tilde{x}_{l}^n$.  We rewrite the constraint

2552: $H(\svxtil_{l}^n) < H(\svx_{l}^n)$ as $H(\PtilNL) < H(\PNL)$.

2553: Thus,

2554: %

2555: \begin{align}

2556:  p_n(l)=&\sum_{\svx^n} \Pr \big[ \exists \; \svxtil^n \in

2557: \mathcal{B}_x(\svx^n)\cap \mathcal{F}_n(l, \svx^n) \; \mbox{s.t.} \;

2558:  H(\svxtil_{l}^n) \leq H(\svx_{l}^n)  \big]

2559: p_\rvbx(\svx^n)\nonumber\\

2560: %

2561: %

2562: \leq & %

2563: \sum_{\svx_{1}^n}

2564: %

2565: \min \Big[1, \hspace{-1em}

2566: \sum_{\tiny \begin{array}{c} \svxtil_{1}^n \in

2567: \mathcal{F}_n(l,\svx^n)\; \mbox{s.t.}  \\

2568: H(\svxtil_{l}^n) \leq H(\svx_{l}^n) \end{array}}

2569: %\hspace{-2em} \ldots \nonumber\\

2570:  \Pr[\svxtil_{1}^n\in \mathcal{B}_x(\svx_{1}^n)] \Big]

2571: p_\rvbx(\svx^n)\nonumber\\

2572: %

2573: %

2574: %

2575: %

2576: =& %

2577: \sum_{\svx_{1}^{l-1},\svx_{l}^{n}}

2578: %

2579: \min \Big[1, \hspace{-1em}

2580: \sum_{\tiny \begin{array}{c} \svxtil_{l}^n  \; \mbox{s.t.}  \\

2581: H(\svxtil_{l}^n) \leq H(\svx_{l}^n) \end{array}}

2582: %\hspace{-2em} \ldots \nonumber\\

2583:  \exp\{-(n-l +1) \Rent\}  \Big] p_\rvbx(\svx^{l-1})p_\rvbx(\svx_l^{n})

2584: \nonumber\\

2585: %

2586: %

2587: %

2588: %

2589: = & %

2590: \sum_{\svx_{l}^n}

2591: %

2592: \min \Big[1, \hspace{-1em}

2593: \sum_{\tiny \begin{array}{c} \svxtil_{l}^n \; \mbox{s.t.}  \\

2594: H(\svxtil_{l}^n) \leq H(\svx_{l}^n) \end{array}}

2595: %\hspace{-2em} \ldots \nonumber\\

2596: \exp\{-(n-l +1) \Rent\} \Big] p_\rvbx(\svx_l^{n})\label{eq.nonBlock}\\

2597: %

2598: %

2599: =&

2600: %

2601: \sum_{\PNL}

2602: %

2603: \sum_{\tiny \svx_{l}^n \in \tclass_{\PNL}}

2604: %

2605: \min \Big[ 1, \hspace{-2em}

2606: \sum_{\tiny \begin{array}{c}\PtilNL \; \mbox{s.t.}\\

2607: H (\PtilNL) \leq H(\PNL) \end{array}}

2608: %\hspace{-2em} \ldots \nonumber \\

2609: %

2610: %&

2611: \sum_{\tilde{x}_{l}^n \in \tclass_{\PtilNL}} \exp\{-(n-l+1) \Rent\}

2612: \Big] p_\rvbx(\svx_l^{n})

2613: \label{eq.tildeType} \displaybreak[2]\\

2614: %%

2615: %%

2616: \leq &

2617: %

2618: \sum_{\PNL}

2619: %

2620: \sum_{\tiny \svx_{l}^n \in \tclass_{\PNL}}

2621: %

2622: \min \Big[ 1, (n-l+2)^{|\cX|}

2623: %\ldots \nonumber \\

2624: %

2625: %&

2626: \exp\{-(n-l+1) [\Rent - H(\PNL)]\} \Big]

2627: p_\rvbx(\svx_l^{n})\label{eq.entBnd} \displaybreak[2]\\

2628: %%

2629: %%

2630: \leq&    (n-l+2)^{|\cX|}

2631: %

2632: \sum_{\PNL} \sum_{\svx_{l}^n \in \tclass_{\PNL}}

2633: %

2634: \exp\{-(n-l+1) [ |\Rent \! - \! H(\PNL)|^{+} ]\} \nonumber\\

2635: &\hspace{1in} \exp\{-(n-l+1) [D(\PNL \| \PxRV) + H(\PNL)]\}

2636: %

2637: \label{eq.incExp} \displaybreak[2]\\

2638: %%

2639: %%

2640: %%

2641: \leq&  (n-l+2)^{|\cX|}

2642: %

2643: \sum_{\PNL}

2644: %

2645: \exp\{-(n-l+1)

2646: %\ldots \nonumber \\

2647: %& \hspace{1em}

2648: \inf_{q}[D(q \| \PxRV)  + |\Rent -

2649: H(q)|^{+}]\} \label{eq.optErrExp} \displaybreak[2]\\

2650: %%

2651: %%

2652: %%

2653: \leq&  (n-l+2)^{2|\cX|}

2654: %

2655: \exp\{-(n - l +1) E_{UN}(\Rent)\} \label{eq.defEr} \displaybreak[2]

2656: %%

2657: %%

2658: %%

2659: \end{align}

2660: %

2661: In going from~(\ref{eq.tildeType}) to~(\ref{eq.entBnd}) first note

2662: that the argument of the inner-most summation (over $\svxtil_{l}^n$)

2663: does not depend on $\svbx$.  We then use the following relations: (i)

2664: $\sum_{\svxtil_{l}^n \in \tclass_{\PtilNL}} = |\tclass_{\PtilNL}| \leq

2665: \exp\{(n-l+1) H(\PtilNL)\}$, which is a standard bound on the size of

2666: the type class, (ii) $H(\PtilNL) \leq H(\PNL)$ by the

2667: minimum-suffix-entropy decoding rule, and (iii) the polynomial bound

2668: on the number of types, $|\{\PtilNL\}| \leq (n-l+2)^{|\cX|}$.

2669: In~(\ref{eq.incExp}) we recall the function definition $|\cdot|^+

2670: \defeq \max\{0, \cdot\}$.  We pull the polynomial term out of the

2671: minimization and use $p_\rvbx(\svx_l^{n}) = \exp\{-(n-l+1) [ D(\PNL \|

2672: \PxRV) + H(\PNL)]\}$ for all $\svx_l^{n} \in \tclass_{\PNL}$.

2673: It is also in~(\ref{eq.incExp}) that we see why we use a minimum

2674: suffix-entropy decoding rule instead of a minimum entropy decoding

2675: rule.  If we had not marginalized out over $\svx^{l-1}$ in

2676: ~(\ref{eq.nonBlock}) then we would have a polynomial term out front in

2677: terms of $n$ rather than $n-l$, which for large $n$ could dominate the

2678: exponential decay in $n-l$.  As the expression in~(\ref{eq.optErrExp})

2679: no longer depends on $\svx_{l}^n$, we simplify by using

2680: $|\tclass_{\PNL}| \leq \exp\{(n-l+1) H(\PNL)\}$.  In~(\ref{eq.defEr})

2681: we use the definition of the universal error exponent $E_{UN}(\Rent)$

2682: from~(\ref{eq.errExpUniv}) of Theorem~\ref{thm.entCodeUniv}, and the

2683: polynomial bound on the number of types.  \hfill $\blacksquare$

2684:

2685: Lemma~\ref{Lemma.indivUniv} and  $\Pr [   \rvxhat^{n-\delay} \neq

2686: \rvx^{n-\delay}]\leq \sum_{l=1}^{n-\delay} p_n(l)$ imply that:

2687: \begin{align}

2688: \Pr [   \rvxhat^{n - \delay} \neq \rvx^{n - \delay}]  \leq&

2689: \sum_{l=1}^{n-\delay}  (n-l+2)^{2|\cX|}

2690:  \exp\{-(n - l +1) E_{UN}(\Rent)\}\nonumber\\

2691: %%%

2692: %%%

2693: %%%

2694:  \leq&

2695: \sum_{l=1}^{n-\delay} K_1

2696: %

2697: \exp\{-(n -l + 1 ) [E_{UN}(\Rent) - \gamma]\}

2698: \label{eq.polyIntoExp} \displaybreak[2]\\

2699: %%

2700: %%

2701: \leq & K_2 \exp\{ - \delay [E_{UN}(\Rent) - \gamma] \}

2702: \label{eq.entErrExp}

2703: \end{align}

2704:  In~(\ref{eq.polyIntoExp}) we

2705: incorporate the polynomial into the exponent. Namely, for all $a

2706: >0$, $b>0$, there exists a $C$ such that $z^a \leq C \exp \{b (z

2707: -1)\}$ for all $z \geq 1$.

2708:

2709: We then   make explicit the delay-dependent term.  Pulling out the

2710: exponent in $\delay$, the remaining summation is a sum over decaying

2711: exponentials, and can be bounded by a constant. Together with $K_1$,

2712: this gives the constant $K_2$ in~(\ref{eq.entErrExp}). This proves

2713: Theorem~\ref{thm.entCodeUniv}.  Note that the $\gamma$

2714: in~(\ref{eq.entErrExp}) does not enter the optimization because

2715: $\gamma > 0$ can be picked equal to any constant.  The choice of

2716: $\gamma$ affects the constant $K$ in Theorem~\ref{thm.entCodeUniv}.

2717:

2718: %%% SOURCE CODING WITH DECODER SI

2719:

2720: \section{Streaming source coding with side information at the

2721: decoder}

2722: \label{sec.incDecSI}

2723:

2724: If a random sequence $\rvy^n$, related to the source $\rvx^n$ through

2725: a discrete memoryless channel, is observed at the decoder, then this

2726: side information can be used to reduce the rate of the source code.

2727: In this model $p_{\rvbx, \rvby}(\svx^n, \svy^n) = \prod_{i=1}^n \PxyRV

2728: (\svx_i, \svy_i) = \prod_{i=1}^n \PxCondyRV (\svx_i | \svy_i)

2729: \PyRV(\svy_i)$.  The source $\rvx^n$ is observed at the encoder, and

2730: the decoder, which observes $\rvy^n$ and a bit stream from the

2731: encoder, wants to estimate each source symbol $\rvx_i$ with a

2732: probability of error that decreases exponentially in the decoding

2733: delay $\delay$.

2734:

2735: We can apply the analysis of Section~\ref{sec.entropy} to this problem

2736: with a few minor modifications. For ML decoding, we need to pick the

2737: sequence with the maximum conditional probability given $\rvy^n$. The

2738: error exponent can be derived using a similar Chernoff bounding

2739: argument as in Section~\ref{sec.entropy}.  For universal decoding,

2740: the only change is that we now use a minimum suffix

2741: conditional-entropy decoder that compares sequence pairs $(\svxBar^n,

2742: \svy^n)$ and $(\svxBBar^n, \svy^n)$. In terms of the analysis, one

2743: change enters in~(\ref{eq.condSS}) where we must also sum over the

2744: possible side information sequences. And in~(\ref{eq.tildeType}) the

2745: entropy condition in the summation over $\svbxtil$ changes to

2746: $H(\svxtil_{l}^n|\svy_{l}^n) < H(\svx_{l}^n| \svy_{l}^n)$ (or

2747: the equivalent type notation).  Since there is no ambiguity in the

2748: side information, as $\rvy^n$ is observed at the decoder, this

2749: condition is equivalent to $H(\svxtil_{l}^n, \svy_{l}^n) <

2750: H(\svx_{l}^n, \svy_{l}^n)$.

2751:

2752: These results are summarized in Theorems~\ref{thm.decSIML}

2753: and~\ref{thm.decSIUniv}.  We do not include the full derivation of

2754: these theorems as no new ideas are required.

2755:

2756: %%% GENERAL SW

2757: \section{Streaming Slepian-Wolf source coding}

2758: \label{sec.SW}

2759:

2760: In this section we provide the proofs of

2761: Theorems~\ref{thm.jointCodeML} and~\ref{thm.jointCode}, which consider

2762: the two-user\footnote{The multiuser case is essentially the same, just

2763:   with a lot more notation and minimization parameters

2764:   $\gamma_1,\gamma_2,\ldots$.} Slepian-Wolf problem. As with the

2765: proofs of Theorems~\ref{thm.entCodeML} and~\ref{thm.entCodeUniv} in

2766: Sections~\ref{sec.MLent} and~\ref{sec.univEnt}, we start by developing

2767: the common core of the proof in the context of maximum likelihood

2768: decoding.  This allows us to develop the results for universal

2769: decoding more quickly and transparently.  Furthermore, as shown in

2770: Theorem~\ref{THM:Universal_ML_SW}, maximum likelihood decoding and

2771: universal decoding provide the same reliability with delay.

2772:

2773: %%%%%%%%%%%%%%%%%

2774: \subsection{Maximum Likelihood Decoding}

2775: \label{sec.MLSW}

2776:

2777: In Theorems~\ref{thm.jointCodeML} and~\ref{thm.jointCode} three error

2778: events are considered: (i) $\Pr[\rvx^{n - \delay} \neq

2779: \rvxhat^{n-\delay}]$, (ii) $\Pr[\rvy^{n - \delay} \neq

2780: \rvyhat^{n-\delay}]$, and (iii) $\Pr[(\rvx^{n - \delay}, \rvy^{n -

2781:   \delay}) \neq (\rvxhat^{n-\delay}, \rvyhat^{n-\delay})]$.  We

2782: develop the error exponent for case (i).  The error exponent for case

2783: (ii) follows from a similar derivation, and that of case (iii) from an

2784: application of the union bound resulting in an exponent that is the

2785: minimum of the exponents of cases (i) and (ii).

2786:

2787:

2788: To lead to the decoding error $\Pr[\rvx^{n - \delay} \neq

2789: \rvxhat^{n-\delay}]$ there must be some spurious source pair

2790: $(\svxtil^n, \svytil^n)$ that satisfies three conditions: (i)

2791: $\svxtil^n \in \binX(\svx^n)$ and $\svytil^n \in \binY(\svy^n)$, (ii)

2792: it must be more likely than the true pair, i.e., $p_{\rvbx, \rvby}(\svxtil^n,

2793: \svytil^n) > p_{\rvbx, \rvby}(\svx^n, \svy^n)$, and (iii) $\svxtil_{l}

2794: \neq \svx_{l}$ for some $l \leq n - \delay$.

2795:

2796: The error probability is

2797: %

2798: \begin{align}

2799: \Pr[&\rvxhat^{n-\delay} \neq \rvx^{n-\delay}] %%

2800: = \sum_{\svx^n, \svy^n} \Pr [\rvxhat^{n-\delay} \neq \svx^{n-\delay}

2801: | \rvx^n = \svx^n, \rvy^n = \svy^n]

2802: p_{\rvbx,\rvby}(\svx^n, \svy^n) \nonumber\\

2803: %%

2804: %%

2805: &\leq \sum_{\svx^n, \svy^n} p_{\rvbx,\rvby}(\svx^n, \svy^n)\Big\{

2806:  \sum_{l=1}^{n - \delay}

2807: \sum_{k=1}^{n+1}   \nonumber \\

2808: %%

2809: %%

2810: &   \hspace{0.75in}

2811:  \Pr \big[ \exists \; (\svxtil^n, \svytil^n) \in

2812: \binX(\svx^n) \times \binY(\svy^n)\cap

2813: \mathcal{F}_n(l,k,\svx^n, \svy^n) \; \mbox{s.t.} \;

2814:   p_{\rvbx,\rvby}(\svxtil^n, \svytil^n) \geq

2815: p_{\rvbx,\rvby}(\svx^n, \svy^n)\big]  \Big\}

2816: \label{eq.diffTime} \\

2817: %%

2818: %%

2819: & = \sum_{l=1}^{n - \delay} \sum_{k=1}^{n+1} \Big\{ \sum_{\svx^n,

2820: \svy^n} p_{\rvbx,\rvby}(\svx^n, \svy^n)\nonumber \\

2821: &   \hspace{0.75in}

2822:  \Pr \big[ \exists \; (\svxtil^n, \svytil^n)\in

2823: \binX(\svx^n)\times \binY(\svy^n)\cap \mathcal{F}_n(l,k,\svx^n,

2824: \svy^n) \; \mbox{s.t.} \;

2825:     p_{\rvbx,\rvby}(\svxtil^n, \svytil^n) \geq

2826: p_{\rvbx,\rvby}(\svx^n, \svy^n) \big]  \Big\}

2827: \nonumber\\

2828: %%

2829: %%

2830: & = \sum_{l=1}^{n - \delay} \sum_{k=1}^{n+1} p_n(l,k).

2831: \label{eq.defPn}

2832: \end{align}

2833: %

2834: In~(\ref{eq.diffTime}) we decompose the error event into a number of

2835: mutually exclusive events by partitioning all source pairs

2836: $(\svxtil^n, \svytil^n)$ into sets $\mathcal{F}_n(l, k,\svx^n,

2837: \svy^n)$ defined by the times $l$ and $k$ at which $\svxtil^n$ and

2838: $\svytil^n$ diverge from the realized source sequences.  The set

2839: $\mathcal{F}_n(l, k,\svx^n, \svy^n)$ is defined as

2840: %

2841: \begin{equation}

2842:  \mathcal{F}_n(l,k,x^n,y^n)=\{(\svxBar^n,\svyBar^n)\in

2843: \mathcal{X}^{n} \times\mathcal{Y}^{n} \; \mbox{s.t.} \;

2844: \svxBar^{l-1} = x^{l-1},\svxBar_l \neq x_l,\svyBar^{k-1}= y^{k-1},

2845: \svyBar_k\neq y_k\}. \label{eq.jointPart}

2846: \end{equation}

2847: In contrast to streaming point-to-point or side-information coding

2848: (cf.~(\ref{eq.jointPart}) with~(\ref{eq.partition})), the partition is

2849: now doubly-indexed.  To find the dominant error event, we must search

2850: over both indices.  Having two dimensions to search over results in an

2851: extra minimization when calculating the error exponent (and leads to

2852: the infimum over $\gamma$ in Theorem~\ref{thm.jointCodeML}).

2853:

2854: Finally, to get~(\ref{eq.defPn}) we define $p_n(l,k)$ as

2855: %

2856: \begin{eqnarray*}

2857: & & p_n(l,k) \\

2858: &=& \sum_{\svx^n, \svy^n} p_{\rvbx,\rvby}(\svx^n, \svy^n) \Pr \Big[

2859: \exists \; (\svxtil^n, \svytil^n)\in \binX(\svx^n)\times

2860: \binY(\svy^n)\cap \mathcal{F}_n(l,k,\svx^n, \svy^n) \; \mbox{s.t.} \;

2861:     p_{\rvbx,\rvby}(\svxtil^n, \svytil^n) \geq

2862: p_{\rvbx,\rvby}(\svx^n, \svy^n)\Big].

2863: \end{eqnarray*}

2864: %

2865: The following lemma provides an upper bound on $p_n(l,k)$:

2866: %

2867: \begin{lemma} \label{lemm.jointPn}

2868: %

2869: \begin{equation}

2870: \begin{array}{lllll}

2871: p_n(l,k) & \leq & \exp\{-(n-l+1) E_x(\Rx, \Ry, \frac{k-l}{n-l+1})\} &

2872: \mbox{if} & l \leq k, \vspace{1ex} \\

2873: %

2874: p_n(l,k) & \leq & \exp\{-(n-k+1) E_y(\Rx, \Ry, \frac{l-k}{n-k+1})\} &

2875: \mbox{if} & l \geq k,

2876: \end{array} \label{eq.mlSWbnd}

2877: \end{equation}

2878: %

2879: where $E_x(\Rx, \Ry, \gamma)$ and $E_y(\Rx, \Ry, \gamma)$ are

2880: defined in ~(\ref{eq.compoundExp}) and~(\ref{eq.defBasicExp})

2881: respectively. Notice that $l,k \leq n$, for $l\leq k$: $

2882: \frac{k-l}{n-l+1}\in [0,1]$ serves as $\gamma$ in the error exponent

2883: $E_x(\Rx, \Ry, \gamma)$. Similarly for  $l\geq k$.

2884: \end{lemma}

2885:

2886:

2887: \pf The bound depends on whether $l \leq k$ or $l \geq k$. Consider

2888: the case for $l \leq k$,

2889: %

2890: \begin{align}

2891: %%

2892: & p_n(l,k) \nonumber \\

2893: &=\sum_{\svx^n, \svy^n} p_{\rvbx,\rvby}(\svx^n, \svy^n) \Pr[

2894: \exists \; (\svxtil^n, \svytil^n)\in \binX(\svx^n)\times

2895: \binY(\svy^n)\cap \mathcal{F}_n(l,k,\svx^n, \svy^n) \; \mbox{s.t.} \;

2896:     p_{\rvbx,\rvby}(\svx^n, \svy^n) < p_{\rvbx,\rvby}(\svxtil^n,

2897: \svytil^n)]\nonumber\\

2898: %

2899: %

2900: %

2901: &\leq  \sum_{\svx^n, \svy^n}

2902: %\hspace{-1em}

2903: \min\Big[1, \sum_{\tiny \begin{array}{c} (\svxtil^n, \svytil^n) \in

2904: \mathcal{F}_n(l,k,\svx^n, \svy^n)\; \\

2905:   p_{\rvbx,\rvby}(\svx^n, \svy^n) <

2906: p_{\rvbx,\rvby}(\svxtil^n, \svytil^n)

2907: \end{array}}

2908: %\hspace{-2em}

2909: \Pr[ \svxtil^n \in \binX(\svx^n), \svytil^n \in \binY(\svy^n)]\Big]

2910: p_{\rvbx,\rvby}(\svx^n, \svy^n)  \label{eq.enumJoint} \displaybreak[2]\\

2911: %%

2912: %%

2913: %%

2914: %%

2915: %%

2916: &\leq     \sum_{\svx_l^n, \svy_l^n}

2917: %\hspace{-2em}

2918: \min \Big[1, \sum_{\tiny \begin{array}{c} (\svxtil_l^n,

2919:     \svytil_l^n) \; \mbox{s.t.} \;   \svytil^{k-1}=\svy^{k-1}  \; \\

2920:     p_{\rvbx,\rvby}(\svx_l^n, \svy_l^n) < p_{\rvbx,\rvby}(\svxtil_l^n,

2921:     \svytil_l^n)

2922: \end{array}}

2923: %\hspace{-2em}

2924: \exp\{-(n-l +1) \Rx -(n-k+1) \Ry\} \Big]

2925: p_{\rvbx,\rvby}(\svx_l^n,\svy_l^n)  \label{eq.indepBin} \\

2926: %%

2927: %%

2928: &= \sum_{\svx_l^n, \svy_l^n}

2929: %\hspace{-2em}

2930: \min \Big[1, \sum_{\svxtil_l^n, \svytil_k^n}

2931: \exp\{-(n-l+1) \Rx -(n-k+1) \Ry\} \nonumber \\

2932: %

2933: & \hspace{0.75in} \ind [ p_{\rvbx,\rvby}(\svxtil_l^{k-1}, \svy_l^{k-1})

2934: p_{\rvbx,\rvby}(\svxtil_k^{n}, \svytil_k^{n}) > p_{\rvbx,\rvby}

2935: (\svx_l^{n}, \svy_l^{n})]

2936: \Big] p_{\rvbx,\rvby}(\svx_l^n,\svy_l^n) \nonumber \\% \label{eq.indAgain}\\

2937: %%

2938: %%

2939: &\leq

2940: \sum_{\svx_l^n, \svy_l^n}

2941: %\hspace{-2em}

2942: \min  \Bigg[1, \sum_{\svxtil_l^n, \svytil_k^n}

2943: \exp\{-(n-l+1) \Rx -(n-k+1) \Ry\} \nonumber \\

2944: %

2945: & \hspace{0.5in}  \min \Bigg[1, \frac{p_{\rvbx,\rvby}(\svxtil_l^{k-1},

2946: \svy_l^{k-1}) p_{\rvbx,\rvby} (\svxtil_k^{n}, \svytil_k^{n})}{

2947: p_{\rvbx,\rvby}(\svx_l^{n}, \svy_l^{n})} \Bigg] \Bigg]

2948: p_{\rvbx,\rvby}(\svx_l^{n}, \svy_l^{n}) \nonumber

2949: \displaybreak[2] \\

2950: %

2951: %

2952: %

2953: &\leq

2954: \sum_{\svx_l^n, \svy_l^n}

2955: %\hspace{-2em}

2956: \Bigg[\sum_{\svxtil_l^n, \svytil_k^n}

2957: e^{-(n-l+1) \Rx -(n-k+1) \Ry}

2958: %

2959:  \Bigg[

2960: \frac{p_{\rvbx,\rvby}(\svxtil_l^{k-1}, \svy_l^{k-1}) p_{\rvbx,\rvby}

2961: (\svxtil_k^{n}, \svytil_k^{n})}{ p_{\rvbx,\rvby}(\svx_l^{n},

2962: \svy_l^{n})} \Bigg]^{\frac{1}{1+\rho}} \Bigg]^{\rho}

2963: p_{\rvbx,\rvby}(\svx_l^{n}, \svy_l^{n})

2964: \displaybreak[2] \label{eq.gallagerRho} \\

2965: %

2966: %

2967: %

2968: &=

2969: e^{-(n-l+1) \rho \Rx -(n-k+1) \rho \Ry}

2970: \sum_{\svx_l^n, \svy_l^n}

2971: %\hspace{-2em}

2972: \Bigg[\sum_{\svxtil_l^n, \svytil_k^n}

2973: %

2974: [p_{\rvbx,\rvby}(\svxtil_l^{k-1}, \svy_l^{k-1}) p_{\rvbx,\rvby}

2975: (\svxtil_k^{n}, \svytil_k^{n}) ]^{\frac{1}{1+\rho}} \Bigg]^{\rho}

2976: p_{\rvbx,\rvby}(\svx_l^n,\svy_l^n)^{\frac{1}{1+\rho}}

2977: \nonumber \displaybreak[2]\\

2978: %

2979: %

2980: %

2981: &= e^{-(n-l+1) \rho \Rx -(n-k+1) \rho \Ry} \sum_{\svy_l^{k-1}}

2982: \Big[ \sum_{\svx_l^{k-1}}

2983: p_{\rvbx,\rvby}(\svx_l^{k-1},\svy_l^{k-1})^{\frac{1}{1+\rho}}\Big]

2984: %\hspace{-2em}

2985: \Big[\sum_{\svxtil_l^{k-1}}

2986: %

2987: p_{\rvbx,\rvby} (\svxtil_l^{k-1}, \svy_l^{k-1})^{\frac{1}{1+\rho}}

2988: \Big]^{\rho}

2989: \nonumber \\

2990: %

2991: & \hspace{0.5in} \Big[ \sum_{\svxtil_k^n, \svytil_k^n} p_{\rvbx,\rvby}

2992: (\svxtil_k^{n}, \svytil_k^{n})^{\frac{1}{1+\rho}} \Big]^{\rho}

2993: \sum_{\svx_k^n, \svy_k^n} p_{\rvbx,\rvby}(\svx_k^{n},

2994: \svy_k^{n})^{\frac{1}{1+\rho}}

2995: \nonumber \displaybreak[2] \\

2996: %

2997: %

2998: %

2999: &= e^{-(n-l+1) \rho \Rx -(n-k+1) \rho \Ry}

3000: \Bigg[\sum_{\svy_l^{k-1}} \Big[ \sum_{\svx_l^{k-1}}

3001: p_{\rvbx,\rvby}(\svx_l^{k-1}, \svy_l^{k-1})^{\frac{1}{1+\rho}} \Big]^{1

3002: + \rho} \Bigg]

3003: %\hspace{-2em}

3004: \Big[\sum_{\svx_k^n, \svy_k^n}

3005: %

3006: p_{\rvbx,\rvby}(\svx_k^{n}, \svy_k^{n})^{\frac{1}{1+\rho}} \Big]^{1

3007: +\rho}

3008: \nonumber \displaybreak[2]\\

3009: %

3010: %

3011: %

3012: &= e^{-(n-l+1) \rho \Rx -(n-k+1) \rho \Ry} \Bigg[\sum_{\svy} \Big[

3013: \sum_{\svx} p_{\rvx,\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big]^{1 +

3014: \rho} \Bigg]^{k-l}

3015: %\hspace{-2em}

3016: \Big[\sum_{\svx, \svy}

3017: %

3018: p_{\rvx,\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big]^{(1 +\rho)(n-k+1)}

3019: \label{eq.rearranging}\displaybreak[2] \\

3020: %

3021: %

3022: %

3023: &= \exp\left\{-(k-l) \Bigg[ \rho \Rx - \log \Big[ \sum_{\svy} \Big[

3024: \sum_{\svx} p_{\rvx,\rvy}(\svx,\svy)^{\frac{1}{1+\rho}}

3025: \Big]^{1+\rho} \Big] \Bigg]

3026: \right\} \nonumber \\

3027: %%

3028: & \hspace{0.5in} \exp\left\{ -(n-k+1) \Bigg[ \rho (\Rx + \Ry) -

3029: (1+\rho) \log \Big[ \sum_{\svx, \svy}

3030: p_{\rvx,\rvy}(\svx,\svy)^{\frac{1}{1+\rho}} \Big] \Bigg] \right\}

3031: \nonumber \displaybreak[2] \\

3032: %

3033: %

3034: &= \exp\left\{-(k-l) E_{x|y}(\Rx, \rho)

3035:           -(n-k+1) E_{xy}(\Rx, \Ry, \rho) \right\} \label{eq.defElk}\\

3036: %

3037: %

3038: %

3039: &= \exp \left\{ -(n-l+1) \Big[ \frac{k-l}{n-l+1} E_{x|y}(\Rx,\rho) +

3040: \frac{n-k+1}{n-l+1} E_{xy}(\Rx, \Ry, \rho)\Big] \right\} \label{eq.defEl2}\\

3041: %

3042: %

3043: %

3044: &\leq \exp \left\{ -(n-l+1) \sup_{\rho \in [0,1]}

3045: \Big[ \frac{k-l}{n-l+1} E_{x|y}(\Rx,\rho) +

3046: \frac{n-k+1}{n-l+1} E_{xy}(\Rx, \Ry, \rho)\Big] \right\}

3047: \label{eq.jointMLoptRho}\\

3048: %

3049: %

3050: %

3051: &= \exp \left\{ -(n-l+1) E_{x}^{ML} \left(\Rx, \Ry,

3052:     \frac{k-l}{n-l+1}\right) \right\} = \exp \left\{ -(n-l+1)

3053:   E_{x}(\Rx, \Ry, \frac{k-l}{n-l+1}) \right\}.

3054: \label{eq.subDefsEx}

3055: %%

3056: %& \leq \exp \left\{ -(n-l+1) E_x(\Rx,\Ry,\rho,\frac{k-l}{n-l+1}) \right\}

3057: %\label{eq.compoundDef}

3058: \end{align}

3059:

3060: In~(\ref{eq.enumJoint}) we explicitly indicate the three conditions

3061: that a suffix pair $(\svxtil_{l}^n, \svytil_{k}^n)$ must satisfy to

3062: result in a decoding error.  In~(\ref{eq.indepBin}) we sum out over

3063: the common prefixes $(\svx^{l-1}, \svy^{l-1})$, and use the fact that

3064: the random binning is done independently at each encoder, see

3065: Definition~\ref{def.seqn_coding}.  We get~(\ref{eq.gallagerRho}) by

3066: limiting $\rho$ to the interval $0 \leq \rho \leq 1$, as

3067: in~(\ref{eq.limOnRho}). Getting~(\ref{eq.rearranging})

3068: from~(\ref{eq.gallagerRho}) follows by a number of basic

3069: manipulations.  In~(\ref{eq.rearranging}) we get the single letter

3070: expression by again using the memoryless property of the sources.

3071: In~(\ref{eq.defElk}) we use the definitions of $E_{x|y}$ and $E_{xy}$

3072: from~(\ref{eq.defBasicExp}) of Theorem~\ref{thm.jointCodeML}.  Noting

3073: that the bound holds for all $\rho \in [0,1]$ optimizing over $\rho$

3074: results in~(\ref{eq.jointMLoptRho}).  Finally, using the definition

3075: of~(\ref{eq.compoundExp}) and the remark following

3076: Theorem~\ref{THM:Universal_ML_SW} that the maximum-likelihood and

3077: universal exponents are equal gives~(\ref{eq.subDefsEx}).  The bound

3078: on $p_n(l,k)$ when $l > k$, is developed in an analogous

3079: fashion.\hfill $\blacksquare$

3080:

3081: We use Lemma~\ref{lemm.jointPn} together with~(\ref{eq.defPn}) to

3082: bound $\Pr[\rvxhat^{n-\delay} \neq \rvx^{n-\delay}]$ for two distinct

3083: cases.  The first, simpler case, is when $\inf_{\gamma \in [0,1] }

3084: E_y(\Rx, \Ry, \gamma) > {\inf_{\gamma \in [0,1] } E_x(\Rx, \Ry,

3085:   \gamma)}$.  To bound $\Pr[\rvxhat^{n-\delay} \neq \rvx^{n-\delay}]$

3086: in this case, we split the sum over the $p_n(l,k)$ into two terms,

3087: as visualized in Fig.~\ref{fig.twoD2}.  There are $(n+1)\times

3088: (n-\delay)$ such events to account for

3089:   (those inside the box).  The probabilities of the events within each oval are

3090:   summed together to give an upper bound on $\Pr[ \rvxhat^{n - \delay} \neq \rvx^{n-\delay}]$.

3091:   We add extra probabilities outside of the box but within the ovals

3092:   to make the summation symmetric thus simpler. Those extra

3093:   error events do not impact the error exponent because $\inf_{\gamma \in [0,1] } E_y(\Rx, \Ry,

3094:   \gamma) \geq {\inf_{\gamma \in [0,1] } E_x(\Rx, \Ry, \gamma)}$.

3095:   The possible dominant error events are highlighted in Figure~\ref{fig.twoD2}. Thus,

3096: %

3097: \begin{align}

3098:   & \Pr[ \rvxhat^{n - \delay} \neq \rvx^{n-\delay}] \leq \sum_{l=1}^{n

3099:     - \delay} \sum_{k=l}^{n+1} p_n(l,k) + \sum_{k=1}^{n - \delay}

3100:     \sum_{l=k}^{n+1} p_n(l,k)  \label{eq.twoTerms} \\

3101: %%

3102: %%

3103: &\leq \sum_{l=1}^{n - \delay}  \sum_{k=l}^{n+1} \exp\{ -(n-l+1)

3104: \inf_{\gamma \in [0,1]} E_x(\Rx, \Ry, \gamma) \}

3105: %

3106:  + \sum_{k=1}^{n-\delay}\sum_{l=k}^{n+1}

3107: \exp\{-(n-k+1) \inf_{\gamma \in [0,1]} E_y(\Rx, \Ry, \gamma)\}

3108:  \label{eq.usinglemma} \\

3109: %%

3110: %%

3111: & =   \sum_{l=1}^{n - \delay} (n-l+2) \exp\{ -(n-l+1)

3112: \inf_{\gamma \in [0,1]} E_x(\Rx, \Ry, \gamma) \} \nonumber \\

3113: %%

3114: %%

3115:  & \ \ \ \ \ +  \sum_{k=1}^{n - \delay} (n-k+2) \exp\{ -(n-k+1)

3116: \inf_{\gamma \in [0,1]}

3117: E_y(\Rx, \Ry, \gamma) \} \nonumber\\%\label{eq.ineq}\\

3118: %%

3119: %%

3120: & \leq 2 \sum_{l=1}^{n - \delay} (n-l+2) \exp\{ -(n-l+1)

3121: \inf_{\gamma \in [0,1]} E_x(\Rx, \Ry, \gamma) \}

3122: %

3123:  \label{eq.sumTerms} \\

3124: %%

3125: %%

3126: %%

3127: & \leq \sum_{l=1}^{n - \delay}   C_1 \exp\{ -(n-l+2)[ \inf_{\gamma

3128: \in [0,1]} E_x(\Rx, \Ry, \gamma) -\alpha]\}

3129: %

3130:   \label{eq.smallerExp}\\

3131: %%

3132: %%

3133: & \leq C_2 \exp\{-\delay [ \inf_{\gamma \in [0,1]} E_x(\Rx, \Ry,

3134: \gamma) -\alpha]\} \label{eq.boundEyBigEx}

3135: \end{align}

3136:

3137:

3138: Equation (\ref{eq.twoTerms}) follows directly from (\ref{eq.defPn}),

3139: in the first  term  $l\leq k $, in the second

3140:  term $l\geq k$.  In~(\ref{eq.usinglemma}), we use Lemma~\ref{lemm.jointPn}. In~(\ref{eq.sumTerms}) we

3141: use the assumption that $\inf_{\gamma \in [0,1] } E_y(\Rx, \Ry,

3142: \gamma) > \inf_{\gamma \in [0,1] } E_x(\Rx, \Ry, \gamma)$.

3143: In~(\ref{eq.smallerExp}) the $\alpha > 0$ results from incorporating

3144: the polynomial into the first exponent, and can be chosen as small

3145: as desired.  Combining terms and summing out the decaying

3146: exponential yield the bound~(\ref{eq.boundEyBigEx}).

3147:

3148:

3149:

3150: \begin{figure}

3151:  \begin{picture}(100,100)

3152: \multiput(50,20)(5,0){12}{\multiput(0,0)(0,5){12}{\circle{0.5}}}

3153: \put(45, 15){\vector(1,0){70}} \put(45, 15){\vector(0,1){70}}

3154: \put(45,87){$k$} \put(117,15){$l$}

3155:

3156: \put(39,74) {\rotatebox{90}{ $n+1$}}

3157:   \put(39,60){\rotatebox{90}{$n-\Delta$}}

3158:

3159: \put(30,15) {\rotatebox{90}{Index at which $ \rvy^n$ and $

3160: \rvytil^n$ first diverge}}

3161:

3162: \put(45,5)  {Index at which $ \rvx^n$ and $ \rvxtil^n$ first

3163: diverge}

3164:  \put(104,10){$n+1$}

3165: \put(90,10){$n-\Delta$}

3166:

3167: \linethickness{0.5mm}\put(97.5,15){\line(0,1){63}} % the box

3168: \put(45,78){\line(1,0){52.5}} % the box

3169:

3170: \multiput(95,65)(0, 5){ 3 }{\circle*{1.5}}

3171:

3172: \thinlines \put(50,47.5){\oval(2,60)} \put(55,50){\oval(2,55)}

3173: \put(60,52.5){\oval(2,50)} \put(65,55){\oval(2,45)}

3174: \put(70,57.5){\oval(2,40)} \put(75,60){\oval(2,35)}

3175: \put(80,62.5){\oval(2,30)} \put(85,65){\oval(2,25)}

3176: \put(90,67.5){\oval(2,20)} %\thicklines %\linethickness{1.5mm}

3177: \put(95,70){\oval(2,15)} \thinlines \put(77.5,20){\oval(60,2)}

3178: \put(80,25){\oval( 55,2)} \put(82.5,30){\oval(50,2)}

3179: \put(85,35){\oval(45,2)} \put(87.5,40){\oval(40,2)}

3180: \put(90,45){\oval( 35,2)} \put(92.5,50){\oval(30,2)}

3181: \put(95,55){\oval( 25,2)} \put(97.5,60){\oval(20,2)}

3182: %\thicklines %\linethickness{0.5mm}

3183: \put(100,65){\oval( 15,2)} \thinlines

3184:  \end{picture}

3185: %

3186: \caption{Two dimensional plot of the error probabilities $p_n(l,k)$,

3187: corresponding to error events $(l,k)$,

3188:   contributing to $\Pr[ \rvxhat^{n - \delay} \neq \rvx^{n-\delay}]$ in

3189:   the situation where $\inf_{\gamma \in [0,1] } E_y(\Rx, \Ry,

3190:   \gamma) \geq {\inf_{\gamma \in [0,1] } E_x(\Rx, \Ry, \gamma)}$.

3191:   }   \label{fig.twoD2}

3192: \end{figure}

3193:

3194:

3195: The second, more involved case, is when $\inf_{\gamma \in [0,1] }

3196: E_y(\Rx, \Ry, \gamma) < {\inf_{\gamma \in [0,1] } E_x(\Rx, \Ry,

3197:   \gamma)}$.  To bound $\Pr[ \rvxhat^{n - \delay} \neq

3198: \rvx^{n-\delay}]$, we could use the same bounding technique used in

3199: the first case. This gives the error exponent $\inf_{\gamma \in [0,1]

3200: } E_y(\Rx, \Ry, \gamma)$ which is generally smaller than what we can

3201: get by dividing the error events in a new scheme as shown in Figure

3202: \ref{fig.errEvents}. In this situation we split~(\ref{eq.defPn}) into

3203: three terms, as visualized in Fig.~\ref{fig.errEvents}.  Just as in the

3204: first case shown in Fig.~\ref{fig.twoD2}, there are $(n+1)\times

3205: (n-\delay)$ such events to account for (those inside the box). The

3206: error events are partitioned into 3 regions. Region 2 and 3 are

3207: separated by $k^*(l)$ using a dotted line.  In region 3, we add extra

3208: probabilities outside of the box but within the ovals to make the

3209: summation simpler.  Those extra error events do not affect the error

3210: exponent as shown in the proof.  The possible dominant error events

3211: are highlighted in Fig.~\ref{fig.errEvents}. Thus,

3212: %

3213: \begin{equation}

3214: \Pr[ \rvxhat^{n - \delay} \neq \rvx^{n-\delay}] \leq \sum_{l=1}^{n -

3215: \delay} \sum_{k=l}^{n+1} p_n(l,k) + \sum_{l=1}^{n - \delay}

3216: \sum_{k=k^{\ast}(l)}^{l-1} p_n(l,k) + \sum_{l=1}^{n - \delay}

3217: \sum_{k=1}^{k^{\ast}(l)-1} p_n(l,k) \label{eq.threeTerms}

3218: \end{equation}

3219: %

3220: where, by convention, an empty sum is zero, i.e., $\sum_{k=1}^{0} p_n(l,k)=0$. The lower boundary of Region 2 is

3221: $k^{\ast}(l) \geq 1$ as a function of $n$ and $l$:

3222: %

3223: \begin{equation}

3224: \kast = \max\left\{1, n +1-  \ceil{\frac{ \inf_{\gamma \in [0,1]}

3225: E_x(\Rx, \Ry, \gamma)}{ \inf_{\gamma \in [0,1] } E_y(\Rx, \Ry,

3226: \gamma)}} (n+1-l )\right\} = \max\left\{1, n+1 - G

3227: (n+1-l)\right\}\label{eq.kast}

3228: \end{equation}

3229: %

3230: where we use $G$ to denote the ceiling of the ratio of exponents.

3231: Note that when $\inf_{\gamma \in [0,1] } E_y(\Rx, \Ry,  \gamma)

3232: > {\inf_{\gamma \in [0,1] } E_x(\Rx, \Ry,   \gamma)}$ then $G =

3233: 1$ and region two of Fig.~\ref{fig.errEvents} disappears.  In other

3234: words, the middle term of~(\ref{eq.threeTerms}) equals zero.  This

3235: is the first case considered.  We now consider the cases when $G

3236: \geq 2$ (because of the ceiling function, $G$ is a positive integer).

3237:

3238:

3239:

3240: \begin{figure}[t]

3241: %\caption{}

3242: \begin{picture}(100,100)

3243:

3244: \multiput(50,20)(5,0){12}{\multiput(0,0)(0,5){12}{\circle{0.5}}}

3245: \put(45, 15){\vector(1,0){70}} \put(45, 15){\vector(0,1){70}}

3246: \put(45,87){$k$} \put(117,15){$l$}

3247:

3248: \put(39,74) {\rotatebox{90}{ $n+1$}}

3249:   \put(39,60){\rotatebox{90}{$n-\Delta$}}

3250:

3251: \put(30,15) {\rotatebox{90}{Index at which $ \rvy^n$ and $

3252: \rvyhat^n$ first diverge}}

3253:

3254: \put(45,5)  {Index at which $ \rvx^n$ and $ \rvxhat^n$ first

3255: diverge}

3256:

3257:

3258:    \put(110,49){$k^*(n-\Delta)-1$}

3259: \put(104,10){$n+1$}  \put(90,10){$n-\Delta$}

3260:  \multiput(95,55)(0,5){

3261: 5 }{\circle*{1.5}}

3262:

3263: \linethickness{0.5mm}\put(97.5,15){\line(0,1){63}} % the box

3264: \put(45,78){\line(1,0){52.5}} % the box

3265:

3266:  \thinlines

3267: \put(105,75){\line(-1,-1){58}}  \dottedline{1}(105,75)(77.5,20)

3268:

3269: \dottedline{1}(77.5,20)(50,20)

3270:

3271: %\put(105,75){\line(-1,-2){29}}

3272: \put(50,47.5){\oval(2,60)} \put(55,50){\oval(2,55)}

3273: \put(60,52.5){\oval(2,50)} \put(65,55){\oval(2,45)}

3274: \put(70,57.5){\oval(2,40)} \put(75,60){\oval(2,35)}

3275: \put(80,62.5){\oval(2,30)} \put(85,65){\oval(2,25)}

3276: \put(90,67.5){\oval(2,20)}

3277:  \put(95,70){\oval(2,15)}

3278: \put(55,20){\oval(2,5)} \put(60,22.5){\oval(2,10)}

3279: \put(65,25){\oval(2,15)} \put(70,27.5){\oval(2,20)}

3280: \put(75,30){\oval(2,25)} \put(80,35){\oval(2,25)}

3281: \put(85,42.5){\oval(2,20)} \put(90,50){\oval(2,15)}   \put(95,57

3282: ){\oval(2,10)}  \put(92.5,20){\oval( 30,2)} \put(95,25){\oval(

3283: 25,2)} \put(95,30){\oval(25,2)} \put(97.5,35){\oval(20,2)}

3284: \put(97.5,40){\oval(20,2)} \put(100,45){\oval( 15,2)}

3285: \put(100,50){\oval(15,2)} \thinlines \put(61,56){Region

3286: 1}\put(65,27){Region 2} \put(96,27){Region 3} \put(108,

3287: 61){$k^*(l)$} \put(107, 62){\vector(-1,0){8.5}}

3288: \end{picture}

3289: \caption{Two dimensional plot of the error probabilities $p_n(l,k)$,

3290: corresponding to error events $(l,k)$,

3291:   contributing to $\Pr[ \rvxhat^{n - \delay} \neq \rvx^{n-\delay}]$ in

3292:   the situation where $\inf_{\gamma \in

3293:     [0,1] } E_y(\Rx, \Ry,   \gamma) < {\inf_{\gamma \in [0,1] }

3294:     E_x(\Rx, \Ry,  \gamma)}$. }\label{fig.errEvents}

3295: \end{figure}

3296:

3297:

3298:

3299: The first term of~(\ref{eq.threeTerms}), i.e., region one in

3300: Fig.~\ref{fig.errEvents} where $ l\leq k$, is bounded in the same

3301: way that the first term of~(\ref{eq.twoTerms}) is, giving

3302: %

3303: \begin{equation}

3304: \sum_{l=1}^{n - \delay} \sum_{k=l}^{n+1} p_n(l,k) \leq

3305: C_2 \exp\{-\delay [ \inf_{\gamma \in [0,1]} E_x(\Rx, \Ry, \gamma) -\alpha]\}.

3306: \label{eq.firstTerm}

3307: \end{equation}

3308:

3309:

3310: In Fig.~\ref{fig.errEvents}, region two is upper bounded by the

3311: 45-degree line, and lower bounded by $k^{\ast}(l)$. The second term

3312: of~(\ref{eq.threeTerms}), corresponding to this region where $ l\geq

3313: k$, is bounded as

3314: %

3315: \begin{align}

3316:  \sum_{l=1}^{n - \delay} \sum_{k=\kast}^{l-1} p_n(l,k)

3317: &\leq \sum_{l=1}^{n - \delay}\sum_{k=\kast}^{l-1} \exp\{-(n-k+1)

3318:   E_y(\Rx, \Ry, \frac{l-k}{n-k+1})\}

3319: \nonumber \\

3320:  %%

3321:   &= \sum_{l=1}^{n - \delay}\sum_{k=\kast}^{l-1}

3322: \exp\{-(n-k+1) \frac{n-l+1}{n-l+1} E_y(\Rx, \Ry,

3323: \frac{l-k}{n-k+1})\}

3324:  \label{eq.gammaInv} \\

3325:  & \leq \sum_{l=1}^{n - \delay}\sum_{k=\kast}^{l-1}

3326:  \exp\{ -(n-l+1) \inf_{\gamma \in [0,1]}

3327: \frac{1}{1-\gamma} E_y(\Rx, \Ry, \gamma)\}

3328: \label{eq.defGamma}\\

3329:  & = \sum_{l=1}^{n - \delay} (l-\kast)

3330:  \exp\{ -(n-l+1) \inf_{\gamma \in [0,1]}

3331: \frac{1}{1-\gamma} E_y(\Rx, \Ry, \gamma)\} \label{eq.secTerm}

3332: \end{align}

3333: %

3334: In~(\ref{eq.gammaInv}) we note that $l\geq k $, so define

3335: $\frac{l-k}{n-k+1}=\gamma$ as in~(\ref{eq.defGamma}). Then

3336: $\frac{n-k+1}{n-l+1} = \frac{1}{1-\gamma}$.

3337:

3338: The third term of~(\ref{eq.threeTerms}), i.e., the intersection of

3339: region three and the ``box'' in Fig.~\ref{fig.errEvents} where $

3340: l\geq k$, can be bounded as,

3341: %

3342: \begin{align}

3343:  \sum_{l=1}^{n - \delay}\sum_{k = 1}^{\kast-1}

3344:  p_n(l,k) &\leq \sum_{l=1}^{n + 1}\sum_{k = 1}^{\min\{l, k^*(n-\delay)-1\}}

3345:  p_n(l,k) \label{eq.changeOrder}\\

3346: %

3347: %

3348:  &=\sum_{k=1}^{k^*(n-\Delta)-1}

3349: \sum_{l=k}^{n+1}p_n(l,k) \label{eq.changeOrder1}\\

3350: %

3351: &\leq \sum_{k=1}^{k^*(n-\Delta)-1}

3352: \sum_{l=k}^{n+1}\exp\{-(n-k+1)E_y(R_x,R_y,\frac{l-k}{n-k+1})\}\nonumber\\

3353: %

3354: &\leq \sum_{k=1}^{k^*(n-\Delta)-1}

3355: \sum_{l=k}^{n+1}\exp\{-(n-k+1)\inf_{\gamma\in[0,1]}E_y(R_x,R_y,\gamma)\}

3356: \nonumber\\

3357: %

3358: &\leq \sum_{k=1}^{k^*(n-\Delta)-1}(n-k+2)

3359: \exp\{-(n-k+1)\inf_{\gamma\in[0,1]}E_y(R_x,R_y,\gamma)\}

3360: \label{eq.thirdTerm}

3361: \end{align}

3362:

3363: In (\ref{eq.changeOrder}) we note that $l\leq n-\delay$ thus

3364: $k^*(n-\delay) -1 \geq \kast -1$, also $l\geq 1$, so $l\geq

3365: \kast-1$. This can be visualized in Fig.~\ref{fig.errEvents} as we

3366: extend the summation from the intersection of the ``box'' and region

3367: 3 to the whole region under the diagonal line and  the horizontal

3368: line $k=k^*(n-\delay)-1$. In (\ref{eq.changeOrder1}) we simply

3369: switch the order of the summation.

3370:

3371:

3372:

3373: Finally when $G \geq 2$, we substitute~(\ref{eq.firstTerm}),

3374: (\ref{eq.secTerm}), and~(\ref{eq.thirdTerm})

3375: into~(\ref{eq.threeTerms}) to give

3376: %

3377: \begin{align}

3378: %

3379: %

3380: \Pr[\rvxhat^{n - \delay} \neq \rvx^{n-\delay}] &\leq C_2

3381:   \exp\{-\delay [ \inf_{\gamma \in [0,1]} E_x(\Rx, \Ry, \gamma)

3382:   -\alpha]\} \nonumber \\

3383: %

3384:   &+ \sum_{l=1}^{n - \delay} (l-\kast) \exp\{ -(n-l+1) \inf_{\gamma

3385:     \in [0,1]} \frac{1}{1-\gamma} E_y(\Rx, \Ry, \gamma)\}\nonumber\\

3386: %

3387:   &+\sum_{k=1}^{k^*(n-\Delta)-1}(n-k+2)\exp\{-(n-k+1)\inf_{\gamma\in[0,1]}E_y(R_x,R_y,\gamma)\}\nonumber\\

3388: %%

3389: %%

3390: %%

3391: &\leq C_2 \exp\{-\delay [ \inf_{\gamma \in [0,1]} E_x(\Rx, \Ry,

3392: \gamma) -\alpha]\}

3393: \nonumber \\

3394: %

3395: &+ \sum_{l=1}^{n - \delay} (l-n-1+G(n+1-l))

3396:  \exp\{ -(n-l+1) \inf_{\gamma \in [0,1]}

3397: \frac{1}{1-\gamma} E_y(\Rx, \Ry, \gamma)\}\nonumber\\

3398: %

3399: &+\sum_{k=1}^{n +1- G(\delay+1)}(n-k+2)\exp\{-(n-k+1)\inf_{\gamma\in[0,1]}E_y(R_x,R_y,\gamma)\} \label{eq.largerSum}\\

3400: %%

3401: %%

3402: %%

3403: &\leq C_2 \exp\{-\delay [ \inf_{\gamma \in [0,1]} E_x(\Rx, \Ry,

3404: \gamma) -\alpha]\}

3405: \nonumber \\

3406: %

3407: &+ (G-1)C_3

3408:  \exp\{ -\delay \big[\inf_{\gamma \in [0,1]}

3409: \frac{1}{1-\gamma} E_y(\Rx, \Ry, \gamma)-\alpha\big]\}\nonumber\\

3410: %

3411: &+ C_4\exp\{-\big[\delay

3412: G\inf_{\gamma\in[0,1]}E_y(R_x,R_y,\gamma)-\alpha \big]\}\nonumber\\

3413: %%

3414: %%

3415: %%

3416: &\leq C_5 \exp \Big\{  - \delay \Big[\min \Big\{ \inf_{\gamma \in

3417: [0,1]} E_x(\Rx, \Ry, \gamma), \inf_{\gamma \in [0,1]}

3418: \frac{1}{1-\gamma} E_y(\Rx, \Ry, \gamma) \Big\} - \alpha

3419: \Big]\Big\}. \label{eq.finResMLSW}

3420: \end{align}

3421: %

3422: To get (\ref{eq.largerSum}), we use the fact that $k^*(l)\geq

3423: n+1-G(n+1-l)$ from the definition of $k^*(l)$ in (\ref{eq.kast}) to

3424: upper bound the second term. We exploit the definition of $G$ to

3425: convert the exponent in the third term to $\inf_{\gamma \in [0,1]}

3426: E_x(\Rx, \Ry, \gamma)$.  Finally, to get~(\ref{eq.finResMLSW}) we

3427: gather the constants together, sum out over the decaying

3428: exponentials, and are limited by the smaller of the two exponents.

3429:

3430: Note: in the proof of Theorem~\ref{thm.jointCodeML},  we regularly

3431: double count the error events or add smaller extra probabilities to

3432: make the summations simpler. But it should be clear that the error

3433: exponent is not affected.

3434:

3435:

3436: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

3437: \subsection{Universal Decoding}

3438:

3439:

3440: As discussed in Section~\ref{sec.univEnt}, we do not use a pairwise

3441: minimum joint-entropy decoder because a polynomial term in $n$ would

3442: multiply the exponential decay in $\delay$. Analogous to the

3443: sequential decoder used there, we use a ``weighted suffix entropy''

3444: decoder.  The decoding starts by first identifying candidate sequence

3445: pairs as those that agree with the encoding bit streams up to time

3446: $n$, i.e., $\svxBar^n \in \binX(\svx^n), \svyBar^n \in \binY(\svy^n)$.

3447: For any one of the $|\binX(\svx^n)| |\binY(\svy^n)|$ sequence pairs in

3448: the candidate set, i.e., $(\svxBar^n, \svyBar^n) \in \binX(\svx^n)

3449: \times \binY(\svy^n)$ we compute $(n+1)\times (n+1)$ weighted

3450: entropies:

3451:

3452:

3453: \begin{eqnarray}

3454: &&H_S(l,k,\svxBar^n, \svyBar^n)=H(\svxBar_{l}^{n},\svyBar_{l}^{n}),\ \ \ \ l=k\nonumber\\

3455: &&H_S(l,k,\svxBar^n, \svyBar^n)=\frac{k-l}{n+1-l}H({\svxBar}_{l}^{k-1}|{\svyBar}_{l}^{k-1})+\frac{n+1-k}{n+1-l}H({\svxBar}_{k}^{n},{\svyBar}_{k}^{n}),\ \ \ \ l<k\nonumber\\

3456: &&H_S(l,k,\svxBar^n,\svyBar^n)=\frac{l-k}{n+1-k}H({\svyBar}_{k}^{l-1}|{\svxBar}_{k}^{l-1})+\frac{n+1-l}{n+1-k}H({\svxBar}_{l}^{n},{\svyBar}_{l}^{n}),\

3457: \ \ \ l>k.\nonumber

3458: \end{eqnarray}

3459:

3460: We define the \textit{score} of $({\svxBar}^n, {\svyBar}^n)$ as the

3461: pair of integers $i_x(\svxBar^n,\svyBar^n)$, $

3462: i_y(\svxBar^n,\svyBar^n)$ s.t.,

3463: \begin{eqnarray}

3464: i_x(\svxBar^n,\svyBar^n)&=&\max\{i:H_S(l,k,(\svxBar^n,\svyBar^n))<

3465: H_S(l,k,\svxtil^n,\svytil^n) \forall k=1,2,...n+1, \forall

3466: l=1,2,...i, \nonumber\\

3467: && \forall (\svxtil^n,\svytil^n)\in\binX(\svx^n) \times

3468: \binY(\svy^n)\cap

3469: \mathcal{F}_n(l,k,\svxBar^n,\svyBar^n) \}\\

3470: i_y(\svxBar^n,\svyBar^n)&=&\max\{i:H_S(l,k,(\svxBar^n,\svyBar^n))<

3471: H_S(l,k,\svxtil^n,\svytil^n) \forall l=1,2,...n+1, \forall

3472: k=1,2,...i,\nonumber\\

3473: && \forall (\svxtil^n,\svytil^n)\in \binX(\svx^n) \times

3474: \binY(\svy^n)\cap \mathcal{F}_n(l,k,\svxBar^n,\svyBar^n) \}

3475: \end{eqnarray}

3476: While $\mathcal{F}_n(l,k,\svx^n,\svy^n)$ is the same set as defined

3477: in (\ref{eq.jointPart}), we repeat the definition here for

3478: convenience,

3479:

3480:

3481: \begin{equation}

3482:  \mathcal{F}_n(l,k,x^n,y^n)=\{(\svxBar^n,\svyBar^n)\in

3483: \mathcal{X}^{n} \times\mathcal{Y}^{n} \; \mbox{s.t.} \;

3484: \svxBar^{l-1} = x^{l-1},\svxBar_l \neq x_l,\svyBar^{k-1}= y^{k-1},

3485: \svyBar_k\neq y_k\}.\nonumber

3486: \end{equation}

3487:

3488:

3489:

3490: The definition of

3491: $(i_x(\svxBar^n,\svyBar^n),i_y(\svxBar^n,\svyBar^n))$ can be

3492: visualized in the following procedure. As shown in

3493: Fig.~\ref{fig.scoresheet},  for all $ 1\leq l,k \leq n+1$, if there

3494: exists $({\svxBBar}^n, {\svyBBar}^n)\in

3495: \mathcal{F}_n(l,k,(\svxBar^n,\svyBar^n))\cap \binX(\svx^n) \times

3496: \binY(\svy^n) $ s.t.  $H_S(l,k,\svxBar^n, \svyBar^n)\geq

3497: H_S(l,k,\svxBBar^n,\svyBBar^n)$ , then we mark $(l,k)$ on the plane

3498: as shown in Fig.~\ref{fig.scoresheet}. Eventually we pick the maximum

3499: integer which is smaller than all marked $x$-coordinates as

3500: $i_x(\svxBar^n,\svyBar^n)$ and the maximum integer which is smaller

3501: than all marked $y$-coordinates as $i_y(\svxBar^n,\svyBar^n)$. The

3502: score of $({\svxBar}^n, {\svyBar}^n)$ tells us the first

3503: branch point (either $x$ or $y$) where a ``better sequence pair''

3504: (with a smaller weighted entropy) exists.

3505:

3506:

3507:

3508: Define the set of the winners as the sequences (not sequence pair)

3509: with the maximum   score:

3510:

3511: $$\mathcal{W}_n^x=\{\svxBar^n\in  \binX(\svx^n) :\exists \svyBar^n\in \binY(\svy^n), s.t.

3512: i_x(\svxBar^n,\svyBar^n)\geq i_x(\svxtil^n,\svytil^n), \forall

3513: (\svxtil^n,\svytil^n)\in \binX(\svx^n) \times \binY(\svy^n)\}$$

3514: $$\mathcal{W}_n^y=\{\svyBar^n\in  \binY(\svy^n) :\exists \svxBar^n\in

3515: \binX(\svx^n)  , s.t. i_y(\svxBar^n,\svyBar^n)\geq

3516: i_y(\svxtil^n,\svytil^n),  \forall (\svxtil^n,\svytil^n)\in

3517: \binX(\svx^n) \times \binY(\svy^n)\}$$

3518:

3519: Then arbitrarily pick one sequence from $\mathcal{W}_n^x$ and one

3520: from $\mathcal{W}_n^y$ as the decision $(\svxhat^n,\svyhat^n)$.

3521:

3522:

3523:

3524:

3525: \setlength{\unitlength}{1mm}

3526:

3527:  \begin{figure}

3528: \begin{picture}(100,100)

3529:

3530: \multiput(50,20)(5,0){12}{\multiput(0,0)(0,5){12}{\circle {1.5}}}

3531: \put(45, 15){\vector(1,0){70}} \put(45, 15){\vector(0,1){70}}

3532: \put(45,87){$k$} \put(117,15){$l$} \put(45, 39){\line(1,0){65}}

3533:  \put(74, 15){\line(0,1){65}}

3534:

3535:  \put(37,74){\rotatebox{90}{$n+1$}}

3536:

3537:  \put(104,10){$n+1$}

3538:  \put(48,10){$1$}

3539: \put(37,20){\rotatebox{90}{$1$}}

3540:

3541:

3542: \put(41, 33){\rotatebox{90}{$i_y$}}

3543:  \put(68, 11){$i_x$}

3544:

3545: \put(95,40){\circle*{1.5}} \put(75,45){\circle*{1.5}}

3546: \put(105,50){\circle*{1.5}} \put(100,65){\circle*{1.5}}

3547: \put(100,60){\circle*{1.5}} \put(95,60){\circle*{1.5}}

3548: \put(95,65){\circle*{1.5}} \put(95,70){\circle*{1.5}}

3549: \put(100,60){\circle*{1.5}} \put(75,65){\circle*{1.5}}

3550: \put(95,75){\circle*{1.5}} \put(85,55){\circle*{1.5}}

3551: \put(105,75){\circle*{1.5}}

3552:  \end{picture}

3553:      \caption[]{2D interpretation of the \textit{score}, $(i_x(\svxBar^n,\svyBar^n),

3554:      i_y(\svxBar^n,\svyBar^n))$, of a sequence

3555:      pair $(\svxBar^n,\svyBar^n)$. If there exists a sequence pair in

3556:      $\mathcal{F}_n(l,k,\svxBar^n,\svyBar^n)$ with a smaller or equal weighted entropy, then $(l,k)$ is marked with a solid dot.

3557:      The \textit{score} $i_x(\svxBar^n,\svyBar^n)$ is the largest

3558:      integer which is smaller than all the $x$-coordinates of the

3559:      marked points. Similarly for $i_y(\svxBar^n,\svyBar^n)$.

3560:      }

3561:      \label{fig.scoresheet}

3562:  \end{figure}

3563:

3564:

3565: We bound the probability that there exists a sequence pair in

3566: $\mathcal{F}_n(l,k,(\rvx^n,\rvy^n))\cap  \binX(\svx^n) \times

3567: \binY(\svy^n)$ with smaller weighted minimum-entropy suffix score

3568: as:

3569: \begin{eqnarray}

3570:  p_n(l,k)&=&\sum_{x^n}\sum_{y^n}p_{\rvx\rvy}(x^n,y^n)

3571: P(\exists(\svxtil_{1}^{n},\svytil_{1}^{n})\in \binX(\svx^n) \times

3572: \binY(\svy^n)\cap \mathcal{F}_n(l,k,x^n,y^n),\nonumber\\

3573: && s.t. H_S(l,k,\svxtil^n,\svytil^n)\leq

3574: H_S(l,k,(x^n,y^n)))\nonumber

3575: \end{eqnarray}

3576: Note that the $p_n(l,k)$ here differs from the $p_n(l,k)$ defined in

3577: the ML decoding by replacing $p_{\rvx\rvy}(\svx^n, \svy^n) \leq

3578: p_{\rvx\rvy}(\svxtil^n, \svytil^n)$     with

3579: $H_S(l,k,\svxtil^n,\svytil^n)\leq H_S(l,k,(x^n,y^n))$.

3580:

3581: The following lemma, analogous  to (\ref{eq.defPn}) for ML decoding,

3582: tells us that the ``suffix weighted entropy'' decoding rule is a

3583: good one.

3584:

3585: \begin{lemma} Upper bound on symbol-wise decoding error

3586: $P_{ex}(k,k+d)$ :\label{Lemma_3_UNI_SW}

3587:

3588: \begin{eqnarray}

3589: \Pr[\rvxhat^{n-\delay}   \neq \rvx^{n-\delay}]  \leq

3590: \sum_{l=1}^{n-\delay}\sum_{k=1}^{n+1}p_n(l,k)\nonumber

3591: \end{eqnarray}

3592:

3593: \end{lemma}

3594:

3595: \pf According to the decoding rule, $\svxhat^{n-\delay}\neq

3596: \svx^{n-\delay}$ implies that there exists a sequence $\svxtil^n\in

3597: \mathcal{W}_n^x$ s.t.\ $\svxtil^{n-\delay}\neq x^{n-\delay}$. This

3598: means that there exists a sequence $\svytil^n\in

3599:  \binY(\svy^n)$, s.t. $i_x(\svxtil^n,\svytil^n)\geq

3600: i_x(\svx^n,\svy^n)$. Suppose that $(\svxtil^n,\svytil^n)\in

3601: \mathcal{F}_n(l,k , x^n,y^n)$, then $l\leq n-\delay $ because

3602: $\svxtil^{n-\delay}\neq x^{n-\delay}$. By the definition of $i_x$,

3603: we know that $H_S(l,k,\svxtil^n,\svytil^n)\leq H_S(l,k, x^n,y^n )$.

3604: And using the union bound argument we get the desired inequality.

3605: \hfill $\blacksquare$

3606:

3607:

3608: We only need to bound each single error probability $p_n(l,k)$ to

3609: finish the proof.

3610:

3611: \begin{lemma}{Upper bound on $p_n(l,k)$, $l\leq k$:} $\forall \gamma>0$, $\exists  K_1 <

3612: \infty$, s.t.

3613: $$p_n(l,k)\leq K_1 \exp\{-(n-l+1) [E_{x}

3614: (\Rx, \Ry, \lambda) - \gamma]\}$$ where  $\lambda = (k-l)/(n-l+1)

3615: \in [0,1]$. \label{Lemma:UpperBoundon2_UN}

3616: \end{lemma}

3617: %

3618: \pf  Here the error probability $p_n(l,k)$ can be thought of as

3619: starting from~(\ref{eq.indepBin}) with the condition $(k-l)

3620: H(\svxtil_{l}^{k-1}|\svytil_{l}^{k-1}) + (n-k+1) H(\svxtil_k^n,

3621: \svytil_{k}^n) < (k-l) H(\svx_{l}^{k-1}|\svy_{l}^{k-1}) + (n-k+1)

3622: H(\svx_k^n, \svy_{k}^n)$ substituted for $p(\svxtil_l^n,

3623: \svytil_l^n) > p(\svx_l^n, \svy_l^n)$, we get

3624: %

3625: \begin{align}

3626: %%

3627: p_n(l,k) = &

3628: %

3629: \sum_{\PNK, \PKL}

3630: %

3631: \sum_{\VNK, \VKL}

3632: %

3633: \sum_{\tiny \begin{array}{c}

3634: \svy_{l}^{k-1} \in \tclass_{\PKL},\\

3635: \svy_{k}^n \in \tclass_{\PNK}

3636: \end{array}}

3637: %

3638: \sum_{\tiny \begin{array}{c}

3639: \svx_l^{k-1} \in \tclass_{\VKL}(\svy_l^{k-1}), \\

3640: \svx_{k}^n \in \tclass_{\VNK}(\svy_{k}^n) \end{array}}

3641: \min \Big[1,

3642: %%

3643: %%\hspace{-1em}

3644: \sum_{\tiny \begin{array}{c}\VtilNK, \VtilKL, \PtilNK \; \mbox{s.t.}\\

3645: \minEntTil < \\\minEnt

3646: \end{array}} \nonumber\\

3647: %

3648: & \sum_{\svytil_{k}^n \in \tclass_{\PtilNK}} \sum_{\svxtil_{l}^{k-1}

3649: \in \tclass_{\VtilKL}(\svy_{l}^{k-1})} \sum_{\svxtil_{k}^n \in

3650: \tclass_{\VtilNK}(\svytil_{k}^n)} \exp\{-(n-l +1) \Rx - (n-k+1)

3651: \Ry\} \Big] p_{\rvx\rvy}(x^n, y^n) \label{eq.enumTil}

3652: \end{align}

3653: %

3654: In~(\ref{eq.enumTil}) we enumerate all the source sequences in a way

3655: that allows us to focus on the types of the important subsequences.

3656: We enumerate the possibly misleading candidate sequences in terms of

3657: their suffixes types.  We restrict the sum to those pairs

3658: $(\svxtil^n, \svytil^n)$ that could lead to mistaken decoding,

3659: defining the compact notation $\minEnt \defeq (k-l) H(\VKL|\PKL) +

3660: (n-k+1) H(\PNK \times \VNK)$, which is the weighted suffix entropy

3661: condition rewritten in terms of types.

3662:

3663: Note that the summations within the minimization

3664: in~(\ref{eq.enumTil}) do not depend on the arguments within these

3665: sums.  Thus, we can bound this sum separately to get a bound on the

3666: number of possibly misleading source pairs $(\svbxtil, \svbytil)$.

3667: %

3668: \begin{align}

3669: & \hspace{-5em}

3670: \sum_{\tiny \begin{array}{c}\VtilNK, \VtilKL, \PtilNK \;  \mbox{s.t.}\\

3671: \minEntTil < \\ \minEnt \end{array}}

3672: %

3673: \sum_{\svytil_{k}^n \in \tclass_{\PtilNK}}

3674: \sum_{\svxtil_{l}^{k-1} \in \tclass_{\VtilKL}(\svy_{l}^{k-1})}

3675: \sum_{\svxtil_{k}^n \in \tclass_{\VtilNK}(\svytil_{k}^n)} \nonumber\\

3676: %%

3677: %%

3678: %%

3679: & \leq \sum_{\tiny \begin{array}{c}\VtilNK, \VtilKL, \PtilNK

3680: \; \mbox{s.t.}\\  \minEntTil < \\ \minEnt \end{array}}

3681: \sum_{\svytil_{k}^n \in \tclass_{\PtilNK}}

3682: |\tclass_{\VtilKL}(\svy_{l}^{k-1})|

3683: |\tclass_{\VtilNK}(\svytil_{k}^n)| \label{eq.condTypeSize}\\

3684: %%

3685: %%

3686: %%

3687: \leq & \sum_{\tiny \begin{array}{c}\VtilNK, \VtilKL, \PtilNK \;

3688: \mbox{s.t.}\\

3689: \minEntTil < \\ \minEnt \end{array}}

3690: |\tclass_{\PtilNK}|

3691: \exp\{(k-l)H(\VtilKL|\PKL)\}

3692: \exp\{(n-k+1)H(\VtilNK|\PtilNK)\}

3693: \displaybreak[2]

3694: \label{eq.condTypeBnd}\\

3695: %%

3696: %%

3697: %%

3698: \leq &\sum_{\tiny \begin{array}{c}\VtilNK, \VtilKL, \PtilNK \; \mbox{s.t.}\\

3699: \minEntTil < \\ \minEnt \end{array}}

3700: %

3701: \exp\{(k-l)H(\VtilKL|\PKL) + (n-k+1) H(\PtilNK \times \VtilNK) \}

3702: \displaybreak[2]

3703: \label{eq.margTypeBnd} \displaybreak[2]\\

3704: %%

3705: %%

3706: %%

3707: \leq &  \sum_{\VtilNK, \VtilKL, \PtilNK }

3708: \exp\{(k-l)H(\VKL|\PKL) + (n-k+1) H(\PNK \times \VNK) \}

3709: \displaybreak[2] \label{eq.scoring} \displaybreak[2] \\

3710: %%

3711: %%

3712: %%

3713: \leq & \; (n-l+2)^{2 |\cX| |\cY|}

3714: \exp\{(k-l)H(\VKL|\PKL) + (n-k+1) H(\PNK \times \VNK) \}

3715: \label{eq.numTypes}

3716: \end{align}

3717: %

3718: In~(\ref{eq.condTypeSize}) we sum over all $\svxtil_{l}^{k-1} \in

3719: \tclass_{\VtilKL}(\svy_{l}^{k-1})$.

3720: In~(\ref{eq.condTypeBnd}) we use standard bounds, e.g., $|

3721: \tclass_{\VtilKL}(\svy_{l}^{k-1})| \leq \exp\{(k-l)

3722: H(\VtilKL|\PKL)\}$ since $\svy_{l}^{k-1} \in \tclass_{\PKL}$.

3723: We also sum over all $\svxtil_{k}^{n} \in

3724: \tclass_{\VtilNK}(\svytil_{k}^{n})$ and over all

3725: $\svytil_{k}^n \in

3726: \tclass_{\PtilNK}$ in~(\ref{eq.condTypeBnd}).  By definition of the

3727: decoding rule $(\svbxtil, \svbytil)$ can only lead to a decoding error

3728: if $(k-l) H(\VtilKL|\PKL) + (n-k+1) H(\PtilNK \times \VtilNK) <

3729: (k-l)H(\VKL|\PKL) + (n-k+1) H(\PNK \times \VNK)$.

3730: In~(\ref{eq.numTypes}) we apply the polynomial bound on the number of

3731: types.

3732:

3733:

3734: We substitute~(\ref{eq.numTypes}) into~(\ref{eq.enumTil}) and pull out

3735: the polynomial term, giving

3736: %

3737: %

3738: %%

3739: \begin{align}

3740: &\hspace{-1em} p_n(l,k) \leq

3741: (n-l+2)^{2 |\cX| |\cY|}

3742: %

3743: \sum_{\PNK, \PKL}

3744: %

3745: \sum_{\VNK, \VKL}

3746: %

3747: \sum_{\tiny \begin{array}{c}

3748: \svy_{l}^{k-1} \in \tclass_{\PKL},\\

3749: \svy_{k}^n \in \tclass_{\PNK}

3750: \end{array}}

3751: %

3752: \sum_{\tiny \begin{array}{c}

3753: \svx_l^{k-1} \in \tclass_{\VKL}(\svy_l^{k-1}), \\

3754: \svx_{k}^n \in \tclass_{\VNK}(\svy_{k}^n) \end{array}}

3755: \nonumber \\

3756: %

3757: &\min \Big[1, \exp\{-(k-l)[\Rx - H(\VKL|\PKL)]

3758: - (n-k+1) [\Rx + \Ry - H(\VNK \times \PNK)] \} \Big]

3759: \jointSource{l}{n}{l}{n} \nonumber \\

3760: %%

3761: %%

3762: \leq &

3763: (n-l+2)^{2 |\cX| |\cY|}

3764: %

3765: \sum_{\PNK, \PKL}

3766: %

3767: \sum_{\VNK, \VKL} \nonumber \\

3768: %

3769: & \exp\Big\{\max \Big[0, -(k-l)[\Rx - H(\VKL|\PKL)]

3770: - (n-k+1)  [\Rx + \Ry - H(\VNK \times \PNK)] \Big]\Big\}

3771: \nonumber \\

3772: %

3773: &\exp\left\{-(k-l)D(\VKL \times \PKL \| \PxyRV)

3774: - (n-k+1) D(\VNK \times \PNK \| \PxyRV) \right\}

3775: \label{eq.srcProb} \displaybreak[2]\\

3776: %%

3777: %%

3778: \leq &

3779: (n-l+2)^{2 |\cX| |\cY|}

3780: %

3781: \sum_{\PNK, \PKL}

3782: %

3783: \sum_{\VNK, \VKL}

3784: %

3785: \exp\Big\{-(n-l+1) \Big[\lambda D(\VKL \times \PKL \| \PxyRV)

3786: + \bar{\lambda} D(\VNK \times \PNK \| \PxyRV)

3787: \nonumber \\

3788: %

3789: &+ \left|\lambda [\Rx - H(\VKL|\PKL)]

3790: + \bar{\lambda}  [\Rx + \Ry - H(\VNK \times \PNK)]\right|^{+}

3791: \Big] \Big\}

3792: \label{eq.combineDiv} \displaybreak[2]\\

3793: %%

3794: %%

3795: %%

3796: \leq &

3797: (n-l+2)^{2 |\cX| |\cY|}

3798: %

3799: \sum_{\PNK, \PKL}

3800: %

3801: \sum_{\VNK, \VKL}

3802: %

3803: \exp \Big\{-(n-l+1) \inf_{\tiny \rvxtil, \rvytil,

3804: \rvxBar, \rvyBar}

3805: \Big[\lambda D(p_{\rvxtil, \rvytil} \| \PxyRV)

3806: + \bar{\lambda} D(p_{\rvxBar, \rvyBar} \| \PxyRV) \nonumber \\

3807: %

3808: &  + \left|\lambda [\Rx - H(\rvxtil|\rvytil)]

3809: + \bar{\lambda}

3810: [\Rx + \Ry - H(\rvxBar, \rvyBar)]\right|^{+} \Big] \Big\}

3811: \label{eq.infExp} \displaybreak[2]\\

3812: %%

3813: %%

3814: %%

3815: \leq &

3816: (n-l+2)^{4 |\cX| |\cY|} \exp\{-(n-l+1)  E_{x} (\Rx, \Ry, \lambda)\}

3817: %%

3818: \leq K_1 \exp\{-(n-l+1) [E_{x}

3819: (\Rx, \Ry, \lambda) - \gamma]\} \label{eq.defExp}

3820: %%

3821: %\nonumber

3822: \end{align}

3823: %

3824: In~(\ref{eq.srcProb}) we use the memoryless property of the source,

3825: and exponential bounds on the probability of observing

3826: $(\svx_{l}^{k-1}, \svy_l^{k-1})$ and $(\svx_k^n, \svy_k^n)$.

3827: In~(\ref{eq.combineDiv}) we pull out $(n-l+1)$ from all terms,

3828: noticing that $\lambda = (k-l)/(n-l+1) \in [0,1]$ and $\bar{\lambda}

3829: \defeq 1- \lambda = (n-k+1)/(n-l+1)$.  In~(\ref{eq.infExp}) we

3830: minimize the exponent over all choices of distributions $p_{\rvxtil,

3831: \rvytil}$ and $p_{\rvxBar, \rvyBar}$.  In~(\ref{eq.defExp}) we

3832: define the universal random coding exponent $E_{x}(\Rx, \Ry,

3833: \lambda) \defeq \inf_{\tiny \rvxtil, \rvytil, \rvxBar, \rvyBar} \{

3834: \lambda  D(p_{\rvxtil,\rvytil} \| \PxyRV) + \bar{\lambda}

3835: D(p_{\rvxBar, \rvyBar} \| \PxyRV) + \left|\lambda [\Rx -

3836: H(\rvxtil|\rvytil)] + \bar{\lambda} [\Rx + \Ry - H(\rvxBar,

3837: \rvyBar)]\right|^{+}\}$ where $0 \leq \lambda \leq 1$ and

3838: $\bar{\lambda} = 1 - \lambda$.  We also incorporate the number of

3839: conditional and marginal types into the polynomial bound, as well as

3840: the sum over $k$, and then push the polynomial into the exponent

3841: since for any polynomial $F$, $\forall E, \epsilon >0$, there exists

3842: $C>0$, s.t. $F(\delay)e^{-\delay E}\leq Ce^{-\delay(E-\epsilon)}$.

3843: \hfill $\blacksquare$

3844:

3845: A similar derivation yields a bound on $p_n(l, k)$ for $l \geq k$.

3846:

3847:

3848:

3849: Combining Lemmas \ref{Lemma:UpperBoundon2_UN} and

3850: \ref{Lemma_3_UNI_SW}, and then following the same derivation for ML

3851: decoding yields Theorem~\ref{thm.jointCode}.

3852:

3853:

3854: %%%%%%%%%%%%%%%%%%%%

3855: \section{Future Directions}

3856:

3857: \subsection{Stationary-ergodic sources and universality}

3858:

3859: \cite{cover:75} extends the block-coding proofs to the Slepian-Wolf

3860: problem for stationary-ergodic sources using AEP arguments. To have a

3861: similar extension to the streaming context, possibly additional

3862: regularity conditions will be required so that error exponents can be

3863: achieved. To achieve universality over sources, it is possible that

3864: further technical restrictions will be required. For the case of

3865: distributed Markov sources however, it seems quite clear that all the

3866: arguments in this paper will easily generalize. In that case,

3867: following the approach we take in \cite{SahaiUnstable}, the source can

3868: be ``segmented'' into small blocks and the endpoints\footnote{For a

3869:   Markov source of known order $k$, the endpoint is just $k$

3870:   successive symbols at the end of the block.} of the blocks can be

3871: encoded perfectly at essentially zero rate. Conditioned on these

3872: endpoints, the blocks are then iid, with the endpoints representing a

3873: third stream of perfectly known side-information.

3874:

3875: \subsection{Upper bounds and demonstrating optimal delays}

3876:

3877: This paper dealt entirely with achievability of certain error

3878: exponents. Ideally, we would have corresponding upper bounds

3879: demonstrating that no higher exponents are possible. In the

3880: block-coding case, problem 3.7.1 in \cite{csiszarKorner} provides a

3881: simple upper-bound. However, the nature of the error exponents in the

3882: streaming case might be more complicated. \cite{Chang:06} provides an

3883: upper bound and matching achievable scheme for point-to-point

3884: source-coding with delay and this bound extends naturally to the case

3885: where side-information is known at both the encoder and the decoder.

3886: \cite{ChangISIT:06} provides an upper bound for the case of

3887: side-information known only at the decoder, and this bound is tight

3888: for certain symmetric cases. However, both of these extended single

3889: encoder arguments from \cite{SahaiBlockLength} that do not immediately

3890: generalize to the case of multiple encoders.

3891:

3892: \subsection{Trading off error exponents for the different source terminals}

3893: For multiple terminal systems, different error exponents can be

3894: achieved for different users or sources. For channel coding, the

3895: encoders can choose different distributions while generating the

3896: randomized code book to achieve an error exponent trade-off among

3897: different users. In \cite{Weng:05}, the error exponent region is

3898: studied for the Gaussian multiple access channel and the broadcast

3899: channel within the block-coding paradigm. It is unclear whether

3900: similar tradeoffs are possible within the streaming Slepian-Wolf

3901: problems considered here since there is nothing immediately comparable

3902: to the flexibility we have in choosing the ``input distribution'' for

3903: channel coding problems.

3904:

3905: \subsection{Adaptation and limited feedback}

3906: An interesting extension is to adaptive universal streaming

3907: Slepian-Wolf encoders.  The decoders we use in this paper are based on

3908: empirical statistics.  Therefore they can be used even if source

3909: statistics are unknown.  The current proposal will work regardless of

3910: source and side information statistics as long as the conditional

3911: entropy $H(\rvx|\rvy)$ is less than the encoding rate.  Even if there

3912: is uncertainty in statistics, the anytime nature of the coding system

3913: should enable the system to adapt on-line to the unknown entropy rate

3914: if some feedback channel is available.  The feedback channel would be

3915: used to order increases (or decreases) in the binning rate.  An

3916: increase (or decrease) could be triggered by examining the difference

3917: between two quantities: the minimal empirical joint entropy between

3918: the decoded sequence and observation, and the empirical joint entropy

3919: between the particular sequence and observation yielding the

3920: second-lowest joint entropy.  If there is a large difference between

3921: these two entropies, we are using rate excessively, and the rate of

3922: communication can be reduced.  If the difference is negligible, then

3923: it's likely we are not decoding correctly.  Our target should be to

3924: keep this difference at roughly $\epsilon$.  In the current context,

3925: this is analogous to the rate margin by which we choose to exceed the

3926: known conditional entropy.

3927:

3928: \section*{Acknowledgments}

3929: The authors wish to acknowledge a desire expressed by Zixiang Xiong

3930: and subsequent hallway discussions during ITW 2004 that helped

3931: precipitate the current line of research. This work was supported in

3932: part by NSF ITR Grant No.~CNS-0326503.

3933:

3934: \appendix

3935:

3936: \newcommand{\pBar}{\bar{p}}

3937:

3938: \section{Proof of Theorem \ref{THM:Universal_ML_SW}}

3939:

3940: In this section we show that the maximum likelihood (ML) error

3941: exponent equals the universal error exponent.  We show that for all

3942: $\gamma$,

3943: $$E^{ML}_x(R_x,R_y,\gamma)=E^{UN}_x(R_x,R_y,\gamma)$$

3944: Where the ML error exponent:

3945: \begin{eqnarray}\label{eqn:LEMMAAPPDC0_SW}

3946: E^{ML}_x(R_x,R_y,\gamma)&=&\sup_{\rho\in[0,1]}\{\gamma

3947: E_{x|y}(R_x,\rho)+(1-\gamma)E_{xy}(R_x,R_y,\rho)\}\nonumber\\

3948: &=&\sup_{\rho\in[0,1]}\{\rho R^{(\gamma)} -\gamma \log(\sum_{y

3949: }(\sum_{x }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho})-

3950: (1-\gamma)(1+\rho)\log(\sum_{y}\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})\}\nonumber\\

3951: &=&\sup_{\rho\in[0,1]}\{E^{ML}_x(R_x,R_y,\gamma,\rho)\}\nonumber

3952: \end{eqnarray}

3953:

3954: Write the function inside the $\sup$ argument as

3955: $E^{ML}_x(R_x,R_y,\gamma,\rho)$. The universal error exponent:

3956: \begin{eqnarray}

3957: E^{UN}_x(R_x,R_y,\gamma)&=&\inf_{ q_{xy},o_{xy}} \{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})\nonumber\\

3958: &&+\max\{0,\gamma (R_x-H(q_{x|y}))

3959: +(1-\gamma)(R_x+R_y-H(o_{xy}))\}\}\nonumber\\

3960: &=&\inf_{ q_{xy},o_{xy}} \{\gamma

3961: D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})+\max\{0,R^{(\gamma)}-\gamma

3962: H(q_{x|y}) -(1-\gamma)H(o_{xy})\}\}\nonumber

3963: \end{eqnarray}

3964: Here we define $R^{(\gamma)}=\gamma R_x +(1-\gamma)(R_x+R_y)>\gamma

3965: H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})$. For notational

3966: simplicity, we write $q_{xy}$ and $o_{xy}$ as two arbitrary joint

3967: distributions on $\mathcal{X}\times\mathcal{Y}$ instead of

3968: $p_{\rvxBar\rvyBar}$ and $p_{\rvxBBar\rvyBBar}$. We still write

3969: $p_{\rvx\rvy}$ as the distribution of the source.

3970:

3971:

3972:

3973: Before the proof, we define a pair of distributions that we will need.

3974: %------------------------------------ Definitions ------------------------------------------

3975:

3976: \begin{defn}{Tilted distribution of $p_{\rvx\rvy}$}: $p^\rho_{\rvx\rvy}$, for all $ \rho\in [-1,\infty)$

3977:

3978: $$p^\rho_{\rvx\rvy}(x,y)=\frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}}}{\sum_t\sum_s

3979: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}}}$$ The entropy of the tilted

3980: distribution is written as $H(p^\rho_{\rvx\rvy})$. Obviously

3981: $p^0_{\rvx\rvy}=p_{\rvx\rvy}$.\\

3982: \end{defn}

3983:

3984:

3985: \begin{defn}   {$\rvx-\rvy$ tilted distribution of $p_{\rvx\rvy}$}: $\pBar^\rho_{\rvx\rvy}$, for all $\rho \in

3986: [-1,+\infty)$

3987: \begin{eqnarray}

3988:  \pBar^\rho_{\rvx\rvy}(x,y) &=&\frac{[\sum_s p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho}}]^{1+\rho}}{\sum_t[\sum_s p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}}]^{1+\rho}}\times\frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}}}{\sum_s p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho}}} \nonumber\\

3989: &=&\frac{A(y,\rho)}{B(\rho)}\times\frac{C(x,y,\rho)}{D(y,\rho)}\nonumber

3990: \end{eqnarray}

3991: Where

3992: \begin{eqnarray}

3993: A(y,\rho)&=&[\sum_s p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho}}]^{1+\rho}=D(y,\rho)^{1+\rho}\nonumber\\

3994: B(\rho)&=& \sum_t[\sum_s p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}}]^{1+\rho} = \sum_y A(y,\rho) \nonumber\\

3995: C(x,y,\rho)&=&p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}}\nonumber\\

3996: D(y,\rho)&=&\sum_s p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho}} =\sum_x

3997: C(x,y,\rho)\nonumber

3998: \end{eqnarray}

3999: \end{defn}

4000:

4001: The marginal distribution for $\rvy$ is $\frac{A(y,\rho)}{B(\rho)}$.

4002: Obviously $\pBar^0_{\rvx\rvy}=p_{\rvx\rvy}$. Write the conditional

4003: distribution of $\rvx$ given $\rvy$ under distribution

4004: $\pBar^\rho_{\rvx\rvy}$ as $\pBar^\rho_{\rvx|\rvy}$, where

4005: $\pBar^\rho_{\rvx|\rvy}(x,y)=\frac{C(x,y,\rho)}{D(y,\rho)}$,  and

4006: the conditional entropy of $\rvx$ given $\rvy$ under distribution

4007: $\pBar^\rho_{\rvx\rvy}$ as

4008: $H(\pBar^\rho_{\rvx|\rvy})$. Obviously $H(\pBar^0_{\rvx|\rvy})=H(p_{\rvx|\rvy})$.\\

4009: The conditional  entropy of $\rvx$ given $\svy$ for the $\rvx-\rvy$

4010: tilted distribution is

4011: $$ H(\pBar^\rho_{\rvx|\rvy=\svy})=-\sum_x

4012: \frac{C(x,y,\rho)}{D(y,\rho)}\log(\frac{C(x,y,\rho)}{D(y,\rho)})$$\\

4013:

4014: We introduce $ A(y,\rho)$, $ B(\rho)$, $ C(x, y,\rho)$, $ D(y,\rho)$

4015: to simplify the notations. Some of their properties are shown in

4016: Lemma~\ref{LEMMAAPP1_SI}.

4017:

4018:

4019: While tilted distributions are common optimal distributions in large

4020: deviation theory, it is useful to contemplate why we need to introduce

4021: these {\em two} tilted distributions. In the proof of Theorem

4022: \ref{THM:Universal_ML_SW}, through a Lagrange multiplier argument, we

4023: will show that $\{p^\rho_{\rvx\rvy}:\rho\in [-1,+\infty)\}$ is the

4024: family of distributions that minimize the Kullback--Leibler distance

4025: to $p_{\rvx\rvy}$ with fixed \textit{entropy} and

4026: $\{\pBar^\rho_{\rvx\rvy}:\rho\in [-1,+\infty)\}$ is the family of

4027: distributions that minimize the Kullback--Leibler distance to

4028: $p_{\rvx\rvy}$ with fixed \textit{conditional entropy}. Using a

4029: Lagrange multiplier argument, we parametrize the universal error

4030: exponent $E^{UN}_x(R_x,R_y,\gamma)$ in terms of $\rho$ and show the

4031: equivalence of the universal and maximum likelihood error exponents.

4032: %-------------------------END of Definitions ------------------------------------------

4033:

4034: Now we are ready to prove Theorem~\ref{THM:Universal_ML_SW}:

4035: $E^{ML}_x(R_x,R_y,\gamma)=E^{UN}_x(R_x,R_y,\gamma)$.

4036:

4037:

4038: \pf

4039:

4040:

4041: \subsection{case 1: $\gamma

4042: H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})< R^{(\gamma)} < \gamma

4043: H(\pBar^1_{\rvx|\rvy} )+(1-\gamma)H(p^1_{\rvx\rvy}

4044: )$.}\label{case:1}

4045:

4046: First, from Lemma~\ref{LEMMA_APP10} and Lemma~\ref{LEMMA_APP11}:

4047:

4048:  $$\frac{\partial  E^{ML}_x(R_x,R_y,\gamma,\rho)}{\partial

4049: \rho }=R^{(\gamma)}-\gamma H(\pBar^\rho_{\rvx|\rvy}

4050: )-(1-\gamma)H(p^\rho_{\rvx\rvy})$$

4051:

4052: Then, using Lemma~\ref{LEMMAAPP2} and Lemma~\ref{LEMMAAPP2_SI}, we

4053: have:

4054:

4055:  $$\frac{\partial^2  E^{ML}_x(R_x,R_y,\gamma,\rho)}{\partial

4056: \rho^2 }  \leq 0.$$

4057:

4058: So  $\rho$ maximizes $E^{ML}_x(R_x,R_y,\gamma,\rho)$,  if and only

4059: if:

4060:

4061: \begin{eqnarray}

4062: 0=\frac{\partial E^{ML}_x(R_x,R_y,\gamma,\rho)}{\partial

4063: \rho}=R^{(\gamma)}-\gamma H(\pBar^\rho_{\rvx|\rvy}

4064: )-(1-\gamma)H(p^\rho_{\rvx\rvy})

4065: \end{eqnarray}

4066:

4067: Because $R^{(\gamma)}$ is in the interval $[\gamma

4068: H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy}), \gamma

4069: H(\pBar^1_{\rvx|\rvy})+(1-\gamma)H(p^1_{\rvx\rvy})]$ and the

4070: entropy functions monotonically-increase over $\rho$,

4071: we can find $\rho^*\in (0,1)$, s.t.

4072: $$\gamma H(\pBar^{\rho^*}_{\rvx|\rvy})+(1-\gamma)H(p^{\rho^*}_{\rvx\rvy})=R^{(\gamma)}$$

4073:

4074: Using  Lemma~\ref{LEMMA_APP8}  and Lemma~\ref{LEMMA_APP9} we get:

4075: \begin{eqnarray}

4076: E^{ML}_x(R_x,R_y,\gamma)&=&\gamma

4077: D(\pBar^{\rho^*}_{\rvx\rvy}\|p_{\rvx\rvy})+(1-\gamma)

4078: D(p^{\rho^*}_{\rvx\rvy}\|p_{\rvx\rvy})\label{eqn:ML_error_expression}

4079: \end{eqnarray}

4080: Where $\gamma

4081: H(\pBar^{\rho^*}_{\rvx|\rvy})+(1-\gamma)H(p^{\rho^*}_{\rvx\rvy})=R^{(\gamma)}$

4082: , $\rho^*$ is generally unique because

4083:  both $H(\pBar^\rho_{\rvx|\rvy})$ and $H(p^\rho_{\rvx\rvy})$ are strictly increasing with

4084: $\rho$.\\

4085:

4086: Secondly

4087: \begin{eqnarray}\label{eqn:LEMMAAPPDC2_SW}

4088: & & E^{UN}_x(R_x,R_y,\gamma)\nonumber\\

4089: &=&\inf_{ q_{xy},o_{xy}} \{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})+\max\{0,R^{(\gamma)}-\gamma H(q_{x|y}) -(1-\gamma)H(o_{xy})\}\}\nonumber\\

4090: &=&  \inf_{b} \{\inf_{q_{xy},o_{xy}:\gamma H(q_{x|y}) + (1-\gamma)H(o_{xy})=b}\{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})+\max(0,R^{(\gamma)}-b)\}\}\nonumber\\

4091: &=&  \inf_{b\geq \gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})}

4092: \{\inf_{q_{xy},o_{xy}:\gamma H(q_{x|y}) +

4093: (1-\gamma)H(o_{xy})=b}\{\gamma

4094: D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})\nonumber\\

4095: &&+\max(0,R^{(\gamma)}-b)\}\}\label{eqn:optimization_equality}

4096: \end{eqnarray}

4097: The last equality is true because, for $b< \gamma

4098: H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})<R^{(\gamma)}$,

4099: \begin{eqnarray}

4100: &&\inf_{q_{xy},o_{xy}:\gamma H(q_{x|y}) + (1-\gamma)H(o_{xy})=b}\{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})+\max(0,R^{(\gamma)}-b)\}\nonumber\\

4101: &\geq& 0 + R^{(\gamma)}-b \nonumber\\

4102: &=&\inf_{q_{xy},o_{xy}:H(q_{x|y})=H(p_{\rvx|\rvy}),H(o_{xy})=H(p_{\rvx\rvy})}\{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})+\max(0,R^{(\gamma)}-b)\}\nonumber\\

4103: &\geq&\inf_{q_{xy},o_{xy}:H(q_{x|y})=H(p_{\rvx|\rvy}),H(o_{xy})=H(p_{\rvx\rvy})}\{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})\nonumber\\

4104: &&+\max(0,R^{(\gamma)}-\gamma H(p_{\rvx|\rvy})-(1-\gamma)H(p_{\rvx\rvy}))\}\nonumber\\

4105: &\geq&\inf_{q_{xy},o_{xy}: \gamma H(q_{x|y})+(1-\gamma)H(o_{xy}) =\gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})  }\{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})\nonumber\\

4106: &&+\max(0,R^{(\gamma)}-\gamma

4107: H(p_{\rvx|\rvy})-(1-\gamma)H(p_{\rvx\rvy}))\}\nonumber

4108: \end{eqnarray}

4109: Fixing $b\geq \gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})$, the

4110: inner infimum in (\ref{eqn:optimization_equality}) is an

4111: optimization problem on $q_{xy}, o_{xy}$ with equality constraints

4112: $\sum_x\sum_y q_{xy}(x,y)=1$, $\sum_x\sum_y o_{xy}(x,y)=1$ and

4113: $\gamma H(q_{x|y})+(1-\gamma)H(o_{xy})=b$ and the obvious inequality

4114: constraints $ 0\leq q_{xy}(x,y)\leq 1, 0\leq o_{xy}(x,y)\leq 1,

4115: \forall x,y$. In the following formulation of the optimization

4116: problem, we relax one equality constraint to an inequality

4117: constraint $\gamma H(q_{x|y})+(1-\gamma)H(o_{xy})\geq b$ to make the

4118: optimization problem $convex$. It turns out later that the optimal

4119: solution to the relaxed problem is also the optimal solution to the

4120: original problem because $b\geq \gamma

4121: H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy}) $. The resulting

4122: optimization problem is:

4123: \begin{eqnarray}

4124: &&\inf_{q_{xy}, o_{xy}} \{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})\} \nonumber\\

4125: &&\mbox{s.t.}\sum_x\sum_y  q_{xy}(x,y)=1\nonumber\\

4126: &&\sum_x\sum_y  o_{xy}(x,y)=1\nonumber\\

4127: && b- \gamma H(q_{x|y})-(1-\gamma)H(o_{xy})\leq 0 \nonumber\\

4128: && 0\leq q_{xy}(x,y)\leq 1, \ \ \forall (x,y)\in

4129:  \mathcal{X}\times\mathcal{Y}\nonumber\\

4130:  && 0\leq o_{xy}(x,y)\leq 1, \ \ \forall (x,y)\in

4131:  \mathcal{X}\times\mathcal{Y}\label{eqn:convex_opt_setup}

4132: \end{eqnarray}

4133: The above optimization problem is {\em convex} because the objective

4134: function and the inequality constraint functions are convex and the

4135: equality constraint functions are affine~\cite{Boyd2004}.  The

4136: Lagrange multiplier function for this convex optimization problem is:

4137:

4138: \begin{eqnarray}

4139: &&L(q_{xy},o_{xy},\rho,\mu_1,\mu_2, {\nu}_1, {\nu}_2, {\nu}_3 , {\nu}_4)\nonumber\\

4140: &=& \gamma

4141: D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})\nonumber\\

4142: %

4143: %

4144: &&+\mu_1(\sum_x\sum_y  q_{xy}(x,y)-1) +\mu_2(\sum_x\sum_y

4145: o_{xy}(x,y)-1)\nonumber\\

4146: %

4147: %

4148: &&+\rho(b-\gamma H(q_{x|y})-(1-\gamma)H(o_{xy}))\nonumber\\

4149: %

4150: %

4151: &&+\sum_x\sum_y\big\{   {\nu}_1(x,y)(-q_{xy}(x,y))+

4152: {\nu}_2(x,y)(1-q_{xy}(x,y)) +{\nu}_3(x,y)(-o_{xy}(x,y))

4153: +{\nu}_4(x,y)(1-o_{xy}(x,y))\big\}\nonumber

4154: \end{eqnarray}

4155: Where $\rho,\mu_1,\mu_2$ are real numbers and ${\nu}_i\in \mathbb{R}^{

4156: |\mathcal{X}||\mathcal{Y}|}$, $i=1,2,3,4$.

4157:

4158: According to the KKT conditions for convex

4159: optimization~\cite{Boyd2004},  $q_{xy}, o_{xy}$  minimize the

4160: convex optimization problem in (\ref{eqn:convex_opt_setup}) if and

4161: only if the following conditions are simultaneously satisfied for

4162: some $q_{xy}$, $o_{xy}$, $\mu_1$, $\mu_2$, $\nu_1$, $\nu_2$,

4163: $\nu_3$, $\nu_4$ and $\rho$:

4164:

4165:

4166: \begin{eqnarray}

4167: 0&=&\frac{\partial L(q_{xy},o_{xy},\rho,\mu_1,\mu_2,{\nu}_1, {\nu}_2, {\nu}_3 , {\nu}_4)}{\partial q_{xy}(x,y)} \nonumber\\

4168: &=& \gamma[-\log (p_{\rvx\rvy}(x,y))+1+(1+\rho) \log(q_{xy}(x,y))- \rho \log(\sum_{s}q_{xy}(s,y))] +\mu_1- \nu_1(x,y)- \nu_2(x,y)\nonumber\\

4169: %

4170: %

4171: %

4172: 0&=&\frac{\partial L(q_{xy},o_{xy},\rho,\mu_1,\mu_2,{\nu}_1, {\nu}_2, {\nu}_3 , {\nu}_4)}{\partial o_{xy}(x,y)} \nonumber\\

4173: &=& (1-\gamma) [-\log (p_{\rvx\rvy}(x,y))+(1+\rho)

4174: (1+\log(o_{xy}(x,y)))]+\mu_2- \nu_3(x,y)- \nu_4(x,y)

4175: \label{eqn:multiplier1}

4176: \end{eqnarray}

4177: For all $x$, $y$ and

4178: %

4179: %

4180: \begin{eqnarray}

4181: &&\sum_x\sum_y  q_{xy}(x,y)=1\nonumber\\

4182: &&\sum_x\sum_y  o_{xy}(x,y)=1\nonumber\\

4183: &&\rho( \gamma H(q_{x|y})+(1-\gamma)H(o_{xy})-b)=0\nonumber\\

4184: && \rho \geq 0\nonumber\\

4185: && \nu_1(x,y) (-q_{xy}(x,y))=0, \ \ \  \nu_2(x,y) (1-q_{xy}(x,y))=0\

4186: \ \ \forall x,y \nonumber\\

4187: && \nu_3(x,y) (-o_{xy}(x,y))=0, \ \ \  \nu_4(x,y) (1-o_{xy}(x,y))=0\

4188: \ \ \forall x,y \nonumber\\

4189: &&\nu_i(x,y)\geq 0, \ \ \ \forall x,y, i=1,2,3,4

4190: \label{eqn:multiplier2}

4191: \end{eqnarray}

4192:

4193: Solving the above standard Lagrange multiplier equations

4194: (\ref{eqn:multiplier1}) and (\ref{eqn:multiplier2}), we have:

4195:

4196: \begin{eqnarray}

4197: q_{xy}(x,y)&=&\frac{[\sum_s p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho_b}}]^{1+\rho_b}}{\sum_t[\sum_s p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho_b}}]^{1+\rho_b}}\frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho_b}}}{\sum_s p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho_b}}} \nonumber\\

4198: &=& {\pBar^{\rho_b}_{\rvx\rvy}(x,y)}\nonumber\\

4199: o_{xy}(x,y)&=&\frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho_b}}}{\sum_t\sum_s p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho_b}}} \nonumber\\

4200: &=& {p^{\rho_b}_{\rvx\rvy}(x,y)}\nonumber\\

4201: %

4202: %

4203: \nu_i(x,y)&=&0\ \ \  \forall x,y, i=1,2,3,4\nonumber\\

4204: \rho &=&\rho_b

4205: \end{eqnarray}

4206: Where $\rho_b$ satisfies the following condition $$\gamma

4207: H(\pBar^{\rho_b}_{\rvx|\rvy})+(1-\gamma)H(p^{\rho_b}_{\rvx\rvy})=b

4208: \geq \gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})$$

4209: and thus

4210: $\rho_b\geq 0$ because both $H(\pBar^{\rho}_{\rvx|\rvy})$ and

4211: $H(p^{\rho}_{\rvx\rvy})$ are monotonically increasing with $\rho$ as

4212: shown in Lemma~\ref{LEMMAAPP2} and Lemma~\ref{LEMMAAPP2_SI}.

4213:

4214: Notice that all the KKT conditions are simultaneously satisfied with

4215: the inequality constraint $\gamma H(q_{x|y})+(1-\gamma)H(o_{xy})\geq

4216: b$ being met with equality. Thus, the relaxed optimization problem has

4217: the same optimal solution as the original problem as promised. The

4218: optimal $q_{xy}$ and $o_{xy}$ are the $\rvx-\rvy$ tilted distribution

4219: $\pBar^{\rho_b}_{\rvx\rvy}$ and standard tilted distribution

4220: $p^{\rho_b}_{\rvx\rvy}$ of $p_{\rvx\rvy}$ with the same parameter

4221: $\rho_b\geq 0$, chosen s.t.

4222: $$\gamma H(\pBar^{\rho_b}_{\rvx|\rvy})+(1-\gamma)H(p^{\rho_b}_{\rvx\rvy})=b$$

4223:  Now we have :

4224: \begin{eqnarray}

4225: &&E^{UN}_x(R_x,R_y,\gamma)\nonumber\\

4226: &=&\inf_{b\geq \gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})} \{\inf_{q_{xy},o_{xy}:\gamma H(q_{x|y}) + (1-\gamma)H(o_{xy})=b} \{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})+\max(0,R^{(\gamma)}-b)\}\}\nonumber\\

4227: &=&\inf_{b\geq \gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})}\{\gamma D(\pBar^{\rho_b}_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^{\rho_b}_{\rvx\rvy}||p_{\rvx\rvy})+\max(0,R^{(\gamma)}-b)\}\nonumber\\

4228: &=& \min [\inf_{\rho\geq 0: R^{(\gamma)} \geq \gamma H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma H(\pBar^\rho_{\rvx|\rvy})-(1-\gamma)H(p^\rho_{\rvx\rvy})\},\nonumber\\

4229: &&\inf_{\rho \geq 0: R^{(\gamma)} \leq \gamma

4230: H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma

4231: D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma

4232: )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})\}]\label{eqn:big_two_conditions}

4233: \end{eqnarray}

4234: Notice that $H(p^\rho_{\rvx\rvy})$, $H(\pBar^\rho_{\rvx|\rvy})$,

4235: $D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})$ and

4236: $D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})$ are all strictly increasing with

4237: $\rho>0$ as shown in Lemma~\ref{LEMMAAPP2_SI},

4238: Lemma~\ref{LEMMAAPP3_SI}, Lemma~\ref{LEMMAAPP2} and

4239: Lemma~\ref{LEMMAAPP3} later in this appendix. We have:

4240: \begin{eqnarray}

4241: & & \inf_{\rho \geq 0: R^{(\gamma)} \leq \gamma

4242: H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma

4243: D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma

4244: )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})\} \nonumber\\

4245: &=&\gamma

4246: D(\pBar^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma

4247: )D(p^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})\label{eqn:condition1}

4248: \end{eqnarray}

4249: where $R^{(\gamma)} =\gamma

4250: H(\pBar^{\rho^*}_{\rvx|\rvy})+(1-\gamma)H(p^{\rho^*}_{\rvx\rvy})$.

4251: Applying the results in Lemma~\ref{LEMMAAPP4_SI} and

4252: Lemma~\ref{LEMMAAPP4}, we get:

4253: \begin{eqnarray}

4254: &&\inf_{\rho \geq 0: R^{(\gamma)} \geq \gamma H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma H(\pBar^\rho_{\rvx|\rvy})-(1-\gamma)H(p^\rho_{\rvx\rvy})\}\nonumber\\

4255: &&=\gamma D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma

4256: )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy}) +R^{(\gamma)}-\gamma

4257: H(\pBar^\rho_{\rvx|\rvy})-(1-\gamma)H(p^\rho_{\rvx\rvy})|_{\rho=\rho^*}\nonumber\\

4258: &&=\gamma D(\pBar^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma

4259: )D(p^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})\label{eqn:condition2}

4260: \end{eqnarray} This is true because for $\rho : R^{(\gamma)} \geq

4261: \gamma H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})$,

4262: we know $\rho\leq 1$ because of the range of $R^{(\gamma)}$:

4263: $R^{(\gamma)} < \gamma H(\pBar^1_{\rvx|\rvy}

4264: )+(1-\gamma)H(p^1_{\rvx\rvy} )$. Substituting (\ref{eqn:condition1})

4265: and (\ref{eqn:condition2}) into (\ref{eqn:big_two_conditions}), we

4266: get

4267: \begin{eqnarray}

4268: E^{UN}_x(R_x,R_y,\gamma)&=&\gamma

4269: D(\pBar^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma

4270: )D(p^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})\nonumber\\

4271: && \mbox{where }  \ \ R^{(\gamma)} =\gamma

4272: H(\pBar^{\rho^*}_{\rvx|\rvy})+(1-\gamma)H(p^{\rho^*}_{\rvx\rvy})

4273: \end{eqnarray}

4274: So for $\gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})\leq

4275: R^{(\gamma)} \leq \gamma

4276: H(\pBar^1_{\rvx|\rvy})+(1-\gamma)H(p^1_{\rvx\rvy})$, from

4277: (\ref{eqn:ML_error_expression}) we have the desired property:

4278: $$E^{ML}_x(R_x,R_y,\gamma)=E^{UN}_x(R_x,R_y,\gamma)$$

4279:

4280:

4281: \subsection{case 2: $ R^{(\gamma)} \geq \gamma

4282: H(\pBar^1_{\rvx|\rvy})+(1-\gamma)H(p^1_{\rvx\rvy})$.}\label{case:2}

4283:

4284:

4285: In this case, for all $0\leq \rho\leq 1$

4286: $$\frac{\partial  E^{ML}_x(R_x,R_y,\gamma,\rho)}{\partial

4287: \rho }=R^{(\gamma)}-\gamma H(\pBar^\rho_{\rvx|\rvy}

4288: )-(1-\gamma)H(p^\rho_{\rvx\rvy})\geq R^{(\gamma)}-\gamma

4289: H(\pBar^1_{\rvx|\rvy} )-(1-\gamma)H(p^1_{\rvx\rvy})\geq 0$$

4290:

4291: So $\rho$ takes value $1$ to maximize the error exponent

4292: $E^{ML}_x(R_x,R_y,\gamma,\rho)$, thus

4293: \begin{eqnarray}

4294: E^{ML}_x(R_x,R_y,\gamma)=R^{(\gamma)} -\gamma

4295: \log(\sum_{y}(\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{2}})^{2})-

4296: 2(1-\gamma)\log(\sum_{y}\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{2}})

4297: \end{eqnarray}

4298:

4299: Using the same convex optimization techniques as case \ref{case:1}, we

4300: notice the fact that $\rho^*\geq 1$ for $R^{(\gamma)}

4301: =\gamma

4302: H(\pBar^{\rho^*}_{\rvx|\rvy})+(1-\gamma)H(p^{\rho^*}_{\rvx\rvy})$.

4303: Then applying Lemma~\ref{LEMMAAPP4_SI} and Lemma~\ref{LEMMAAPP4}, we

4304: have:

4305: \begin{eqnarray}

4306:  &&\inf_{\rho\geq 0: R^{(\gamma)} \geq \gamma H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma H(\pBar^\rho_{\rvx|\rvy})-(1-\gamma)H(p^\rho_{\rvx\rvy})\}\nonumber\\

4307: &&=\gamma D(\pBar^1_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma

4308: )D(p^1_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma

4309: H(\pBar^{1}_{\rvx|\rvy})-(1-\gamma)H(p^1_{\rvx\rvy})\nonumber

4310: \end{eqnarray}

4311: And

4312: %%

4313: %%

4314: \begin{eqnarray}

4315: &&\inf_{\rho \geq 0: R^{(\gamma)} \leq \gamma H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})\}\nonumber\\

4316: &&=\gamma D(\pBar^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma

4317: )D(p^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})\nonumber\\

4318: &&=\gamma D(\pBar^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma

4319: )D(p^{\rho^*}_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma

4320: H(\pBar^{\rho^*}_{\rvx|\rvy})-(1-\gamma)H(p^{\rho^*}_{\rvx\rvy})\nonumber\\

4321: &&\geq \gamma D(\pBar^1_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma

4322: )D(p^1_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma

4323: H(\pBar^{1}_{\rvx|\rvy})-(1-\gamma)H(p^1_{\rvx\rvy})\nonumber

4324:  \end{eqnarray}

4325:

4326: Finally:

4327: \begin{eqnarray}

4328: &&E^{UN}_x(R_x,R_y,\gamma)\nonumber\\

4329: &=&\inf_{b\geq \gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})} \{\inf_{q_{xy},o_{xy}:\gamma H(q_{x|y}) + (1-\gamma)H(o_{xy})=b} \{\gamma D(q_{xy}||p_{\rvx\rvy})+(1-\gamma)D(o_{xy}||p_{\rvx\rvy})+\max(0,R^{(\gamma)}-b)\}\}\nonumber\\

4330: &=&\inf_{b\geq \gamma H(p_{\rvx|\rvy})+(1-\gamma)H(p_{\rvx\rvy})}\{\gamma D(\pBar^{\rho_b}_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^{\rho_b}_{\rvx\rvy}||p_{\rvx\rvy})+\max(0,R^{(\gamma)}-b)\}\nonumber\\

4331: &=&\min [\inf_{\rho\geq 0: R^{(\gamma)} \geq \gamma H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma H(\pBar^\rho_{\rvx|\rvy})-(1-\gamma)H(p^\rho_{\rvx\rvy})\},\nonumber\\

4332: &&\inf_{\rho \geq 0: R^{(\gamma)} \leq \gamma H(\pBar^\rho_{\rvx|\rvy})+(1-\gamma)H(p^\rho_{\rvx\rvy})}\{\gamma D(\pBar^\rho_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma )D(p^\rho_{\rvx\rvy}||p_{\rvx\rvy})\}]\nonumber\\

4333: &=&\gamma D(\pBar^1_{\rvx\rvy}||p_{\rvx\rvy})+(1-\gamma

4334: )D(p^1_{\rvx\rvy}||p_{\rvx\rvy})+R^{(\gamma)}-\gamma

4335: H(\pBar^1_{\rvx|\rvy})-(1-\gamma)H(p^1_{\rvx\rvy})\nonumber\\

4336: &=&R^{(\gamma)} -\gamma \log(\sum_{y }(\sum_{x

4337: }p_{\rvx\rvy}(x,y)^{\frac{1}{2}})^{2})- 2(1-\gamma)\log(\sum_{y

4338: }\sum_{x }p_{\rvx\rvy}(x,y)^{\frac{1}{2}})

4339: \end{eqnarray}

4340: The last equality is true by setting $\rho =1$ in

4341: Lemma~\ref{LEMMA_APP8} and Lemma~\ref{LEMMA_APP9}.

4342:

4343:

4344: Again,  $E^{ML}_x(R_x,R_y,\gamma)=E^{UN}_x(R_x,R_y,\gamma)$, thus we finish the

4345: proof.\hfill$\blacksquare$\\

4346:

4347:

4348:

4349: %------------------------------------ Lemmas ------------------------------------------

4350: \subsection{Technical Lemmas}

4351: Some technical lemmas we used in the above proof of

4352: Theorem~\ref{THM:Universal_ML_SW} are now discussed:

4353:

4354: \begin{lemma}\label{LEMMAAPP2}

4355: $\frac{\partial H(p^\rho_{\rvx\rvy})}{\partial \rho}\geq0$

4356: \end{lemma}

4357: \pf  From the definition of the tilted distribution we have the

4358: following observation:

4359:

4360: $\log(p^\rho_{\rvx\rvy}(x_1,y_1))-\log

4361: (p^\rho_{\rvx\rvy}(x_2,y_2))=\log(p_{\rvx\rvy}(x_1,y_1)^{\frac{1}{1+

4362:  \rho}})-\log(p_{\rvx\rvy}(x_2,y_2)^{\frac{1}{1+

4363:  \rho}})$\\ Using the above equality, we first derive the derivative

4364:  of the tilted distribution,  for all  $x,y$

4365:

4366: \begin{eqnarray}

4367:  \frac{\partial p^\rho_{\rvx\rvy}(x,y)  }{\partial \rho}

4368:  &=&\frac{-1}{(1+\rho)^2}

4369:  \frac{ p_{\rvx\rvy}(x,y)^{\frac{1}{1+

4370:  \rho}}\log(p_{\rvx\rvy}(x,y)) (\sum_t\sum_s

4371: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})}{(\sum_t\sum_s

4372: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})^2}

4373: \nonumber\\

4374: &&-\frac{-1}{(1+\rho)^2}

4375:  \frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+

4376:  \rho}} (\sum_t\sum_s

4377: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}}

4378: \log(p_{\rvx\rvy}(s,t)))}{(\sum_t\sum_s

4379: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})^2}

4380: \nonumber\\

4381: &=&\frac{-1}{1+\rho}

4382:  p^\rho_{\rvx\rvy}(x,y)[ \log(p_{\rvx\rvy}(x,y)^{\frac{1}{1+

4383:  \rho}})-\sum_t\sum_s

4384: p^\rho_{\rvx\rvy}(s,t)\log(p_{\rvx\rvy}(s,t)^{\frac{1}{1+

4385:  \rho}})]

4386: \nonumber\\

4387: &=&\frac{-1}{1+\rho}

4388:  p^\rho_{\rvx\rvy}(x,y)[ \log(p^\rho_{\rvx\rvy}(x,y))-\sum_t\sum_s

4389: p^\rho_{\rvx\rvy}(s,t)\log(p^\rho_{\rvx\rvy}(s,t))]\nonumber\\

4390: &=&-\frac{p^\rho_{\rvx\rvy}(x,y)}{1+\rho}[\log(p^\rho_{\rvx\rvy}(x,y))+H(p^\rho_{\rvx\rvy})]

4391: \end{eqnarray}

4392: Then:

4393:

4394: \begin{eqnarray}

4395: \frac{\partial H(p^\rho_{\rvx\rvy})}{\partial \rho}&=&-\frac{\partial \sum_{x,y} p^\rho_{\rvx\rvy}(x,y) \log( p^\rho_{\rvx\rvy}(x,y))}{\partial \rho}\nonumber\\

4396: &=&-\sum_{x,y} (1+\log(p^\rho_{\rvx\rvy}(x,y)))\frac{\partial p^\rho_{\rvx\rvy}(x,y)}{\partial \rho}\nonumber \\

4397: &=&\sum_{x,y} (1+\log(p^\rho_{\rvx\rvy}(x,y)))\frac{p^\rho_{\rvx\rvy}(x,y)}{1+\rho}(\log(p^\rho_{\rvx\rvy}(x,y))+H(p^\rho_{\rvx\rvy}))\nonumber\\

4398: &=&\frac{1}{1+\rho}\sum_{x,y} p^\rho_{\rvx\rvy}(x,y) \log(p^\rho_{\rvx\rvy}(x,y)) (\log(p^\rho_{\rvx\rvy}(x,y))+H(p^\rho_{\rvx\rvy}))\nonumber\\

4399: &=&\frac{1}{1+\rho}[\sum_{x,y}p^\rho_{\rvx\rvy}(x,y) (\log(p^\rho_{\rvx\rvy}(x,y)))^2-H(p^\rho_{\rvx\rvy})^2]\nonumber\\

4400: &=&\frac{1}{1+\rho}[\sum_{x,y} p^\rho_{\rvx\rvy}(x,y) (\log(p^\rho_{\rvx\rvy}(x,y)))^2\sum_{x,y} p^\rho_{\rvx\rvy}(x,y)-H(p^\rho_{\rvx\rvy})^2]\nonumber\\

4401: &\geq_{(a)}&\frac{1}{1+\rho}[(\sum_{x,y}p^\rho_{\rvx\rvy}(x,y) \log(p^\rho_{\rvx\rvy}(x,y)))^2-H(p^\rho_{\rvx\rvy})^2]\nonumber\\

4402: &=& 0

4403: \end{eqnarray}

4404: where (a) is true by the Cauchy--Schwarz inequality. \hfill$\blacksquare$\\

4405:

4406:

4407:

4408: \begin{lemma}\label{LEMMAAPP3}

4409: $\frac{\partial D(p^\rho_{\rvx\rvy}\|p_{\rvx\rvy})}{\partial

4410: \rho}=\rho\frac{\partial H(p^\rho_{\rvx\rvy})}{\partial \rho} $

4411: \end{lemma}

4412: \pf  As shown in Lemma~\ref{LEMMA_APP8} and Lemma~\ref{LEMMA_APP10}

4413: respectively:

4414: $$D(p^\rho_{\rvx\rvy}\|p_{\rvx\rvy})=\rho

4415: H(p^\rho_{\rvx\rvy})-(1+\rho) \log(\sum_{x,y

4416: }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})$$

4417: $$  H(p^\rho_{\rvx\rvy})=\frac{\partial (1+\rho)\log(\sum_{y}\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})}{\partial \rho} $$

4418:

4419: We have:

4420: \begin{eqnarray}

4421: \frac{\partial D(p^\rho_{\rvx\rvy}\|p_{\rvx\rvy})}{\partial \rho}&=&

4422: H(p^\rho_{\rvx\rvy}) +\rho\frac{\partial

4423: H(p^\rho_{\rvx\rvy})}{\partial \rho}-\frac{\partial (1+\rho)\log(\sum_{y}\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})}{\partial \rho}\nonumber\\

4424: &=&  H(p^\rho_{\rvx\rvy}) +\rho\frac{\partial

4425: H(p^\rho_{\rvx\rvy})}{\partial \rho}-H(p^\rho_{\rvx\rvy})  \nonumber\\

4426: &=&\rho\frac{\partial H(p^\rho_{\rvx\rvy})}{\partial \rho}

4427: \end{eqnarray}

4428:  \hfill$\blacksquare$\\

4429:

4430:

4431: \begin{lemma}\label{LEMMAAPP4}

4432: $\operatorname{sign}\frac{\partial

4433: [D(p^\rho_{\rvx\rvy}\|p_{\rvx\rvy})-H(p^\rho_{\rvx\rvy})]}{\partial

4434: \rho}=\operatorname{sign}(\rho-1)$.

4435:

4436: \end{lemma}

4437: \pf  Combining the results of the previous two lemmas, we have:

4438: \begin{eqnarray}

4439: &&\frac{\partial

4440: D(p^\rho_{\rvx\rvy}\|p_{\rvx\rvy})-H(p^\rho_{\rvx\rvy})}{\partial

4441: \rho}=(\rho-1)\frac{\partial H(p^\rho_{\rvx\rvy})}{\partial

4442: \rho}\nonumber

4443: \end{eqnarray} Then by Lemma~\ref{LEMMAAPP2}, we get the conclusion. \hfill$\blacksquare$\\

4444:

4445:

4446: \begin{lemma}\label{LEMMAAPP1_SI} Properties of

4447: $\frac{\partial A(y,\rho)}{\partial \rho}$, $\frac{\partial

4448: B(\rho)}{\partial \rho}$,  $\frac{\partial C(x, y,\rho)}{\partial

4449: \rho}$, $\frac{\partial D(y,\rho)}{\partial \rho}$ and

4450: $\frac{\partial H(\pBar^\rho_{\rvx|\rvy=\svy})}{\partial \rho}$

4451: \end{lemma}

4452:

4453: \pf First,

4454: \begin{eqnarray}

4455: \frac{\partial C(x, y,\rho)}{\partial \rho}&=&\frac{\partial p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}}}{\partial \rho}= -\frac{1}{1+\rho}p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}} \log(p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})\nonumber\\

4456: &=&-\frac{C(x,y,\rho)}{1+\rho}\log(C(x,y,\rho))\nonumber\\

4457: \frac{\partial D(y,\rho)}{\partial \rho}&=&\frac{\partial \sum_ s

4458: p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho}}}{\partial

4459: \rho}=-\frac{1}{1+\rho}\sum_s p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho}}

4460: \log(p_{\rvx\rvy}(s,y)^{\frac{1}{1+\rho}})\nonumber\\&=&-\frac{\sum_x

4461: C(x,y,\rho)\log(C(x,y,\rho))}{1+\rho}

4462: \end{eqnarray}

4463:

4464: For a differentiable function $f(\rho)$,

4465: $$\frac{\partial f(\rho)^{1+\rho}}{\partial \rho}=f(\rho)^{1+\rho}\log(f(\rho))+ (1+\rho)f(\rho)^\rho\frac{\partial f(\rho)}{\partial

4466: \rho}$$ So

4467: \begin{eqnarray}

4468: \frac{\partial A(y,\rho)}{\partial \rho}&=&\frac{\partial D(y,\rho)^{1+\rho}}{\partial \rho}= D(y,\rho)^{1+\rho}\log( D(y,\rho))+ (1+\rho) D(y,\rho)^\rho\frac{\partial  D(y,\rho)}{\partial \rho}\nonumber\\

4469: &=&D(y,\rho)^{1+\rho}(\log(D(y,\rho))- \sum_x \frac{C(x,y,\rho)}{D(y,\rho)}\log(C(x,y,\rho)))\nonumber\\

4470: &=&D(y,\rho)^{1+\rho} (- \sum_x \frac{C(x,y,\rho)}{D(y,\rho)}\log(\frac{C(x,y,\rho)}{D(y,\rho)}))\nonumber\\

4471: &=& A(y,\rho) H(\pBar^\rho_{\rvx|\rvy=\svy})\nonumber\\

4472: \frac{\partial B(\rho)}{\partial \rho}&=&\sum_y \frac{\partial

4473: A(y,\rho)}{\partial \rho}=\sum_y

4474: A(y,\rho)H(\pBar^\rho_{\rvx|\rvy=\svy})=B(\rho)\sum_y

4475: \frac{A(y,\rho)}{B(\rho)}H(\pBar^\rho_{\rvx|\rvy=\svy})

4476: =B(\rho)H(\pBar^\rho_{\rvx|\rvy})\nonumber

4477: \end{eqnarray}

4478: And last:

4479:

4480: \begin{eqnarray}

4481: & & \frac{\partial H(\pBar^\rho_{\rvx|\rvy=\svy})}{\partial

4482: \rho} \nonumber\\

4483: &=&-\sum_x [\frac{\frac{\partial C(x,y,\rho)}{\partial \rho}}{D(y,\rho)}-\frac{C(x,y,\rho)\frac{\partial D(y,\rho)}{\partial \rho}}{D(y,\rho)^2}][1+\log(\frac{C(x,y,\rho)}{D(y,\rho)})]\nonumber\\

4484:  &=&-\sum_x

4485:  [\frac{-\frac{C(x,y,\rho)}{1+\rho}\log(C(x,y,\rho))}{D(y,\rho)}+\frac{C(x,y,\rho)\frac{\sum_s

4486: C(s,y,\rho)\log(C(s,y,\rho))}{1+\rho}}{D(y,\rho)^2}][1+\log(\frac{C(x,y,\rho)}{D(y,\rho)})]\nonumber\\

4487:  &=&\frac{1}{1+\rho}\sum_x

4488:  [\pBar^\rho_{\rvx|\rvy}(x,y)\log(C(x,y,\rho))-\pBar^\rho_{\rvx|\rvy}(x,y) \sum_s

4489: \pBar^\rho_{\rvx|\rvy}(s,y)\log(C(s,y,\rho))][1+\log(\pBar^\rho_{\rvx|\rvy}(x,y))]\nonumber\\

4490:  &=&\frac{1}{1+\rho}\sum_x

4491:  \pBar^\rho_{\rvx|\rvy}(x,y)[\log(\pBar^\rho_{\rvx|\rvy}(x,y))- \sum_s

4492: \pBar^\rho_{\rvx|\rvy}(s,y)\log(\pBar^\rho_{\rvx|\rvy}(s,y))][1+\log(\pBar^\rho_{\rvx|\rvy}(x,y))]\nonumber\\

4493:  &=&\frac{1}{1+\rho}\sum_x

4494:  \pBar^\rho_{\rvx|\rvy}(x,y)\log(\pBar^\rho_{\rvx|\rvy}(x,y))[\log(\pBar^\rho_{\rvx|\rvy}(x,y))- \sum_s

4495: \pBar^\rho_{\rvx|\rvy}(s,y)\log(\pBar^\rho_{\rvx|\rvy}(s,y))]\nonumber\\

4496:  &=&\frac{1}{1+\rho}\sum_x

4497:  \pBar^\rho_{\rvx|\rvy}(x,y)\log(\pBar^\rho_{\rvx|\rvy}(x,y))\log(\pBar^\rho_{\rvx|\rvy}(x,y)) -\frac{1}{1+

4498:  \rho} [\sum_x

4499: \pBar^\rho_{\rvx|\rvy}(x,y)\log(\pBar^\rho_{\rvx|\rvy}(x,y))]^2\nonumber\\

4500: &\geq& 0

4501: \end{eqnarray}

4502: The inequality is true by the Cauchy--Schwarz inequality and by

4503: noticing that $\sum_x \pBar^\rho_{\rvx|\rvy}(x,y)=1$.  \hfill$\blacksquare$

4504:

4505: These properties will again be used in the proofs in the following

4506: lemmas.

4507:

4508: \begin{lemma}\label{LEMMAAPP2_SI}

4509: $\frac{\partial H(\pBar^\rho_{\rvx|\rvy})}{\partial \rho}\geq0$

4510: \end{lemma}

4511: \pf

4512: \begin{eqnarray}

4513: \frac{\partial \frac{A(y,\rho)}{B(\rho)}}{\partial \rho}&=&\frac{1}{B(\rho)^2}(\frac{\partial A(y,\rho)}{\partial \rho}B(\rho)-\frac{\partial B(\rho)}{\partial \rho}A(y, \rho))\nonumber\\

4514: &=&\frac{1}{B(\rho)^2}( A(y,\rho)H(\pBar^\rho_{\rvx|\rvy=\svy})B(\rho)- H(\pBar^\rho_{\rvx|\rvy})B(\rho)A(y,\rho))\nonumber\\

4515: &=&\frac{A(y,\rho)}{B(\rho)}( H(\pBar^\rho_{\rvx|\rvy=\svy})-

4516: H(\pBar^\rho_{\rvx|\rvy}))\nonumber

4517: \end{eqnarray}

4518: Now,

4519:

4520:

4521: \begin{eqnarray}

4522: \frac{\partial H(\pBar^\rho_{\rvx|\rvy})}{\partial\rho}&=& \frac{\partial}{\partial\rho}\sum_y\frac{A(y,\rho)}{B(\rho)}\sum_x \frac{C(x,y,\rho)}{D(y,\rho)}[-\log(\frac{C(x,y,\rho)}{D(y,\rho)})] \nonumber\\

4523: &=& \frac{\partial}{\partial\rho}\sum_y\frac{A(y,\rho)}{B(\rho)}H(\pBar^\rho_{\rvx|\rvy=\svy}) \nonumber\\

4524: &=& \sum_y\frac{A(y,\rho)}{B(\rho)}\frac{\partial H(\pBar^\rho_{\rvx|\rvy=\svy})}{\partial\rho} + \sum_y  \frac{\partial \frac{A(y,\rho)}{B(\rho)}}{\partial\rho}H(\pBar^\rho_{\rvx|\rvy=\svy})\nonumber\\

4525: &\geq& \sum_y  \frac{\partial \frac{A(y,\rho)}{B(\rho)}}{\partial\rho}H(\pBar^\rho_{\rvx|\rvy=\svy})\nonumber\\

4526: &=& \sum_y \frac{A(y,\rho)}{B(\rho)}( H(\pBar^\rho_{\rvx|\rvy=\svy})-H(\pBar^\rho_{\rvx|\rvy})) H(\pBar^\rho_{\rvx|\rvy=\svy})\nonumber\\

4527: &=& \sum_y \frac{A(y,\rho)}{B(\rho)} H(\pBar^\rho_{\rvx|\rvy=\svy})^2-H(\pBar^\rho_{\rvx|\rvy}) ^2\nonumber\\

4528: &=& (\sum_y \frac{A(y,\rho)}{B(\rho)} H(\pBar^\rho_{\rvx|\rvy=\svy})^2)(\sum_y \frac{A(y,\rho)}{B(\rho)})-H(\pBar^\rho_{\rvx|\rvy}) ^2\nonumber\\

4529: &\geq_{(a)}& (\sum_y \frac{A(y,\rho)}{B(\rho)} H(\pBar^\rho_{\rvx|\rvy=\svy}))^2-H(\pBar^\rho_{\rvx|\rvy}) ^2\nonumber\\

4530: &=&0

4531: \end{eqnarray}

4532: where (a) is again true by the Cauchy--Schwarz inequality. \hfill$\blacksquare$

4533:

4534:

4535:

4536: \begin{lemma}\label{LEMMAAPP3_SI}

4537: $\frac{\partial D(\pBar^\rho_{\rvx\rvy}\|p_{\rvx\rvy})}{\partial

4538: \rho}=\rho \frac{\partial H(\pBar^\rho_{\rvx|\rvy})}{\partial \rho}$

4539: \end{lemma}

4540: \pf As shown in Lemma~\ref{LEMMA_APP9} and Lemma~\ref{LEMMA_APP11}

4541: respectively:

4542: $$D(\pBar^\rho_{\rvx\rvy}\|p_{\rvx\rvy})=\rho H(\pBar^\rho_{\rvx|\rvy})- \log(\sum_{y

4543: }(\sum_{x }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho})$$

4544: $$  H(\pBar^\rho_{\rvx|\rvy})=\frac{\partial \log(\sum_{y

4545: }(\sum_{x }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho})}{\partial

4546: \rho} $$

4547:

4548: We have:

4549: \begin{eqnarray}

4550: \frac{\partial D(\pBar^\rho_{\rvx\rvy}\|p_{\rvx\rvy})}{\partial

4551: \rho}&=&   H(\pBar^\rho_{\rvx|\rvy}) +\rho\frac{\partial

4552: H(\pBar^\rho_{\rvx|\rvy})}{\partial \rho}-\frac{\partial

4553: \log(\sum_{y

4554: }(\sum_{x }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho})}{\partial \rho }\nonumber\\

4555: &=&  H(\pBar^\rho_{\rvx|\rvy}) +\rho\frac{\partial

4556: H(\pBar^\rho_{\rvx|\rvy})}{\partial \rho}-H(\pBar^\rho_{\rvx|\rvy})  \nonumber\\

4557: &=&\rho\frac{\partial H(\pBar^\rho_{\rvx|\rvy})}{\partial \rho}

4558: \end{eqnarray} \hfill$\blacksquare$

4559:

4560:

4561: \begin{lemma}\label{LEMMAAPP4_SI}

4562: $\operatorname{sign}\frac{\partial

4563: [D(\pBar^\rho_{\rvx\rvy}\|p_{\rvx\rvy})-H(\pBar^\rho_{\rvx|\rvy})]}{\partial

4564: \rho}=\operatorname{sign}(\rho-1)$.

4565: \end{lemma}

4566: \pf Using the previous lemma, we get:

4567: \begin{eqnarray}

4568: &&\frac{\partial

4569: D(\pBar^\rho_{\rvx\rvy}\|p_{\rvx\rvy})-H(\pBar^\rho_{\rvx|\rvy})}{\partial

4570: \rho}=(\rho-1)\frac{\partial H(\pBar^\rho_{\rvx|\rvy})}{\partial

4571: \rho}\nonumber

4572: \end{eqnarray}

4573: Then by Lemma~\ref{LEMMAAPP2_SI}, we get the

4574: conclusion.\hfill$\blacksquare$\\

4575:

4576:

4577:

4578:

4579:

4580:

4581:

4582: \begin{lemma}\label{LEMMA_APP8}

4583:  $$\rho H(p^\rho_{\rvx\rvy})-(1+\rho)\log(\sum_{y}\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})=D(p^\rho_{\rvx\rvy}\|p_{\rvx\rvy})$$

4584: \end{lemma}

4585: \pf  By noticing that

4586: $\log(p_{\rvx\rvy}(x,y))=(1+\rho)[\log(p^\rho_{\rvx\rvy}(x,y))+\log(\sum_{s,t}

4587: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})]$, we have:

4588: \begin{eqnarray}

4589:  D(p^\rho_{\rvx\rvy}\|p_{\rvx\rvy})&=&-H(p^\rho_{\rvx\rvy})-\sum_{x,y}p^\rho_{\rvx\rvy}(x,y)\log(p_{\rvx\rvy}(x,y))\nonumber\\

4590: &=&-H(p^\rho_{\rvx\rvy})-\sum_{x,y}p^\rho_{\rvx\rvy}(x,y)(1+\rho)[\log(p^\rho_{\rvx\rvy}(x,y))+\log(\sum_{s,t}

4591: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})]\nonumber\\

4592: &=&-H(p^\rho_{\rvx\rvy})+(1+\rho)H(p^\rho_{\rvx\rvy})-(1+\rho)\sum_{x,y}p^\rho_{\rvx\rvy}(x,y)\log(\sum_{s,t}

4593: p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})\nonumber\\

4594: &=&\rho

4595: H(p^\rho_{\rvx\rvy})-(1+\rho)\log(\sum_{s,t}p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})\end{eqnarray}

4596:

4597:

4598: \hfill$\blacksquare$

4599:

4600:

4601:

4602: \begin{lemma}\label{LEMMA_APP9}

4603:  $$\rho H(\pBar^\rho_{\rvx|\rvy})- \log(\sum_{y

4604: }(\sum_{x

4605: }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho})=D(\pBar^\rho_{\rvx\rvy}\|p_{\rvx\rvy})$$

4606: \end{lemma}

4607: \pf

4608: \begin{eqnarray}

4609: D(\pBar^\rho_{\rvx\rvy}\|p_{\rvx\rvy})&=&\sum_y\sum_x \frac{A(y,\rho)}{B(\rho)}\frac{C(x,y,\rho)}{D(y,\rho)}\log(\frac{\frac{A(y,\rho)}{B(\rho)}\frac{C(x,y,\rho)}{D(y,\rho)}}{p_{\rvx\rvy}(x,y)})\nonumber\\

4610: &=& \sum_y\sum_x \frac{A(y,\rho)}{B(\rho)}\frac{C(x,y,\rho)}{D(y,\rho)}[\log(\frac{A(y,\rho)}{B(\rho)})+\log(\frac{C(x,y,\rho)}{D(y,\rho)})-\log(p_{\rvx\rvy}(x,y))]\nonumber\\

4611: &=& -\log(B(\rho)) - H(\pBar^\rho_{\rvx|\rvy}) + \sum_y\sum_x \frac{A(y,\rho)}{B(\rho)}\frac{C(x,y,\rho)}{D(y,\rho)}[\log(D(y,\rho)^{1+\rho})-\log(C(x,y,\rho)^{1+\rho})]\nonumber\\

4612: &=& -\log(B(\rho)) - H(\pBar^\rho_{\rvx|\rvy}) +(1+\rho) H(\pBar^\rho_{\rvx|\rvy})\nonumber\\

4613: &=& - \log(\sum_{y }(\sum_{x

4614: }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho}) + \rho

4615: H(\pBar^\rho_{\rvx|\rvy}) \nonumber

4616: \end{eqnarray}

4617:  \hfill$\blacksquare$

4618:

4619: \begin{lemma}\label{LEMMA_APP10}

4620: $$  H(p^\rho_{\rvx\rvy})=\frac{\partial (1+\rho)\log(\sum_{y}\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})}{\partial \rho} $$

4621: \end{lemma}

4622: \pf

4623:

4624: \begin{eqnarray}

4625:  & & \frac{\partial

4626: (1+\rho)\log(\sum_{y}\sum_{x}p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})}{\partial

4627: \rho} \nonumber\\

4628:  &=&\log(\sum_{t}\sum_{s}p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}})-

4629: \sum_{y}\sum_{x}\frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}}}{\sum_{t}\sum_{s}p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}}}\log(p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})\nonumber\\

4630: &=& -

4631: \sum_{y}\sum_{x}\frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}}}{\sum_{t}\sum_{s}p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}}}\log(\frac{p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}}}{\sum_{t}\sum_{s}p_{\rvx\rvy}(s,t)^{\frac{1}{1+\rho}}})\nonumber\\

4632: &=&H(p^\rho_{\rvx\rvy})

4633: \end{eqnarray}

4634:

4635:

4636:

4637:

4638: \hfill$\blacksquare$

4639: \begin{lemma}\label{LEMMA_APP11}

4640: $$  H(\pBar^\rho_{\rvx|\rvy})=\frac{\partial \log(\sum_{y

4641: }(\sum_{x }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho})}{\partial

4642: \rho} $$

4643: \end{lemma}

4644: \pf Notice that $B(\rho)=\sum_{y }(\sum_{x

4645: }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho}$, and $

4646: \frac{\partial B(\rho)}{\partial \rho}

4647: =B(\rho)H(\pBar^\rho_{\rvx|\rvy})$ as shown in

4648: Lemma~\ref{LEMMAAPP1_SI}. It is clear that:

4649:

4650: \begin{eqnarray}

4651: \frac{\partial \log(\sum_{y }(\sum_{x

4652: }p_{\rvx\rvy}(x,y)^{\frac{1}{1+\rho}})^{1+\rho})}{\partial

4653: \rho}&=&\frac{\partial \log(B(\rho))}{\partial \rho}\nonumber\\

4654: &=& \frac{1}{B(\rho)}\frac{\partial B(\rho)}{\partial

4655: \rho}\nonumber\\

4656: &=&H(\pBar^\rho_{\rvx|\rvy})

4657: \end{eqnarray}

4658: \hfill$\blacksquare$

4659:

4660: %-------------------------       End of Lemmas       ------------------------------------------

4661:

4662: \bibliographystyle{IEEEtran}

4663: \bibliography{IEEEabrv,references}

4664:

4665: \end{document}

4666:

4667: % LocalWords:  Lossless IEEE Symp Sahai Cheng Anant iid Slepian

4668: % LocalWords:  lossless Slepian encodings cardinalities iid memoryless Lempel

4669: % LocalWords:  atypicality blocklengths tradeoffs Ziv convolutional multi

4670: % LocalWords:  Bolded Subsequences subsequences subsequence entropies

4671: % LocalWords:  lclcl jR iid convolutional Slepian xy lll

4672: % LocalWords:  Slepian BSC xy iff

4673: % LocalWords:  Chernoff iid exponentials

4674: % LocalWords:  Slepian Gallager's

4675: % LocalWords:  achievability exponentials

4676: % LocalWords:  memoryless Chernoff

4677: % LocalWords:  Slepian multiuser

4678: % LocalWords:  indices infimum lllll xy memoryless exponentials

4679: % LocalWords:  entropies subsequences memoryless

4680: % LocalWords:  Zixiang Xiong ITW ITR

4681: % LocalWords:  xy Kullback Leibler