0605:cs0605135/pp.tex

1: \documentclass[onecolumn,draftcls,dvips,letter]{IEEEtran}

2:

3:

4: \usepackage{amsmath,amssymb,epsfig,color}

5: \usepackage{graphicx,verbatim}

6: \usepackage[section]{placeins}

7: \usepackage{afterpage}

8: %\usepackage{isuthesis}

9:

10:

11: \newtheorem{definition}{Definition}

12: \newtheorem{theorem}{Theorem}

13: \newtheorem{lemma}{Lemma}

14: \newtheorem{proposition}{Proposition}

15: \newtheorem{corollary}{Corollary}

16: \newtheorem{remark}{Remark}

17:

18: \setlength{\unitlength}{1mm} \setlength\arraycolsep{2pt}

19:

20: \newcommand{\eps}{\epsilon}

21: \newcommand{\styp}{A^{*(n)}_{\eps}}

22: \newcommand{\stypd}{A^{*(n)}_{\delta}}

23: \newcommand{\stypdp}{A^{*(n)}_{\delta'}}

24: \newcommand{\stypm}{A^{*(m)}_{\delta}}

25: \newcommand{\stypp}{A^{*(n-m)}_{\eps'}}

26: \newcommand{\typ}{A_{\epsilon}^{(n)}}

27: \newcommand{\typm}{A_{\delta}^{(m)}}

28: \newcommand{\ttyp}{\tilde{A}_{\epsilon, \delta}^{(n)}}

29: \newcommand{\btyp}{\bar{A}_{\epsilon, \delta}^{(n)}}

30: \newcommand{\hP}{\hat{P}}

31: \newcommand{\hY}{\hat{Y}}

32: \newcommand{\hhY}{\hat{\hat{Y}}}

33: \newcommand{\hy}{\hat{y}}

34: \newcommand{\hhy}{\hat{\hat{y}}}

35: \newcommand{\hw}{\hat{w}}

36: \newcommand{\hm}{\hat{m}}

37: \newcommand{\hs}{\hat{s}}

38: \newcommand{\hq}{\hat{q}}

39: \newcommand{\mN}{\mathcal{N}}

40: \newcommand{\mC}{\mathcal{C}}

41: \newcommand{\mD}{\mathcal{D}}

42: \newcommand{\mS}{\mathcal{S}}

43: \newcommand{\mU}{\mathcal{U}}

44: \newcommand{\mV}{\mathcal{V}}

45: \newcommand{\mL}{\mathcal{L}}

46: \newcommand{\mA}{\mathcal{A}}

47: \newcommand{\mW}{\mathcal{W}}

48: \newcommand{\mX}{\mathcal{X}}

49: \newcommand{\mY}{\mathcal{Y}}

50: \newcommand{\mZ}{\mathcal{Z}}

51: \newcommand{\mM}{\mathcal{M}}

52: \newcommand{\mQ}{\mathcal{Q}}

53: \newcommand{\mR}{\mathcal{R}}

54: \newcommand{\mhY}{\hat{\mathcal{Y}}}

55: \newcommand{\mhhY}{\hat{\hat{\mathcal{Y}}}}

56: \newcommand{\tmL}{\tilde{\mathcal{L}}}

57: \newcommand{\xvec}{\mathbf{x}}

58: \newcommand{\yvec}{\mathbf{y}}

59: \newcommand{\Xvec}{\mathbf{X}}

60: \newcommand{\Yvec}{\mathbf{Y}}

61: \newcommand{\hYvec}{\hat{\mathbf{Y}}}

62: \newcommand{\tYvec}{\tilde{\mathbf{Y}}}

63: \newcommand{\tZvec}{\tilde{\mathbf{Z}}}

64: \newcommand{\tTvec}{\tilde{\mathbf{T}}}

65: \newcommand{\tXvec}{\tilde{\mathbf{X}}}

66: \newcommand{\uvec}{{\bf u}}

67: \newcommand{\vvec}{{\bf v}}

68: \newcommand{\svec}{{\bf s}}

69: \newcommand{\wvec}{{\bf w}}

70: \newcommand{\avec}{{\bf a}}

71: \newcommand{\hyvec}{\hat{\mathbf{y}}}

72: \newcommand{\hhyvec}{\hat{\hat{\mathbf{y}}}}

73: \newcommand{\Pe}{P_{e}^{(n)}}

74: \newcommand{\rend}{\hfill$\square$}

75: \newcommand{\tend}{\hfill$\blacksquare$}

76: \newcommand{\muvec}{\boldsymbol{\mu}}

77: \newcommand{\Rgood}{R_{x1}}

78: \newcommand{\Rbad}{R_{x2}}

79: \newcommand{\dvec}{\boldsymbol{\delta}}

80: \newcommand{\lvec}{\boldsymbol{\lambda}}

81: \newcommand{\cardY}{||\mY_1||}

82: \newcommand{\cardYY}{||\mY_2||}

83: \newcommand{\cardX}{||\mX||}

84: \newcommand{\cardS}{||\mS||}

85: \newcommand{\reals}{\mathbb{R}}

86: \newcommand{\Bt}{\mbox{Bin}_N(\theta)}

87: \newcommand{\Bl}{\mbox{Bin}(l)}

88: \newcommand{\Bj}{\mbox{Bin}_{L_i'}(j)}

89: \newcommand{\Blp}{\mbox{Bin}(l')}

90: \newcommand{\sigR}{\sigma_1^2}

91: \newcommand{\sigW}{\sigma_W^2}

92: \newcommand{\sigD}{\sigma^2}

93: \newcommand{\sigQ}{\sigma_Q^2}

94: \newcommand{\sigQs}{\sigma_Q^{2*}}

95: \newcommand{\nQ}{N_Q}

96: \newcommand{\negdista}{\!\!\!\!\!\!\!\!\!\!}

97: \newcommand{\ners}{\mbox{\scriptsize no erase}}

98: \newcommand{\ers}{\mbox{\scriptsize erase}}

99:

100: \title{On the Role of Estimate-and-Forward with Time-Sharing in Cooperative Communication

101: \thanks{The authors are with the School of Electrical and Computer

102: Engineering, Cornell University, Ithaca, NY. URL: {\tt

103: http://cn.ece.cornell.edu/}.

104: Work supported by the National Science Foundation, under awards

105: CCR-0238271 (CAREER), CCR-0330059, and ANR-0325556.}}

106: \author{Ron Dabora \hspace{2cm} Sergio D.\ Servetto}

107:

108: \begin{document}

109: \maketitle

110: \begin{picture}(0,0)

111: \put(0,70){\tt\small Submitted to the IEEE Transactions on

112: Information Theory, October 2006.}

113: \end{picture}

114: \begin{abstract}

115:     \it\noindent

116:     In this work we focus on the general relay channel.

117:     We investigate the application of estimate-and-forward (EAF) to different scenarios. Specifically,

118:     we consider assignments of the auxiliary random variables that always satisfy the feasibility constraints.

119:     We first consider the multiple relay channel and obtain an achievable rate without decoding at the relays. We demonstrate

120:     the benefits of this result via an explicit discrete memoryless multiple relay scenario

121:     where multi-relay EAF is superior to multi-relay decode-and-forward (DAF).

122:     We then consider the Gaussian relay channel with coded modulation, where we show that a three-level quantization outperforms the

123:     Gaussian quantization commonly used to evaluate the achievable rates in this scenario. Finally we consider

124:     the cooperative general broadcast scenario with a multi-step conference. We apply

125:     estimate-and-forward to obtain a general multi-step achievable rate region. We then give an

126:     explicit assignment of the auxiliary random variables, and use this result to

127:     obtain an explicit expression for the single common message broadcast scenario with a two-step conference.

128: \end{abstract}

129:

130:

131: \section{Introduction}

132: The relay channel was introduced by van der Meulen in 1971

133: \cite{Meulen:71}. In this setup, a single transmitter with channel input $X^n$ communicates with a single receiver with channel

134: output $Y^n$, where the superscript $n$ denotes the length of a vector. In addition, an external transceiver, called a relay,

135: listens to the channel and is able to output signals to the channel. We denote the relay output with $Y_1^n$ and its input with $X_1^n$.

136: This setup is depicted in figure \ref{fig:relay_setup}.

137: \begin{figure}[h]

138:     \centering

139:     \scalebox{0.6}{\includegraphics{Relay_Channel.eps}}

140:     \caption{The relay channel. The encoder sends a message $W$ to the decoder.}

141:     \label{fig:relay_setup}

142: \end{figure}

143:

144: \subsection{Relaying Strategies}

145: \label{sec:relay_strategies}

146: In \cite{CoverG:79} Cover \& El-Gamal introduced two relaying

147: strategies commonly referred to as decode-and-forward (DAF) and

148: estimate-and-forward (EAF). In DAF the relay decodes the message

149: sent from the transmitter and then, at the next time interval,

150: transmits a codeword based on the decoded message. The rate

151: achievable with DAF is given in \cite[theorem 1]{CoverG:79}:

152: \begin{theorem}

153:     \label{thm:CEG_DAF}

154:     \it (achievability of \cite[theorem 1]{CoverG:79}) For the general relay channel any rate $R$ satisfying

155:     \begin{equation}

156:     \label{eqn:CEG_DAF}

157:         R \le \min \left\{I(X,X_1;Y), I(X;Y_1|X_1)\right\}

158:     \end{equation}

159:     for some joint distribution $p(x,x_1,y,y_1) = p(x,x_1)p(y,y_1|x,x_1)$, is achievable.

160: \end{theorem}

161: We note that for DAF to be effective, the rate to the relay has to be greater than the point-to-point rate,

162: i.e.,

163: \begin{equation}

164:     \label{eqn:DAF_condition}

165:     I(X;Y_1|X_1) > I(X;Y|X_1),

166: \end{equation}

167: otherwise higher rates could be obtained without using the relay at all.

168: For relay channels where DAF is not useful or not optimal, \cite{CoverG:79} proposed the EAF strategy. In this strategy,

169: the relay sends an estimate of its channel output to the destination, without decoding the source message at all.

170: The achievable rate with EAF is given in

171: \cite[theorem~6]{CoverG:79}:

172: \begin{theorem}

173:     \label{thm:CEG_EAF}

174:     \it (\cite[theorem 6]{CoverG:79}) For the general relay channel any rate $R$ satisfying

175:     \begin{eqnarray}

176:     \label{eqn:EAF_rate}

177:         R  &\le & I(X;Y,\hY_1|X_1),\\

178:     \label{eqn:EAF_feasible}

179:      \mbox{subject to }   I(X_1;Y) & \ge & I(Y_1;\hY_1|X_1,Y),

180:     \end{eqnarray}

181:     for some joint distribution $p(x,x_1,y,y_1,\hy_1) = p(x)p(x_1)p(y,y_1|x,x_1)p(\hy_1|y_1,x_1)$, where

182:     $||\mhY_1|| < \infty$, is achievable.

183: \end{theorem}

184:

185: Of course, one can combine the DAF and EAF schemes by performing partial decoding at the relay, thus obtaining

186: higher rates as in \cite[theorem 7]{CoverG:79}.

187:

188: \subsection{Related Work}

189: In recent years, the research in relaying has mainly focused on multiple-level

190: relaying and the MIMO relay channel. In the context of multiple-level relaying based on DAF, several DAF variations

191: were considered.

192:  In \cite{GuptaKumar:2003} Cover \& El-Gamal's block Markov encoding/successive decoding DAF method was applied to the

193: multiple-relay case. Later work \cite{XieKumar:2004}, \cite{XieKumar:2005} and

194: \cite{Kramer:2003} applied the so-called regular encoding/sliding-window decoding and

195: the regular encoding/backward decoding techniques to the multiple-relay scenario.

196: In \cite{Madsen:2005} the DAF strategy was applied to the MIMO relay channel.

197: The EAF strategy was also applied to the multiple-relay scenario.

198: The work in \cite{Kramer:2005}, for example, considered the EAF strategy for multiple relay scenarios and the Gaussian relay

199: channel, in addition to considering the DAF strategy.

200: Also \cite{Gastpar:2002} considered the EAF strategy in the multiple-relay setup.

201: % and in \cite{SchienGallager:2000}

202: %communication over two parallel relay channels to a destination, without a direct link

203: %between the source and the destination, was considered .

204: Another approach applied recently to the relay channel is that

205: of iterative decoding. In \cite{ElGamalH:2006} the three-node network in the half-duplex regime was considered.

206: In the relay case, \cite{ElGamalH:2006} uses a feedback scheme where the receiver first uses EAF to

207: send information to the relay and then the relay decodes and uses DAF at the next time interval to help

208: the receiver decode its message. Combinations of EAF and DAF were also considered in \cite{Goldsmith:2006}, where

209: conferencing schemes over orthogonal relay-receiver channels were analyzed and compared.

210: Both \cite{ElGamalH:2006} and \cite{Goldsmith:2006} focus on the Gaussian case.

211: % In \cite{Mine:06} we applied simultaneous decoding to the EAF method which resulted in an

212: % increased feasible region for this strategy compared to \cite[theorem 6]{CoverG:79}.

213: % Another work that should be noted in that context is \cite{Motani:2005} where simultaneous decoding

214: % is used to improve upon Cover and El-Gamal's combined DAF/EAF result of \cite[theorem 7]{CoverG:79}.

215: % However, when specialized to the EAF setup, the result of \cite{Motani:2005} converges to

216: % \cite[theorem 6]{CoverG:79}.

217:

218: An extension of the relay scenario to a hybrid broadcast/relay system was

219: introduced in \cite{DraperFK:03}

220: in which the authors applied a combination of EAF and DAF strategies to the independent broadcast

221: channel with a single common message, and then extended this strategy to the multi-step conference.

222: In \cite{RonSer:2005} we used both a single-step and a two-step conference

223: with orthogonal conferencing channels in the discrete memoryless framework.

224: A thorough investigation of the broadcast-relay channel was done in \cite{LiagV:2005}, where the authors

225: applied the DAF strategy to the case where only one user is helping the other user, and also presented an upper bound for

226: this case. Then, the fully cooperative scenario was analyzed. The authors applied both the

227: DAF and the EAF methods to that case.

228:

229:

230: \subsection{The Gaussian Relay Channel with Coded Modulation}

231: One important instance of the relay channel we consider in this work is the Gaussian relay channel with

232: coded modulation. This scenario is important in evaluating the rates achievable with practical communication

233: systems, where components in the receive chain, such as equalization for example, require

234: a uniformly distributed finite constellation for optimal operation.

235: In Gaussian relay channel scenarios, three types of relaying techniques are most often encountered:

236: \begin{itemize}

237:     \item The first technique is decode-and-forward. This technique achieves capacity for the physically degraded

238:         Gaussian relay channel (see \cite[section IV]{CoverG:79}), and also for more general relay

239:         channels under certain conditions (see \cite{Goldsmith:2006}).

240:

241:     \item The second technique is estimate-and-forward, where the auxiliary variable $\hY_1$ is assigned a Gaussian

242:     distribution. For example, in \cite[section IV]{ElGamal:06} a Gaussian auxiliary random variable (RV) is used in conjunction with

243:     time-sharing at the transmitter, and in \cite{HostMadsen:05} the ergodic capacity for full duplex

244:     transmission with Gaussian EAF is obtained.

245:

246:     \item The third technique is linear relaying, where the relay transmits a weighted sum of

247:     all its previously received inputs \cite[section V]{ElGamal:06}. An important subclass of this

248:     family of relaying functions is when the relay transmits a scaled version of its input. This method is called

249:     amplify-and-forward \cite{Laneman:2000}, and was later combined with DAF to produce the

250:     decode-amplify-and-forward method of \cite{Bao:2005}.

251: \end{itemize}

252: % In this paper we also consider the relay channel with coded modulation.

253: % The coded modulation relay scenario is important when evaluating the rates that can be obtained by practical

254: %systems,

255: Several recent papers consider the Gaussian relay channel with coded modulation.

256: In \cite{Kramer:Asi05} the author considered variations of DAF for different practical systems. In

257: \cite{Laneman:2000} DAF and amplify-and-forward were considered for coherent orthogonal BPSK signalling, and in

258: \cite{Stankovic:05} a practical construction that implements a half-duplex EAF coding scheme was proposed.

259:

260: As indicated by several authors (see \cite{ElGamal:06}) it is not obvious if a Gaussian relay function is

261: indeed optimal. In this paper we show that for the case of coded modulation, there are scenarios where

262: non-Gaussian assignments of the auxiliary RV result in a higher rate than the commonly applied Gaussian assignment.

263:

264:

265: \subsection{Main Contributions}

266:

267: In the following we summarize the main contributions of this work:

268: \begin{itemize}

269:     \item We give an intuitive insight into the relay channel in terms of information flow on a graph,

270:     and show how to obtain \cite[theorem 6]{CoverG:79} from flow considerations. Using flow considerations

271:     we also obtain the rate of the EAF strategy when the receiver uses joint-decoding.

272:     A similar expression can be obtained by specializing the result of

273:     \cite{Motani:06} to the case where the relay does not perform partial decoding.

274:     We then show that joint-decoding does not increase the maximum rate of the EAF strategy, and

275:     find the time-sharing assignment that obtains the joint-decoding rate from the general EAF expression. We also

276:     present another time-sharing assignment that always exceeds the joint-decoding rate.

277:

278:     \item We introduce an achievable rate expression for the multiple relay scenario based on EAF, that is also practically computable.

279:     As discussed in section \ref{sec:relay_strategies}, in

280:     the ``noisy relay'' case EAF outperforms DAF. However, for the multiple relay scenario there is no explicit, computationally practical expression

281:     based on EAF that can be compared with the DAF-based result presented in \cite{XieKumar:2005}, so that the

282:     best strategy can be selected. As indicated in \cite[remark 22, remark 23]{Kramer:2005}, applying general EAF to

283:     a network with an arbitrary number of relays

284:     is computationally impractical due to the large number of constraints that characterize the feasible region.

285:     Therefore, it is interesting to explore a computationally simple assignment that allows to derive a

286:     result that extends to an arbitrary number of relays. We also provide an explicit numerical example

287:     to demonstrate that indeed there are cases where multi-relay EAF outperforms the multi-relay DAF.

288:

289:     \item We consider the optimization of the EAF auxiliary random variable for the

290:     Gaussian relay channel with an orthogonal relay. We consider the coded modulation scenario, and

291:     show that there are three regions: high SNR on the source-relay link, where DAF is the best strategy,

292:     low SNR on the source-relay link in which the common

293:     EAF with Gaussian assignment is best, and an intermediate region where EAF with hard-decision

294:     per symbol is optimal. For this intermediate SNR region we consider two kinds of hard-decisions: deterministic and

295:     probabilistic, and show that each one of them can be superior, depending on the channel conditions.

296:

297:     \item Lastly, we consider the cooperative broadcast scenario with a multi-step conference. We present a

298:     general rate region, extending the Marton rate region of \cite{Marton:79} to the case where the

299:     receivers hold a $K$-cycle conference prior to decoding the messages. We then specialize this result

300:     to the single common message case and obtain explicit expressions (without auxiliary RVs)

301:     for the two-step conference.

302:     %  that demonstrate that indeed it exceeds the two-step conference.

303:  %   Contrary to the two-step conference, the three-step scheme achieves

304:  %   the full cooperation bound when the conference capacities are less than those given by the Slepian-Wolf

305:  %   theorem \cite[theorem 14.4.1]{cover-thomas:it-book}.

306:

307: \end{itemize}

308:

309:

310: %In the third part of this paper we demonstrate our new strategy in the cooperative broadcast channel with a

311: %single common message scenario. For this setup

312: %we present an explicit three-step cooperation scheme that does not require

313: %auxiliary random variables. This new cooperation scheme yields a rate

314: %increase over the non-cooperative rate for any given cooperation capacity. In addition, this scheme achieves

315: %the full cooperation bound when the conference capacities are less than those given by the Slepian-Wolf

316: %theorem \cite[theorem 14.4.1]{cover-thomas:it-book}.

317:

318: The rest of this paper is organized as follows:

319: %in section \ref{sec:defs} we define the mathematical framework and also

320: %present an intuitive formulation of the relay channel using information flow on a graph.

321: in section \ref{sec:timeshare_single} we discuss the single relay case. We consider the EAF strategy with

322: time-sharing (TS) and relate it to the EAF rate expression for joint-decoding at the destination receiver.

323: In section \ref{sec:MultipleRelays} we present an achievable region for the multiple-relay channel, and

324: in section \ref{sec:Gauss_relay} we examine the Gaussian relay channel with coded modulation.

325: In section \ref{sec:application_multi_step} we investigate the general cooperative broadcast scenario, and

326: obtain an explicit rate expression by applying TS-EAF to the general multi-step conference.

327: Finally, section \ref{sec:conclu} presents concluding remarks.

328:

329:

330:

331:

332:

333: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

334: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

335: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

336: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

337: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

338: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

339:

340:

341:

342:

343:

344:

345: \section{Time-Sharing for the Single-Relay Case}

346: \label{sec:timeshare_single}

347:

348:

349: \subsection{Definitions}

350: \label{sec:defs}

351: First, a word about notation:

352: we denote discrete random variables with capital letters e.g. $X$, $Y$, and their realizations with lower case letters

353: $x$, $y$. A random variable $X$ takes values in a set $\mX$. We use $||\mX||$ to denote the cardinality

354: of a finite discrete set $\mX$, and $p_X(x)$ denotes the probability distribution function (p.d.f.) of $X$ on $\mX$. For brevity we may omit the subscript $X$ when it is obvious from

355: the context. We denote vectors with boldface letters, e.g. $\xvec$, $\yvec$; the $i$'th element of a vector $\xvec$ is

356: denoted by $x_i$ and we use $\xvec_i^j$ where $i<j$ to denote $(x_i, x_{i+1},...,x_{j-1},x_j)$.

357: We use $\styp(X)$ to denote the set of $\eps$-strongly typical sequences w.r.t. distribution

358: $p_X(x)$ on $\mX$, as defined in \cite[ch. 5.1]{YeungBook} and $\typ(X)$ to denote the $\eps$-weakly typical set

359: as defined in \cite[ch. 3]{cover-thomas:it-book}.

360:

361: We also have the following definitions:

362: \begin{definition}

363:     \label{def:relay_channel}

364:     The {\em discrete relay channel} is defined by two discrete input alphabets $\mX$ and $\mX_1$, two

365:     discrete output alphabets $\mY$ and $\mY_1$ and a probability distribution function $p(y,y_1|x,x_1)$ giving the

366:     probability distribution on $\mY \times \mY_1$ for each $(x,x_1) \in \mX \times \mX_1$.

367:     The relay channel is called {\em memoryless} if the probability of a block of $n$ transmissions is given by

368:     $p(\yvec,\yvec_1|\xvec,\xvec_1) = \prod_{i=1}^n p\left(y_i, y_{1,i}|x_i,x_{1,i}\right)$.

369: \end{definition}

370: In this paper we consider only the memoryless relay channel.

371: \begin{definition}

372:     \label{def:code}

373:     A {\em $(2^{nR},n)$ code} for the relay channel consists of a source message set

374:     $\mW = \left\{1,2,...,2^{nR}\right\}$, a mapping function $f$ at the encoder,

375:     \[

376:         f: \mW \mapsto \mX^n,

377:     \]

378:     a set of $n$ relay functions

379:     \[

380:         x_{1,i} = t_i\left(y_{1,1},y_{1,2},...,y_{1,i-1} \right),

381:     \]

382:     where the $i$'th relay function $t_i$ maps the first $i-1$ channel outputs at the relay into a transmitted

383:     relay symbol at time $i$. Lastly we have a decoder

384:     \[

385:         g: \mY^n \mapsto \mW.

386:     \]

387: \end{definition}

388: \begin{definition}

389:     \label{def:Perr}

390:     The {\em average probability of error} for a code of length $n$ for the relay channel is defined as

391:     \[

392:         \Pe = \Pr(g(Y^n) \ne W),

393:     \]

394:     where $W$ is selected uniformly over $\mW$.

395: \end{definition}

396: \begin{definition}

397:     A rate $R$ is called {\em achievable} if there exists a sequence of $(2^{nR},n)$ codes with

398:     $\Pe \rightarrow 0$ as $n \rightarrow \infty$.

399: \end{definition}

400:

401:

402:

403:

404:

405:

406:

407:

408:

409:

410:

411:

412:

413: \subsection{The Single Relay EAF with Time-Sharing}

414: \label{sec:ts-single-subsec}

415: Consider the following assignment of the auxiliary random variable

416: of theorem \ref{thm:CEG_EAF}:

417: \begin{equation}

418:     \label{eqn:time-sharing-mapping}

419:     p(\hy_1|y_1,x_1) = \left\{

420:             \begin{array}{cl}

421:                 q &, \hy_1 = y_1\\

422:                 1-q & ,\hy_1 = \Omega \notin \mY_1.

423:             \end{array}

424:         \right.

425: \end{equation}

426: Under this assignment, the feasibility condition of

427: \eqref{eqn:EAF_feasible} becomes

428: \begin{eqnarray*}

429:     I(X_1;Y) & \ge & I(Y_1;\hY_1|X_1,Y) \\

430:              & = &   H(Y_1|X_1,Y) - H(Y_1|X_1,Y,\hY_1) \\

431:              & = &   H(Y_1|X_1,Y) - (1-q)H(Y_1|X_1,Y) - q H(Y_1|X_1,Y,Y_1)\\

432:              & = &   q H(Y_1|X_1,Y),

433: \end{eqnarray*}

434: and the rate expression \eqref{eqn:EAF_rate} becomes

435: \begin{eqnarray*}

436:     R & \le & I(X;Y,\hY_1|X_1)\\

437:         & = & I(X;Y|X_1) + I(X; \hY_1|X_1,Y)\\

438:         & = & I(X;Y|X_1) + H(X| X_1,Y) - H(X|X_1,Y,\hY_1)\\

439:         & = & I(X;Y|X_1) + H(X| X_1,Y) - (1-q) H(X|X_1,Y) - q H(X|X_1,Y,Y_1)\\

440:         & = & I(X;Y|X_1) + q I(X;Y_1|X_1,Y).

441: \end{eqnarray*}

442: Clearly, maximizing the rate implies maximizing $q$ subject to the

443: feasibility condition above and the constraint $q\in [0,1]$. This gives the following corollary to theorem \ref{thm:CEG_EAF}:

444: \begin{corollary}

445:     \label{corr:single_relay_TAF}

446:     \it For the general relay channel any rate $R$ satisfying

447:     \begin{equation}

448:     \label{eqn:main_corr}

449:         R \le I(X;Y|X_1) + \left[ \frac{I(X_1;Y)}{H(Y_1|X_1,Y)} \right]^* I(X;Y_1|X_1,Y),

450:     \end{equation}

451:     for the joint distribution $p(x,x_1,y,y_1) = p(x) p(x_1) p(y,y_1|x,x_1)$, with $[x]^* \triangleq \min(x,1)$,

452:     is achievable.

453: \end{corollary}

454:

455: Now, consider the following distribution chain:

456: \begin{equation}

457: \label{eqn:extended_prob_chain}

458:     p(x,x_1,y,y_1,\hy_1,\hhy_1) = p(x)p(x_1)p(y,y_1|x,x_1) p(\hy_1|x_1,y_1) p(\hhy_1 | \hy_1).

459: \end{equation}

460: We note that this extended chain can be put into the standard form by letting $p(\hhy_1|x_1,y_1) = \sum_{\mhY_1}p(\hy_1,\hhy_1|x_1,y_1) =

461: \sum_{\mhY_1}p(\hy_1|x_1,y_1)p(\hhy_1|\hy_1) $.

462: After compression of $Y_1$ into $\hY_1$, there is a second compression operation, compressing $\hY_1$ into $\hhY_1$. The output

463: of the second compression is used to facilitate cooperation between the relay and the destination. Therefore, the

464: receiver decodes the message based on $\hhyvec_1$ and $\yvec$, repeating exactly the same step as in the standard relay decoding, with

465: $\hhyvec_1$ replacing $\hyvec_1$. Then, the expressions of theorem \ref{thm:CEG_EAF} become

466: \begin{eqnarray}

467:     \label{eqn:EAF_rate_extended}

468:         R  &\le & I(X;Y,\hhY_1|X_1),\\

469:     \label{eqn:EAF_feasible_extended}

470:      \mbox{subject to }   I(X_1;Y) & \ge & I(Y_1;\hhY_1|X_1,Y).

471: \end{eqnarray}

472: Now, applying TS to $\hhY_1$ with

473: \begin{equation}

474:     \label{eqn:assignment_hhy}

475:         p(\hhy_1|\hy_1) = \left\{

476:             \begin{array}{cl}

477:                 q &,\hhy_1 = \hy_1\\

478:                 1-q & ,\hhy_1 = \Delta \notin \mhY_1

479:             \end{array}

480:         \right.,

481: \end{equation}

482: the expressions in \eqref{eqn:EAF_rate_extended} and \eqref{eqn:EAF_feasible_extended} become

483: \begin{eqnarray}

484:     R & \le & I(X;Y|X_1) + I(X;\hhY_1|X_1,Y)\nonumber\\

485:      & = & I(X;Y|X_1) + H(X|X_1,Y) - H(X|\hhY_1,X_1,Y)\nonumber\\

486:      & = & I(X;Y|X_1) + q(H(X|X_1,Y)  -  H(X|\hY_1,X_1,Y)) \nonumber\\

487:      \label{eqn:rate_CEG_extended_chain}

488:      & = & I(X;Y|X_1) + q I(X;\hY_1|X_1,Y), \\

489:     I(X_1;Y) & \ge & I(Y_1;\hhY_1|X_1,Y)\nonumber\\

490:         & = & H(Y_1|X_1,Y) - H(Y_1|\hhY_1,X_1,Y)\nonumber\\

491:         & = & H(Y_1|X_1,Y) - (1-q)H(Y_1|X_1,Y) - q H(Y_1|\hY_1,X_1,Y)\nonumber\\

492:     \label{eqn:feasibility_CEG_extended_chain}

493:         & = & q I(Y_1;\hY_1|X_1,Y).

494: \end{eqnarray}

495: Combining this with the constraint $q \in [0,1]$ we obtain the following corollary to theorem \ref{thm:CEG_EAF}:

496:     \begin{proposition}

497:     \label{prop:TAF}

498:     \it

499:         For the general relay channel, any rate $R$ satisfying

500:         \[

501:             R \le I(X;Y|X_1) + \left[ \frac{I(X_1;Y)}{I(Y_1;\hY_1|X_1,Y)}\right]^* I(X;\hY_1|X_1,Y),

502:         \]

503:         for some joint distribution $p(x,x_1,y,y_1,\hy_1) = p(x)p(x_1)p(y,y_1|x,x_1)p(\hy_1|x_1,y_1)$,

504:         is achievable.

505:     \end{proposition}

506: \smallskip

507: This proposition generalizes corollary \ref{corr:single_relay_TAF} by performing a general Wyner-Ziv (WZ) compression combined with

508: TS (which is a specific type of WZ compression), intended to guarantee feasibility of the first compression step.

509: In section \ref{sec:Gauss_relay} we apply a similar idea to the EAF relaying in the Gaussian relay channel scenario with coded modulation.

510: Before we discuss the relationship between joint-decoding and time-sharing we present an intuitive way to view the EAF strategy.

511:

512:

513:

514:

515:

516:

517:

518: \subsection{An Intuitive View of Estimate-and-Forward}

519: \label{sec:intuitive_explanation}

520: Consider the rate bound and the feasible region of theorem \ref{thm:CEG_EAF}

521: given in equations \eqref{eqn:EAF_rate} and \eqref{eqn:EAF_feasible}.

522: We note that the following intuitive explanation does not constitute a proof but it does provide an insight into the

523: relay achievability results. We emphasize that the achievable rates stated in this section can also be proved rigorously.

524: In the following we provide an intuitive insight into these expressions in terms of a flow on a graph.

525:

526: In constructing the intuitive information flow representation for the relay channel, we first need to specify

527: the underlying assumptions and the operations performed at the source, the relay and the destination receiver:

528: \begin{itemize}

529:     \item The source and the relay generate their codebooks independently.

530:

531:     \item The relay compresses its channel output $\yvec_1$ into $\hyvec_1$, which represents the information

532:     conveyed to the destination receiver to assist in decoding the source message.

533:

534:     \item Based on the above two restrictions we have the following Markov chain:

535:     $p(x)p(x_1)p(y,y_1|x,x_1)p(\hy_1|x_1,y_1)$.

536:

537:     \item The relay input signal $\xvec_1$ is based only on the compressed $\hyvec_1$.

538:

539:     \item The destination uses $\xvec_1$, $\hyvec_1$ and $\yvec$ to decode the source message $\xvec$.

540: \end{itemize}

541: We also use the following representation for transmission, reception and compression:

542: \begin{itemize}

543:     \item  We represent an information source

544:         as a source whose output flow is equal to its information rate.

545:

546:     \item We represent the compression

547:         operation as a flow sink whose flow consumption is equal to the mutual information between the

548:         original and the compressed sequences.

549:

550:     \item The destination is represented as a flow sink.

551:

552:     \item  As in a standard flow on a graph, the flows are additive, following the

553:         chain rule of mutual information.

554: \end{itemize}

555:

556: Now consider the flow diagram of figure \ref{fig:Relay_flow}.

557: \begin{figure}[ht]

558:     \centering

559:     \scalebox{0.6}{\includegraphics{Intuitive_Current_Flow.eps}}

560:     \caption{The information flow budget for the general relay channel with compression at the relay.}

561:     \label{fig:Relay_flow}

562: \end{figure}

563: As can be observed from the figure, the source has an output flow of

564: \[

565:     i_T = I(X;Y,\hY_1,X_1) = I(X;Y,\hY_1|X_1).

566: \]

567: This follows from the fact that the destination uses $\xvec_1,\hyvec_1$ and $\yvec$ to decode $\xvec$ and the fact that

568: $X$ and $X_1$ are independent. This total

569: flow reaches the receiver through two branches, the direct branch (D) which carries a flow of $i_D = I(X;Y|X_1)$ and

570: the relay branch (ABCE). Now, the quantities in the relay branch are calculated given $X_1$ and $Y$ to represent only the

571: rate increase over the direct path.

572: The relay branch has four parts: an edge

573: (A) which carries a flow of $I(X;\hY_1|X_1,Y)$, a sink (B) with consumption $I(Y_1;\hY_1|X_1,Y)$,

574: a relay source (C) with an output flow of $I(X_1;Y)$ and an edge (E) from the relay to the destination.

575: Here, the relay transmission to the destination (C) is done at a fixed rate $I(X_1;Y)$, independent of the type

576: of compression $p(\hy_1|x_1,y_1)$ used at the relay, since we always transmit from the relay to the destination

577: at the maximum possible rate in order to obtain the best performance.

578: The rate loss due to compression is represented by $I(\hY_1;Y_1|X_1,Y)$, since we consider only the excess

579: rates over the direct one.

580:

581: Now, from the laws of flow addition and conservation, the overall flow from the source to the destination through

582: the relay branch is $i_E = i_A + i_B + i_C$. To assist the direct link (D) we need

583: the flow on (ABCE) to be positive. In theorem \ref{thm:CEG_EAF} the scheme considers only the last two elements,

584: $i_B + i_C$, and verifies that their net flow is positive, namely

585: \begin{equation}

586: \label{eqn:intuive_CEG}

587:     -I(Y_1;\hY_1|X_1,Y) + I(X_1;Y) > 0.

588: \end{equation}

589: This condition guarantees a net positive flow on (ABCE) since always $i_A \ge 0$.

590: Now, the flow to the destination can be obtained as the minimum

591: \begin{equation}

592: \label{eqn:intuit_CEG_Rate}

593:     R \le \min \left\{ i_D + i_E, i_T\right\},

594: \end{equation}

595: where the second term in the minimum is obtained from the transmitter, since

596: trivially the information rate at the receiver cannot exceed $i_T$. We note that because $i_B + i_C \ge 0$, the minimum in \eqref{eqn:intuit_CEG_Rate}

597: is $i_T$. Therefore, the resulting achievable rate is

598: \[

599:     R \le I(X;Y,\hY_1|X_1),

600: \]

601: which combined with \eqref{eqn:intuive_CEG} gives the result of \cite[theorem 6]{CoverG:79}.

602:

603:

604: However, the condition in \eqref{eqn:intuive_CEG} is not tight since

605: even when $i_B + i_C < 0$ the  flow on (ABCE) is still non-negative if the entire sum $i_A + i_B + i_C$ is

606: non-negative, i.e.

607: \begin{equation}

608: \label{eqn:tighter_condition}

609:     I(X;\hY_1|X_1,Y) - I(\hY_1;Y_1|X_1,Y) + I(X_1;Y) \ge 0.

610: \end{equation}

611: Then, the achievable rate to the destination is bounded by

612: \begin{equation}

613: \label{eqn:intuit_jt_rate}

614:     R \le i_D + i_E = I(X;Y|X_1) + I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y).

615: \end{equation}

616: Indeed, when the flow through the relay branch (ABCE) is zero we obtain the

617: non-cooperative rate $I(X;Y|X_1)$.

618:  Plugging the expression \eqref{eqn:intuit_jt_rate} into \eqref{eqn:intuit_CEG_Rate} yields the following achievable rate:

619: \begin{eqnarray*}

620:     R  & \le & \min\left\{i_D + i_E , i_T\right\}\\

621:        &  =  & \min\left\{ I(X;Y|X_1) + I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y), I(X;Y,\hY_1|X_1)\right\}\\

622:        &  =  & I(X;Y|X_1) + \min\left\{  I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y), I(X;\hY_1|X_1,Y)\right\}.

623: %       &  =  & \min\left\{ I(X,X_1;Y)  - I(\hY_1;Y_1|X,X_1,Y), I(X;Y,\hY_1|X_1)\right\}.

624: \end{eqnarray*}

625: Combining this with \eqref{eqn:tighter_condition} (informally) proves the following proposition:

626: \begin{proposition}

627:     \label{prop:jt-rate}

628:     \it

629:     For the general relay channel, any rate $R$ satisfying

630:     \begin{eqnarray*}

631:         R  & \le & I(X;Y|X_1) + \min\left\{  I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y), I(X;\hY_1|X_1,Y)\right\},\\

632:         \mbox{subject to } I(X_1;Y) & \ge & I(\hY_1;Y_1|X,X_1,Y) =  I(\hY_1;Y_1|X_1,Y) - I(X;\hY_1|X_1,Y),

633:     \end{eqnarray*}

634:     for some joint distribution $p(x,x_1,y,y_1,\hy_1) = p(x)p(x_1)p(y,y_1|x,x_1)p(\hy_1|x_1,y_1)$, is achievable.

635: \end{proposition}

636: \bigskip

637: The proof of proposition \ref{prop:jt-rate} can be made formal using joint-decoding at the destination receiver,

638: but as in the next subsection we show that this expression is a special case of \cite[theorem 6]{CoverG:79} obtained by time-sharing, we omit the

639: details of the proof here.

640:

641:

642:

643:

644:

645:

646:

647:

648:

649:

650: \subsection{Joint-Decoding and Time-Sharing}

651: In the original work of \cite[theorem 6]{CoverG:79}, the decoding

652: procedure at the destination receiver for decoding the message

653: $w_{i-1}$ at time $i$ is composed of three steps (the notations

654: below are identical to \cite[theorem 6]{CoverG:79}. The reader is referred to the proof of \cite[theorem 6]{CoverG:79}

655: to recall the definitions of the sets and variables used in the following description):

656: \begin{enumerate}

657:     \item Decode the relay index $s_i$ using $\yvec(i)$, the received signal at time $i$.

658:     \item Decode the relay message $z_{i-1}$, using $s_i$, the received

659:         signal $\yvec(i-1)$ and the previously decoded $s_{i-1}$.

660:     \item Decode the source message $w_{i-1}$ using $\yvec(i-1)$,

661:     $z_{i-1}$ and $s_{i-1}$.

662: \end{enumerate}

663:

664: Evidently, when decoding the relay message $z_{i-1}$ at the second step, the receiver does not make use of

665: the statistical dependence

666: between $\hyvec_{1}(i-1)$, the relay sequence at time $i-1$, and $\xvec(w_{i-1})$, the transmitted source codeword at time $i-1$.

667: The way to use this dependence is to jointly decode $z_{i-1}$ and $w_{i-1}$ after decoding $s_i$ and $s_{i-1}$. The joint-decoding procedure

668: then has the following steps:

669:     \begin{enumerate}

670:         \item From $\yvec(i)$, the received signal at time $i$, the receiver decodes $s_i$ by looking for a unique

671:         $s \in \mS$, the set of indices used to select $\xvec_1$, such that $\big(\xvec_1(s), \yvec(i)\big) \in \styp$.

672:         As in \cite[theorem 6]{CoverG:79},

673:         the correct $s_i$ can be decoded with an arbitrarily small probability of error by taking $n$ large

674:         enough as long as

675:         \begin{equation}

676:         \label{eqn:R0_conds}

677:             R_0 \le I(X_1;Y),

678:         \end{equation}

679:         where $||\mS|| = 2^{nR_0}$.

680:

681:         \item The receiver now knows the set $S_{s_i}$ into which $z_{i-1}$ (the relay message at time $i-1$) belongs.

682:         Additionally, from decoding at time $i-1$

683:         the receiver knows $s_{i-1}$, used to generate $z_{i-1}$.

684:

685:         \item The receiver generates the set

686:         $\mL(i-1) = \left\{ w \in \mW: \big(\xvec(w), \yvec(i-1), \xvec_1(s_{i-1})\big) \in \styp\right\}$.

687:

688:         \item The receiver now looks for a unique $w \in \mL(i-1)$ such that

689:             $\big( \xvec(w), \yvec(i-1), \hyvec_1(z|s_{i-1}), \xvec_1(s_{i-1})\big) \in \styp$ for some

690:             $z \in S_{s_i}$.

691:             If such a unique $w$ exists then it is the decoded $\hw_{i-1}$,

692:             otherwise the receiver declares an error.

693:     \end{enumerate}

694: We do not give here a formal proof for the resulting rate expression, but as indicated in section

695: \ref{sec:intuitive_explanation}, the rate expression resulting from this decoding procedure is given by

696: proposition \ref{prop:jt-rate}.

697:

698: Let us now compare the rates obtained with joint-decoding (proposition \ref{prop:jt-rate}) with the rates obtained with the sequential

699: decoding of \cite[theorem 6]{CoverG:79}:

700: to that end we consider the joint-decoding result of proposition \ref{prop:jt-rate} with the extended probability chain of

701: \eqref{eqn:extended_prob_chain}:

702: \[

703: %    \label{eqn:extended_chain}

704:     p(x,x_1,y,y_1,\hy_1,\hhy_1) = p(x)p(x_1)p(y,y_1|x,x_1) p(\hy_1|x_1,y_1) p(\hhy_1 | \hy_1),

705: \]

706: where $\hhY_1$ represents the information relayed to the destination.

707: %Applying exactly the same steps as in the proof of proposition \ref{prop:jt-rate} we obtain the expression:

708: %\begin{eqnarray}

709: %    \label{eqn:rate_2_hats}

710: %        R & \le & I(X;Y|X_1)  + \min\left\{I(X_1;Y) - I(\hhY_1;Y_1|X,X_1,Y),  I(X;\hhY_1|X_1,Y)\right\}\\

711: %    \label{eqn:constr_2_hats}

712: %        \mbox{subject to }I(X_1;Y) & \ge & I(\hhY_1;Y_1|X,X_1,Y) = I(\hhY_1;Y_1|X_1,Y) - I(X;\hhY_1|X_1,Y).

713: %\end{eqnarray}

714: %Now consider the expressions in \eqref{eqn:rate_2_hats} and \eqref{eqn:constr_2_hats}. Setting $p(\hhy_1 | \hy_1)$ the same as in

715: %\eqref{eqn:assignment_hhy}

716: %subject to $q \in [0,1]$, we obtain that \eqref{eqn:rate_2_hats} and \eqref{eqn:constr_2_hats} become

717: Expanding the expressions of proposition \ref{prop:jt-rate} using the assignment \eqref{eqn:assignment_hhy}, similarly to proposition

718: \ref{prop:TAF}, we obtain the expressions:

719: \begin{eqnarray}

720:     \label{eqn:rate_2_hats_q}

721:         R & \le & I(X;Y|X_1)  + \min\left\{I(X_1;Y) - q I(\hY_1;Y_1|X,X_1,Y),  q I(X;\hY_1|X_1,Y)\right\}\\

722:     \label{eqn:constr_2_hats_q}

723:         \mbox{subject to }I(X_1;Y) & \ge & q I(\hY_1;Y_1|X,X_1,Y) = q\left(I(\hY_1;Y_1|X_1,Y) - I(X;\hY_1|X_1,Y)\right).

724: \end{eqnarray}

725:

726: We can now make the following observations:

727: \begin{enumerate}

728:     \item Setting $q = 1$ we obtain proposition \ref{prop:jt-rate}. Additionally, if

729:         $I(X_1;Y) > I(\hY_1;Y_1|X_1,Y)$ then both proposition \ref{prop:jt-rate} and \cite[theorem 6]{CoverG:79} give

730:         identical expressions.

731:     \item When $q=1$ and

732:         \begin{equation}

733:         \label{eqn:cond_joint}

734:             I(\hY_1;Y_1|X_1,Y) - I(X;\hY_1|X_1,Y) < I(X_1;Y) < I(\hY_1;Y_1|X_1,Y),

735:         \end{equation}

736:         then {\em for the same} mapping $p(\hy_1|x_1, y_1)$ we obtain that proposition \ref{prop:jt-rate} provides rate but

737:         \cite[theorem 6]{CoverG:79} does not. The rate expression under these conditions is

738:         \begin{equation}

739:         \label{eqn:rate_q_is_one}

740:             R  \le  I(X;Y|X_1)  + I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y).

741:         \end{equation}

742:

743:     \item

744:         Now, fix the probability chain $p(x)p(x_1)p(y,y_1|x,x_1)p(\hy_1|x_1,y_1)$ and  examine the expressions

745:         \eqref{eqn:rate_2_hats_q} and \eqref{eqn:constr_2_hats_q} when \eqref{eqn:cond_joint} holds:

746:         when $q < 1$, then \eqref{eqn:cond_joint}

747:         guarantees that condition \eqref{eqn:constr_2_hats_q} is still satisfied.

748:         If $q$ is close enough to $1$ such that we also have

749:         $I(X_1;Y) \le q I(\hY_1;Y_1|X_1,Y)$, the rate from \eqref{eqn:rate_2_hats_q}, i.e.,

750:         \[

751:             R \le  I(X;Y|X_1)  + I(X_1;Y) - qI(\hY_1;Y_1|X,X_1,Y),

752:         \]

753:         is now greater than \eqref{eqn:rate_q_is_one}. In this case we can keep decreasing $q$ until

754:         \begin{equation}

755:         \label{eqn:optim_q}

756:             I(X_1;Y) - qI(\hY_1;Y_1|X,X_1,Y) = qI(X;\hY_1|X_1,Y)

757:         \end{equation}

758:         at which point the rate becomes

759:         \begin{equation}

760:         \label{eqn:rate-jt-optim}

761:             R \le I(X;Y|X_1)  + q I(X;\hY_1|X_1,Y).

762:         \end{equation}

763:         This rate can be obtained from \cite[theorem 6]{CoverG:79} by applying the extended probability chain of \eqref{eqn:extended_prob_chain},

764:         as long as $I(X_1;Y) \ge q I(\hY_1;Y_1|X_1,Y)$.

765: \end{enumerate}

766: We therefore conclude that all the rates that joint decoding allows can also be obtained  or exceeded by the original EAF with an

767: appropriate time sharing\footnote{This argument is due to Shlomo Shamai and Gerhard Kramer.}.

768:

769:     Note that equality in \eqref{eqn:optim_q} implies

770:     \[

771:         q_{opt} = \min\left\{1,\frac{I(X_1;Y)}{I(\hY_1;Y_1|X,X_1,Y)+ I(X;\hY_1|X_1,Y)}\right\}

772:             =\min\left\{1, \frac{I(X_1;Y)}{I(\hY_1;Y_1|X_1,Y)}\right\}

773:             ,

774:     \]

775:     hence $q_{opt}$ is the maximum $q$ that makes the mapping $p(\hy_1|x_1,y_1)$ feasible for \cite[theorem 6]{CoverG:79}.

776:     Plugging $q_{opt}$ into \eqref{eqn:rate-jt-optim}, we obtain the rate expression of proposition \ref{prop:TAF}.

777:

778:

779:     Finally, consider again the region where joint decoding is useful \eqref{eqn:cond_joint}:

780:     \begin{eqnarray*}

781:         I(\hY_1;Y_1|X,X_1,Y) & \le I(X_1;Y) \le &   I(\hY_1;Y_1|X_1,Y)\\

782:     \Rightarrow   0 & \le I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y) \le &   I(\hY_1;Y_1|X_1,Y) - I(\hY_1;Y_1|X,X_1,Y)\\

783:    \Rightarrow   0 & \le I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y) \le &    I(X;\hY_1|X_1,Y)\\

784:         \Rightarrow   0 & \le \frac{I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y)}{I(X;\hY_1|X_1,Y)} \le &   1.

785:     \end{eqnarray*}

786:     If $I(X;\hY_1|X_1,Y) > 0$, then using time-sharing on $\hY_1$ with

787:     \begin{equation}

788:     \label{eqn:assign_q_joint}

789:         q = \frac{I(X_1;Y)-I(\hY_1;Y_1|X,X_1,Y)}{I(X;\hY_1|X_1,Y)}

790:     \end{equation}

791:     into equations \eqref{eqn:rate_CEG_extended_chain} and \eqref{eqn:feasibility_CEG_extended_chain} yields:

792:     \[

793:         I(X;Y|X_1) + q I(X;\hY_1|X_1,Y) = I(X;Y|X_1) + I(X_1;Y)-I(\hY_1;Y_1|X,X_1,Y),

794:     \]

795:     as long as $I(X_1;Y) \ge qI(\hY_1;Y_1|X_1,Y)$, or equivalently

796:     \begin{equation}

797:     \label{eqn:cond_TS_CEG6}

798:         q \le \frac{I(X_1;Y)}{I(\hY_1;Y_1|X_1,Y)}.

799:     \end{equation}

800:     Plugging assignment \eqref{eqn:assign_q_joint} into \eqref{eqn:cond_TS_CEG6} we obtain:

801:     \begin{eqnarray*}

802:         \frac{I(X_1;Y)-I(\hY_1;Y_1|X,X_1,Y)}{I(X;\hY_1|X_1,Y)} & \le & \frac{I(X_1;Y)}{I(\hY_1;Y_1|X_1,Y)}\\

803:         \Rightarrow \quad \left(I(X_1;Y)-I(\hY_1;Y_1|X,X_1,Y)\right)I(\hY_1;Y_1|X_1,Y)

804:             & \le & I(X_1;Y)I(X;\hY_1|X_1,Y)\\

805:         \Rightarrow \quad I(X_1;Y)I(\hY_1;Y_1|X_1,Y)- I(X_1;Y)I(X ;\hY_1|X_1,Y)

806:             & \le & I(\hY_1;Y_1|X ,X_1,Y)I(\hY_1;Y_1|X_1,Y)\\

807:         \Rightarrow \quad I(X_1;Y)I(\hY_1;Y_1|X ,X_1,Y) & \le & I(\hY_1;Y_1|X ,X_1,Y)I(\hY_1;Y_1|X_1,Y)\\

808:         \Rightarrow \quad I(X_1;Y) & \le & I(\hY_1;Y_1|X_1,Y),

809:     \end{eqnarray*}

810:     as long as $I(\hY_1;Y_1|X ,X_1,Y) > 0$,

811:     which is the region where joint-decoding is supposed to be useful.

812:     Hence the joint-decoding rate of proposition \ref{prop:jt-rate} can be obtained by time sharing

813:     on the \cite[theorem 6]{CoverG:79} expression. Therefore, joint-decoding does not improve on the

814:     rate of \cite[theorem 6]{CoverG:79}. In fact the rate of proposition \ref{prop:TAF} is always at least as large as

815:     that of proposition \ref{prop:jt-rate}.

816:

817:

818:

819:

820:

821:

822: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

823: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

824: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

825: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

826: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

827:

828:

829:

830:

831:

832:

833:

834:

835:

836:

837:

838:

839: \section{An Achievable Rate for the Relay Channel with Multiple Relays}

840: \label{sec:MultipleRelays}

841: When the source-relay

842: % The multiple-relay channel was studied by in \cite{GuptaKumar:2003}, \cite{XieKumar:2004}, \cite{XieKumar:2005}

843: % and \cite{Kramer:2003}. These results are based on decoding at the relays (according to an hierarchy)

844: % prior to generating the relay codeword. However, when the

845: channel is very noisy then,

846: as discussed in the introduction, it may be better not to use the relay at all than to

847: employ the decode-and-forward strategy. Alternatively, when decode-and-forward is not useful, one

848: could employ estimate-and-forward. One result for multiple relays based on EAF can be found in

849: \cite{Gastpar:2002} which considered the two-relay case. In \cite[theorem 3]{Kramer:2005} the EAF

850: strategy, with partial decoding was applied to the multiple-relay case, and in \cite[theorem 4]{Kramer:2005} a mixed

851: EAF and DAF strategy was applied.

852: However, as stated in \cite[remark 22, remark 23]{Kramer:2005} applying the general estimate-and-forward to

853: a network with an arbitrary number of relays

854: is computationally impractical due to the large number of constraints that

855: characterize the feasible region (for two relays

856: we need to satisfy $9$ constraints). Moreover, the rate computation is prohibitive since

857: it would imply solving a non-convex optimization problem. In conclusion, an alternative achievable rate

858: to that based on decode-and-forward, which can also be evaluated with a reasonable effort, has not been presented to date.

859: In this section we derive an explicit achievable rate based on estimate-and-forward.

860: The strategy we use is to pick the auxiliary random variable

861: such that the feasibility constraints are satisfied. This is not a trivial choice since setting the

862: auxiliary random variable in theorem \ref{thm:CEG_EAF} to be the relay channel output (i.e. $\hY_1 = Y_1$) does

863: not remove this constraint, and we therefore need to incorporate time-sharing as discussed in the following.

864:

865:

866:

867:

868:

869:

870:

871:

872: \subsection{A General Achievable Rate}

873: \label{sec:achieve_general}

874: %The rate derived in the previous section is based on separate decoding of the pair $(\xvec_i, \hyvec_i)$ for

875: %the relay $i$. However, we can improve on this rate if, when decoding the information for relay $i$, we

876: %use all the information obtained from decoding for previous relays. This results in the following theorem:

877: We extend the idea of section \ref{sec:ts-single-subsec} to the relay channel with $N$ relays. This channel consists of

878: a source with channel input $X$, $N$ relays where for relay $i$, $X_i$ denotes the channel input and $Y_i$ denotes the channel output,

879: and a destination with channel output $Y$. This channel is denoted by

880: $\left( \mX \times_{i=1}^N \mX_i,p(y,y_1,...,y_N|x,x_1,...,x_N), \mY \times_{i=1}^N \mY_i\right)$.

881: Let $\Xvec = \left(X_1,X_2,...,X_N\right)$ and $\Yvec = \left(Y_1,Y_2,...,Y_N\right)$. We now have the

882: following theorem:

883: \begin{theorem}

884:     \label{thm:achieve_N_result_2}

885:     \it

886:     For the general multiple-relay channel with $N$ relays,

887:         $\Big( \mX \times_{i=1}^N \mX_i,p(y,y_1,...,y_N|x,x_1,...,x_N),$ ${\mY \times_{i=1}^N \mY_i}\Big)$, any rate $R$ satisfying

888:         \[

889:             R \le I(X;Y|\Xvec) + \sum_{\theta = 1}^{2^N-1} P(\Bt)I(X;\Yvec_{\Bt}|\Xvec,Y),

890:         \]

891:         where $\Bt$ is an $N$-element vector that contains $'1'$ in the locations where the $N$-bit binary representation

892:         of the integer $\theta$ contains $'1'$,

893:         $P(\Bt) = \prod_{i:\Bt_i = 0} (1-q_i) \prod_{i:\Bt_i = 1} q_i$, $\Bt_i$ is the $i$'th bit in the

894:         $N$-bit binary representation of $\theta$,

895:         $\Yvec_{\Bt} = \left(Y_{i_1}, Y_{i_2},...,Y_{i_M} \right)$, where $i_1$, $i_2$, ..., $i_M$ are

896:         the locations of the $'1'$ in $\Bt$, and

897:         \begin{equation}

898:             \label{eqn:q_i_assgn_full_thm}

899:   %     q_i = \left[\frac{\sum_{j = 1}^{L_i} P_l(\Bj)I(X_i;Y|\tXvec_i(\tZvec_i),\tYvec_{l,{\Bj}}(\tZvec_i)) }

900:            q_i = \left[\frac{I(X_i;Y|\tZvec_i) }

901:                         {H(Y_i|\Xvec,Y) -\sum_{j = 1}^{2^{L'_i}-1} P_{l'}(\Bj)I(Y_i;\tYvec_{l',\Bj}(\tTvec_i)|\Xvec,Y)}

902:             \right]^*,

903:         \end{equation}

904:                 for the joint distribution

905:         $p(x,x_1,x_2,...,x_N,y,y_1,y_2,...,y_N)=p(x)p(x_1)...p(x_N)p(y,y_1,...,y_N|x,x_1,...,x_N)$ is achievable.

906:         In \eqref{eqn:q_i_assgn_full_thm} $\tZvec_i$ is the vector containing all the variables

907:         $X_j$ decoded prior to decoding $X_i$, $\tTvec_i$ is a vector that contains all the variables $\hY_p$ decoded

908:         prior to decoding $\hY_i$, and $\tYvec_{l',\Bj}(\tTvec_i)$ contains all the $Y_{l_r'}$, such that

909:          $\hY_{l_r'} \in \tTvec_i$, and $r$ is a location of  $\; '1'$ in the $L_i'$-bit binary representation of $j$.

910:          $L_i'$ is the number of elements in $\tTvec_i$. Note that if $\hY_p \in \tTvec_i$ then we must have

911:          $X_p \in \tZvec_i$.

912:

913: \end{theorem}

914:

915:         To facilitate the understanding of the expressions in theorem \ref{thm:achieve_N_result_2}, we first look at a simplified case

916:         where the destination decodes each relay message independently of the messages of the other relays.

917: %Therefore, each $s_{i,k}$ is decoded using only $\yvec(k)$

918: %and each $\mL_i(k-1)$ is generated based only on $\xvec_1(s_{1,k-1}),\xvec_2(s_{2,k-1}),...,\xvec_N(s_{N,k-1})$ and $\yvec(k-1)$.

919: %This is the simplest implementation of the multi-relay EAF strategy.

920: This can be obtained from theorem

921: \ref{thm:achieve_N_result_2} by setting $\tZvec_i = \varnothing$ and $\tTvec_i = \varnothing$, $i = 1,2,...,N$. The result is summarized in the

922: following corollary:

923: \begin{corollary}

924:     \label{corr:achieve_N_result_1}

925:     \it

926:     For the general multiple-relay channel

927:         $\left( \mX \times_{i=1}^N \mX_i,p(y,y_1,...,y_N|x,x_1,...,x_N), \mY \times_{i=1}^N \mY_i\right)$, any rate $R$ satisfying

928:         \begin{equation}

929:         \label{eqn:rate_expression_multi_relay}

930:             R \le I(X;Y|\Xvec) + \sum_{\theta = 1}^{2^N-1} P(\Bt)I(X;\Yvec_{\Bt}|\Xvec,Y),

931:         \end{equation}

932:         is achievable,

933: %        where $\Bt$ is an $N$-element vector that contains $'1'$ in the location where the $N$-bit binary representation

934: %        of the integer $\theta$ contains $'1'$,

935: %        $P(\Bt) = \prod_{i:\Bt_i = 0} (1-q_i) \prod_{i:\Bt_i = 1} q_i$,

936: %        $\Yvec_{\Bt} = \left(Y_{i_1}, Y_{i_2},...,Y_{i_M} \right)$, where $i_1$, $i_2$, ..., $i_M$ are

937: %        the locations of the $'1'$ in $\Bt$, and

938:         where

939:         \begin{equation}

940:             \label{eqn:q_i_assgn_simple}

941:             q_i = \left[ \frac{I(X_i;Y)}{H(Y_i|\Xvec,Y)} \right]^*,

942:         \end{equation}

943:         for the joint distribution

944:         $p(x,x_1,x_2,...,x_N,y,y_1,y_2,...,y_N)=p(x)p(x_1)...p(x_N)p(y,y_1,...,y_N|x,x_1,...,x_N)$.

945: \end{corollary}

946: \bigskip

947:

948:     In the multi-relay strategy we employ in this section

949:     each relay transmits its channel output $Y_i$ with probability $q_i$, independent of the other relays.

950:     Therefore, when considering a group of $N$ relays,

951:     the probability that any subgroup of relays will transmit their channel outputs simultaneously is simply the product of all transmission

952:     probabilities $q_i$ at each relay in the group, multiplied by the product of erasure probabilities $(1-q_i)$ for each relay in the complement

953:     group. Now, considering the rate expression of \eqref{eqn:rate_expression_multi_relay} we observe

954:     that the rate is obtained by taking all possible groupings of relays. For each grouping the resulting rate is the

955:     rate obtained when using all the channel outputs of all the relays in that group to assist in decoding. This is indicated by the

956:     term $\Yvec_{\Bt}$. This rate has to be weighted by the probability of such an overlap occurring, which is given by $P(\Bt)$.

957:     We then sum over all such groupings to obtain the achievable rate. The parameter $q_i$ for each relay, which is determined by

958:     \eqref{eqn:q_i_assgn_simple}, can be interpreted by considering the terms in the denominator and numerator: the denominator

959:     $H(Y_i|\Xvec,Y)$ is the (exponent of the) size of uncertainty at the destination receiver about relay $i$'s output $Y_i$. The numerator is

960:     the (exponent of the) size of the information set that can be transmitted from relay $i$ to the destination receiver. Therefore, the fraction

961:     $\frac{I(X_i;Y)}{H(Y_i|\Xvec,Y)}$

962:      can be interpreted as the maximal fraction of the uncertainty at the destination about relay $i$'s channel output $Y_i$,

963:      that can be compensated by the relay transmission. Of course, this fraction has to be upper bounded by one. In the more general setup

964:      of theorem \ref{thm:achieve_N_result_2}, the decoding of the relay information from relay $i$ is done by using the information

965:      from the relays which were decoded before relay $i$ to assist in decoding. This results in the conditioning at the numerator and

966:      the negative terms in the denominator, both of which contribute to increasing the value of $q_i$.

967:

968:

969: \subsection{Proof of Theorem \ref{thm:achieve_N_result_2}}

970:     \subsubsection{Overview of Coding Strategy}

971:     The transmitter generates its codebook independent of the relays. Next, each relay generates

972:     its own codebook independent of the other relays following the construction of \cite[theorem 6]{CoverG:79}, with the mapping

973:     $p(\hy_i|x_i,y_i)$ at each relay set to the time-sharing mapping of \eqref{eqn:time-sharing-mapping} with parameter

974:     $q_i$. The destination receiver first needs to decode all the relay codewords $\left\{X_i^n\right\}_{i=1}^N$ and use this information to decode the relay messages

975:     $\left\{\hY_i^n\right\}_{i=1}^N$. To this end, the destination receiver decides on a decoding order for the $X_i^n$ sequences and

976:     a decoding order for the $\hY_i^n$ sequences. These decoding orders determine the maximum value of $q_i$ that can be selected for each relay,

977:     thereby allowing us to determine the auxiliary variables' mappings and obtain an explicit rate expression. Finally, the receiver uses all the

978:     decoded $\left\{X_i^n\right\}_{i=1}^N$ and $\left\{\hY_i^n\right\}_{i=1}^N$ sequences, together with its channel output to decode the

979:     source message.

980:

981:     \bigskip

982:     We now give the details of the construction:

983:     fix the distributions $p(x)$, $p(x_1)$, $p(x_2)$,...,$p(x_N)$, and

984:     \begin{equation}

985:     \label{eqn:aux_assign_simple}

986:         p(\hy_i|x_i,y_i) = \left\{

987:             \begin{array}{cl}

988:                 q_i &, \hy_i = y_i\\

989:                 1-q_i & ,\hy_i = \Omega \notin \mY_i

990:             \end{array}

991:         \right.,

992:     \end{equation}

993:     $i = 1,2,...,N$. Let $\mW = \left\{1,2,...,2^{nR}\right\}$ be the source message set.

994:     \subsubsection{Code Construction at the Transmitter and the Relays}

995:     \begin{itemize}

996:         \item  Code construction and transmission at the transmitter are the same as in \cite[theorem 6]{CoverG:79}.

997:

998:         \item Code construction at the relays is done by repeating the relay code construction

999:             of \cite[theorem 6]{CoverG:79}

1000:             for each relay, where relay $i$ uses the distributions $p(\hy_i|x_i,y_i)$ and

1001:             $p(x_i)$. We denote the relay message, the transmitted message and the partition

1002:             set at relay $i$ at time $k$ with $z_{i,k}$, $s_{i,k}$ and $S^{(i)}_{s_{i,k}}$ respectively. The message set

1003:             for $s_i$ is denoted $\mW_i$, where $||\mW_i|| = 2^{n R_i}$. The message set for $z_i$ is denoted

1004:             $\mW_i'$, $||\mW_i'|| = 2^{n R_i'}$. The relay codewords at relay $i$ are denoted $\hyvec_i(z_i|s_i)$, and

1005:             the transmitted codewords at relay $i$ are denoted $\xvec_i(s_i)$, $s_i \in \mW_i$, $z_i \in \mW_i'$.

1006:     \end{itemize}

1007:

1008:     \subsubsection{Decoding and Encoding at the Relays}$ $

1009:

1010:     Consider relay $i$ at time $k-1$:

1011:     \begin{itemize}

1012:         \item From the relay transmission at time $k-1$, the relay knows $s_{i,k-1}$. Now the relay looks for a message

1013:             $z_i \in \mW_i'$, such that

1014:             \[

1015:                 \big(\hyvec_i(z_i|s_{i,k-1}), \yvec_i(k-1), \xvec_i(s_{i,k-1}) \big) \in \styp(\hY_i, Y_i,X_i).

1016:             \]

1017:             Following the argument in \cite[theorem 6]{CoverG:79}, for $n$ large enough there is such a message $z_i$ with

1018:             a probability that is arbitrarily close to $1$, as long as

1019:             \begin{equation}

1020:             \label{eqn:relay_rate_constr_1}

1021:                 R_i' > I(\hY_i;Y_i|X_i) + \eps = q_iH(Y_i|X_i) + \eps.

1022:             \end{equation}

1023:         Denote this message with $z_{i,k-1}$.

1024:         \item Let $s_{i,k}$ be the index of the partition of $\mW_i'$ into which $z_{i,k-1}$ belongs, i.e.,

1025:             $z_{i,k-1} \in S^{(i)}_{s_{i,k}}$.

1026:

1027:         \item At time $k$ relay $i$ transmits $\xvec_i(s_{i,k})$.

1028:     \end{itemize}

1029:

1030:     \subsubsection{Decoding at the Destination}

1031: %    Therefore, at each relay we have the rate constraint \ref{eqn:relay_rate_constr_1}, i.e.

1032: %    \[

1033: %        R'_i > q_i H(Y_i|X_i) + \eps

1034: %    \]

1035:     \begin{itemize}

1036:     \item Consider the decoding of $w_{k-1}$ at time $k$, for a

1037:     fixed decoding order: let $\tZvec_i$ contain all the $X_j$'s whose $s_{j,k}$'s are decoded prior

1038:     to decoding $s_{i,k}$. Therefore, decoding $s_{i,k}$ is done by looking for a unique message $s_i \in \mW_i$

1039:     such that

1040:     \begin{eqnarray*}

1041:        &  &  \big(\xvec_i(s_{i}),\xvec_{m_1}(s_{m_1,k}), \xvec_{m_2}(s_{m_2,k}),...,\xvec_{m_{M_i}}(s_{m_{M_i},k}),\yvec(k)

1042:        \big) \in \styp(X_i,\tZvec_i,Y),

1043: %       &  &  \qquad\qquad \hyvec_{l_1}(z_{l_1,k-1}|s_{l_1,k-1}),\hyvec_{l_2}(z_{l_2,k-1}|s_{l_2,k-1}),...,

1044: %            \hyvec_{l_{L_i}}(z_{l_{L_i},k-1}|s_{l_{L_i},k-1})\big) \in \styp(X_i,Y,\tZvec_i),

1045:     \end{eqnarray*}

1046:     where $m_1$, $m_2$,...,$m_{M_i}$ enumerate all the $X_j$'s in

1047:     $\tZvec_i = \left(X_{m_1},X_{m_2},...X_{m_{M_i}} \right)$.

1048: %    and $l_1$, $l_2$,...,$l_{L_i}$ enumerate all the $\hY_l$'s in $\tZvec_i$. Of course, if $\hY_l$ is in

1049: %    $\tZvec_i$ then also $X_l$ must be there (i.e. we use only legal orderings).

1050:     Assuming correct decoding at the previous steps, then by the point-to-point channel achievability proof

1051:     we obtain that the probability of error for decoding

1052:     $s_{i,k}$ can be made arbitrarily small by taking $n$ large enough as long as

1053:     \begin{equation}

1054:         R_i < I(X_i;Y,\tZvec_i) - \eps = I(X_i;Y|\tZvec_i) - \eps.

1055:     \end{equation}

1056: \begin{comment}

1057:                 Evaluating $I(X_i;Y|\tZvec_i)$ we can write

1058:                 \begin{eqnarray*}

1059:                     I(X_i;Y|\tZvec_i) & = &  I(X_i;Y|\tYvec_i(\tZvec_i),\tXvec_i(\tZvec_i))\\

1060:                                       & = &  H(X_i|\tYvec_i(\tZvec_i),\tXvec_i(\tZvec_i)) - H(X_i|\tYvec_i(\tZvec_i),\tXvec_i(\tZvec_i),Y)\\

1061:                                       & = &  (1-q_{l_1})(H(X_i|\tYvec_{i,2}^{L_i}(\tZvec_i),\tXvec_i(\tZvec_i)) - H(X_i|\tYvec_{i,2}^{L_i}(\tZvec_i),\tXvec_i(\tZvec_i),Y)) +\\

1062:                     &  & \qquad\qquad             q_{l_1}(H(X_i|Y_1,\tYvec_{i,2}^{L_i},\tXvec_i(\tZvec_i)) - H(X_i|Y_1,\tYvec_{i,2}^{L_i},\tXvec_i(\tZvec_i),Y))\\

1063:                                       & = &  (1-q_{l_1}) I(X_i;Y|\tYvec_{i,2}^{L_i}(\tZvec_i),\tXvec_i(\tZvec_i))+

1064:                                             q_{l_1}I(X_i;Y|Y_1,\tYvec_{i,2}^{L_i}(\tZvec_i),\tXvec_i(\tZvec_i))\\

1065:                                       & ... & \\

1066:                                       & = & \sum_{j = 1}^{L_i} P_l(\Bj)I(X_i;Y|\tXvec_i(\tZvec_i),\tYvec_{l,{\Bj}}(\tZvec_i)),

1067:                 \end{eqnarray*}

1068:                 \[

1069:                     P_l(\Bj) = \left\{ \begin{array}{cl}

1070:                                             q_{l_i}, & \Bj_i = 1\\

1071:                                         1 - q_{l_i}, & \Bj_i = 0

1072:                                         \end{array}

1073:                                         \right.,

1074:                 \]

1075:                 $\tYvec_{l,{\Bj}}(\tZvec_i) = (Y_{l_{m_1}}, Y_{l_{m_2}},...,Y_{l_{m_M}})$, $m_1, m_2,...,m_M$ are the locations

1076:                 of $'1'$ in the binary representation of $j$ and $l_i$'s are the indices of the elements of $\Yvec$ in $\tZvec_i$.

1077:                 For example, if $j = 3$ then $m_1 = m_3 = 1$ and $m_2 = 0$. Then,

1078:                 \begin{eqnarray*}

1079:                     P_l(\mbox{Bin}(3)) & = & q_{l_1}(1-q_{l_2})q_{l_3},\\

1080:                     \tYvec_{l,{\mbox{Bin}(3)}}(\tZvec_i)) & = & (Y_{l_1}, Y_{l_3}).

1081:                 \end{eqnarray*}

1082:                 Now, decoding $z_{i,k-1}$ is done only after decoding $s_{i,k}$ therefore the vector

1083:                 of priors should contain $X_i$. Also, if $\hY_j \in \tTvec_i$ then also $X_j \in \tTvec_i$. Now,

1084:                 the destination decodes $z_{i,k-1}$ in the following way:

1085:

1086: \end{comment}

1087:     Let $\tTvec_i$ contain all the $\hY_{l'}$'s whose $z_{l',k-1}$'s are decoded prior to decoding $z_{i,k-1}$. Note that all

1088:     the $\left\{s_{i,k-1}\right\}_{i=1}^N$ were already decoded at the previous time interval when $w_{k-2}$ was decoded.

1089:

1090:         \item The destination generates the set

1091:             \begin{eqnarray}

1092:             &  & \mL_i(k-1) = \bigg\{ z_i \in \mW'_i : \big(\yvec(k-1), \hyvec_i(z_i|s_{i,k-1}),

1093:                 \hyvec_{l'_1}(z_{l'_1,k-1}|s_{l'_1,k-1}), ...,\hyvec_{l'_{L'_i}}(z_{l'_{L'_i},k-1}|s_{l'_{L'_i},k-1}),\nonumber\\

1094:             \label{eqn:set_for_multi_relay_EAF}

1095:             &  & \qquad \qquad \qquad \qquad\qquad\qquad

1096: %                    \xvec_{m'_1}(s_{m'_1,k-1}),...,\xvec_{m'_{M'_i}}(s_{m'_{M'_i},k-1}) \big) \in \styp(\hY_i,Y,\tTvec_i)\bigg\},

1097:                     \xvec_{1}(s_{1,k-1}),\xvec_{2}(s_{2,k-1}),...,\xvec_{N}(s_{N,k-1}) \big) \in \styp(Y,\hY_i,\tTvec_i,\Xvec)\bigg\},

1098:             \end{eqnarray}

1099:             where

1100: %            $m'_1$, $m'_2$,...,$m'_{M'_i}$ enumerate all the $X_j$'s in $\tTvec_i$ and

1101:             $l'_1$, $l'_2$,...,$l'_{L'_i}$ enumerate all the $\hY_{l'}$'s in $\tTvec_i$.

1102:             The average size of $\mL_i(k-1)$ can be bounded using the standard technique of

1103:             \cite[equation (36)]{CoverG:79} and the fact that when $z_i \ne z_{i,k-1}$, then the corresponding

1104:             $\hyvec_i(z_i|s_{i,k-1})$ is independent of all the variables in \eqref{eqn:set_for_multi_relay_EAF}

1105:             except $\xvec_i(s_{i,k-1})$. The resulting bound is

1106:             \[

1107:                  E\left\{||\mL_i(k-1)||\right\} \le 1 + 2^{n(R_i' - I(\hY_i;Y,\Xvec_{-i},\tTvec_i|X_i) + 3\eps)},

1108:             \]

1109:             where $\Xvec_{-i}$ is an $N-1$ element vector that contains all the elements of $\Xvec$ except $X_i$.

1110: %    \begin{figure}[ht]

1111: %         \epsfxsize=0.26\textwidth \leavevmode\centering\epsffile{Markov_chain.eps}

1112: %        \caption{The Markov relationship between the random variables used for decoding $z_i$, for the case

1113: %        of $L_i' = 2$. Edges in the figure represent Markov relationship.}

1114: %        \label{fig:Markov-relation}

1115: %    \end{figure}

1116:

1117:         \item Now, the destination looks for a unique $z_{i} \in \mL_i(k-1) \bigcap S^{(i)}_{s_{i,k}}$.

1118:             Therefore, making the probability of error arbitrarily small by taking $n$ large enough

1119:             can be done as long as

1120:             \begin{equation}

1121:             \label{eqn:Ri'_upper_bound}

1122:                 R'_i < I(\hY_i;Y,\Xvec_{-i},\tTvec_i|X_i) + I(X_i;Y|\tZvec_i) -  4\eps.

1123:             \end{equation}

1124:     \end{itemize}

1125:      We note that using the assignment \eqref{eqn:aux_assign_simple} we can write

1126:          \begin{eqnarray*}

1127:              I(\hY_i;Y,\Xvec_{-i},\tTvec_i|X_i) & =  & H(Y,\Xvec_{-i},\tTvec_i|X_i) - H(Y,\Xvec_{-i},\tTvec_i|X_i,\hY_i)\\

1128:                     & = & H(Y,\Xvec_{-i},\tTvec_i|X_i)  - (1-q_i)H(Y,\Xvec_{-i},\tTvec_i|X_i) - q_i H(Y,\Xvec_{-i},\tTvec_i|X_i,Y_i)\\

1129:                     & = & q_iH(Y,\Xvec_{-i},\tTvec_i|X_i)  - q_i H(Y,\Xvec_{-i},\tTvec_i|X_i,Y_i)\\

1130:                     & = & q_i I(Y_i;Y,\Xvec_{-i},\tTvec_i|X_i)\\

1131:                     & = & q_i\left(H(Y_i|X_i) - H(Y_i|Y,\Xvec_{-i},X_i, \hY_{l_1'},\tTvec_{i,2}^{L_i'})  \right)\\

1132:                     & = & q_i\Big(q_{l_1'}H(Y_i|X_i) + (1-q_{l_1'}) H(Y_i|X_i)\\

1133:                     &   & \qquad \qquad  - q_{l_1'}H(Y_i|Y,\Xvec_{-i},X_i, Y_{l_1'},\tTvec_{i,2}^{L_i'})   - (1-q_{l_1'})H(Y_i|Y,\Xvec_{-i},X_i,\tTvec_{i,2}^{L_i'})\Big)\\

1134:                     & = & q_i\Big(q_{l_1'}I(Y_i;Y,\Xvec_{-i}, Y_{l_1'},\tTvec_{i,2}^{L_i'}|X_i)

1135:                              + (1-q_{l_1'}) I(Y_i;Y,\Xvec_{-i}, \tTvec_{i,2}^{L_i'}|X_i)\Big)\\

1136:                     & ... &\\

1137:                     & = & q_i \sum_{j = 0}^{2^{L'_i}-1} P_{l'}(\Bj)I(Y_i;Y,\Xvec_{-i} ,\tYvec_{l',\Bj}(\tTvec_i)|X_i),

1138:          \end{eqnarray*}

1139:     where $P_{l'}(\Bj) = \prod_{r:\Bj_{r} = 1} q_{l'_r} \times \prod_{r:\Bj_{r} = 0}(1- q_{l'_r})$,

1140:     $\Bj_{r}$ is the $r$-th bit of the $L_i'$-bit binary representation of $j$, and

1141:     $\tYvec_{l',\Bj}(\tTvec_i) = \left(Y_{l_{n_1}'}, Y_{l_{n_2}'},...,Y_{l_{n_M}'}\right)$,

1142:     $n_1, n_2,...,n_M$ are the locations of '1' in the $L_i'$-bit binary representation of $j$, and

1143:     $l_{n_1}', l_{n_2}',...,l_{n_M}'$ are the indices of the $\hY_i$'s in locations $n_1, n_2,...,n_M$ in $\tTvec_i$.

1144:     For example, if $L_i' = 3$ and $j = 5$ then $\mbox{Bin}_3(5) = (1,0,1)$ and $M = 2$,

1145:     $n_1 = 1, n_2 = 3$. Letting $\tTvec_i = \left(\hY_3,\hY_1,\hY_2\right)$

1146:     then $l_1' = 3, l_2' = 1$ and $l_3' = 2$, and

1147:                 \begin{eqnarray*}

1148:                     P_{l'}(\mbox{Bin}_3(5)) & = & q_{l_1'}(1-q_{l_2'})q_{l_3'},\\

1149:                     \tYvec_{l',{\mbox{Bin}_3(5)}}(\tTvec_i) & = & (Y_{l_1'}, Y_{l_3'}) = (Y_3,Y_2).

1150:                 \end{eqnarray*}

1151:

1152:     \subsubsection{Combining the Bounds on $R'_i$}

1153:     Applying the above scheme requires that $R'_i$ satisfies \eqref{eqn:relay_rate_constr_1} and

1154:     \eqref{eqn:Ri'_upper_bound}:

1155:     \begin{eqnarray*}

1156:         q_i H(Y_i|X_i) + \eps < R'_i & < & q_i \sum_{j = 0}^{2^{L'_i}-1} P_{l'}(\Bj)I(Y_i;Y,\Xvec_{-i} ,\tYvec_{l',\Bj}(\tTvec_i)|X_i)

1157:             + I(X_i;Y|\tZvec_i) - 4\eps,

1158: %            &  & \qquad + \sum_{j = 0}^{L_i} P_l(\Bj)I(X_i;Y|\tXvec_i(\tZvec_i),\tYvec_{l,{\Bj}}(\tZvec_i)) - 4\eps

1159:     \end{eqnarray*}

1160:     which is satisfied if

1161:     \begin{eqnarray*}

1162: %        q_i \le \frac{\sum_{j = 1}^{L_i} P_l(\Bj)I(X_i;Y|\tXvec_i(\tZvec_i),\tYvec_{l,{\Bj}}(\tZvec_i)) - 5\eps}

1163: %                        {H(Y_i|X_i) -\sum_{j = 1}^{L'_i} P_{l'}(\Bj)I(Y_i;Y|\tXvec_i(\tTvec_i),\tYvec_{l',\Bj}(\tTvec_i))}

1164:         q_i & < & \frac{I(X_i;Y|\tZvec_i) - 5\eps}

1165:                         {H(Y_i|X_i) -\sum_{j = 0}^{2^{L'_i}-1} P_{l'}(\Bj)I(Y_i;Y,\Xvec_{-i} ,\tYvec_{l',\Bj}(\tTvec_i)|X_i)}\\

1166:         & = & \frac{I(X_i;Y|\tZvec_i) - 5\eps}

1167:                         {H(Y_i|X_i) - I(Y_i;Y,\Xvec_{-i}|X_i) -\sum_{j = 1}^{2^{L'_i}-1} P_{l'}(\Bj)I(Y_i;\tYvec_{l',\Bj}(\tTvec_i)|\Xvec,Y)}\\

1168:         & = & \frac{I(X_i;Y|\tZvec_i) - 5\eps}

1169:                         {H(Y_i|\Xvec,Y) -\sum_{j = 1}^{2^{L'_i}-1} P_{l'}(\Bj)I(Y_i;\tYvec_{l',\Bj}(\tTvec_i)|\Xvec,Y)}.

1170:     \end{eqnarray*}

1171:     Combining with the constraint $0 \le q_i \le 1$ gives the condition in \eqref{eqn:q_i_assgn_full_thm}.

1172:

1173:     Finally, %having set all the $q_i$'s, $i=1,2,...,N$

1174:     the achievable rate is obtained as

1175:     follows: using the decoded $\left\{\hyvec_i(z_{i,k-1}|s_{i,k-1})\right\}_{i=1}^N$ (assuming

1176:     correct decoding of all $\left\{z_{i,k-1} \right\}_{i=1}^N$) the receiver decodes the source

1177:     message $w_{k-1}$ by looking for a message $w \in \mW$ such that

1178:     \begin{eqnarray*}

1179:        &  &\Big(\xvec(w), \hyvec_1(z_{1,k-1}|s_{1,k-1}), \hyvec_2(z_{2,k-1}|s_{2,k-1}),...,

1180:             \hyvec_N(z_{N,k-1}|s_{N,k-1}),\\

1181:        &  &\qquad \qquad       \xvec_1(s_{1,k-1}),\xvec_2(s_{2,k-1}),...,\xvec_N(s_{N,k-1}),\yvec(k-1)\Big)

1182:         \in \styp(X,\hYvec,\Xvec,Y),

1183:     \end{eqnarray*}

1184:     where $\hYvec = \left(\hY_1, \hY_2,...,\hY_N\right)$.

1185:     This results in an achievable rate of

1186:     \[

1187:         R \le I(X;Y,\hYvec,\Xvec) = I(X;Y,\hYvec|\Xvec).

1188:     \]

1189:     Plugging in the assignments of all the $\hY_i$'s, we get the following explicit rate expression:

1190:     \begin{eqnarray*}

1191:             I(X;Y,\hYvec|\Xvec) & = & I(X;Y|\Xvec) + I(X;\hYvec|\Xvec,Y)\\

1192:             & = & I(X;Y|\Xvec) + H(X|\Xvec,Y)  -  H(X|\Xvec,Y,\hYvec)\\

1193:             & = & I(X;Y|\Xvec) + H(X|\Xvec,Y)  -  (1-q_1) H(X|\Xvec,Y,\hYvec_2^N) - q_1 H(X|\Xvec,Y,\hYvec_2^N,Y_1)\\

1194:             & = & I(X;Y|\Xvec) +  (1-q_1) I(X;\hYvec_2^N|\Xvec,Y) + q_1 I(X;\hYvec_2^N,Y_1|\Xvec,Y)\\

1195:             & ... &\\

1196:             & = & I(X;Y|\Xvec) + \sum_{\theta = 1}^{2^N-1} P(\Bt)I(X;\Yvec_{\Bt}|\Xvec,Y).

1197:         \end{eqnarray*}

1198: \tend

1199:

1200:

1201:

1202:

1203:

1204:

1205: \begin{comment}

1206: \subsection{An Achievable Rate Using Separate Decoding of the Auxiliary Variables}

1207: \label{sec:achieve_with_individual_assignments}

1208: A simple rate expression is obtained when the receiver decodes each $\xvec_i$ sequence and each $\hyvec_i$ sequence

1209:  without using the previously decoded sequences to improve decoding. Therefore, each $s_{i,k}$ is decoded using only $\yvec(k)$

1210: and each $\mL_i(k-1)$ is generated based only on $\xvec_1(s_{1,k-1}),\xvec_2(s_{2,k-1}),...,\xvec_N(s_{N,k-1})$ and $\yvec(k-1)$.

1211: This is the simplest implementation of the multi-relay EAF strategy. This can be obtained from theorem

1212: \ref{thm:achieve_N_result_2} by setting $\tZvec_i = \varnothing$ and $\tTvec_i = \varnothing$, $i = 1,2,...,N$. The result is summarized in the

1213: following corollary:

1214: \begin{corollary}

1215:     \label{corr:achieve_N_result_1}

1216:     \it

1217:     For the general multiple-relay channel

1218:         $\left( \mX \times_{i=1}^N \mX_i,p(y,y_1,...,y_N|x,x_1,...,x_N), \mY \times_{i=1}^N \mY_i\right)$, any rate $R$ satisfying

1219:         \[

1220:             R \le I(X;Y|\Xvec) + \sum_{\theta = 1}^{2^N-1} P(\Bt)I(X;\Yvec_{\Bt}|\Xvec,Y),

1221:         \]

1222:         is achievable,

1223: %        where $\Bt$ is an $N$-element vector that contains $'1'$ in the location where the $N$-bit binary representation

1224: %        of the integer $\theta$ contains $'1'$,

1225: %        $P(\Bt) = \prod_{i:\Bt_i = 0} (1-q_i) \prod_{i:\Bt_i = 1} q_i$,

1226: %        $\Yvec_{\Bt} = \left(Y_{i_1}, Y_{i_2},...,Y_{i_M} \right)$, where $i_1$, $i_2$, ..., $i_M$ are

1227: %        the locations of the $'1'$ in $\Bt$, and

1228:         where

1229:         \begin{equation}

1230:             \label{eqn:q_i_assgn_simple}

1231:             q_i = \left[ \frac{I(X_i;Y)}{H(Y_i|\Xvec,Y)} \right]^*,

1232:         \end{equation}

1233:         for the joint distribution

1234:         $p(x,x_1,x_2,...,x_N,y,y_1,y_2,...,y_N)=p(x)p(x_1)...p(x_N)p(y,y_1,...,y_N|x,x_1,...,x_N)$.

1235: \end{corollary}

1236:

1237:        \bigskip

1238:        \begin{proof}

1239:

1240:

1241:            \subsubsection{Decoding at the Destination}

1242:            We first decode the relay messages and then decode the source message.

1243:            We decode the relay messages independently: decoding first $s_{i,k}$ and then

1244:            $z_{i,k}$, using only $s_{i,k}$ and $s_{i,k-1}$. Therefore, for each relay we get a constraint

1245:            \begin{equation}

1246:                I(X_i;Y) \ge I(\hY_i;Y_i|X_i).

1247:            \end{equation}

1248:

1249:

1250:

1251:            \subsubsection{Assigning the Auxiliary Variables According to \eqref{eqn:aux_assign_simple}}

1252:                Using the assignment in \eqref{eqn:aux_assign_simple} for each auxiliary random

1253:                variable, the set of constraints becomes:

1254:                \begin{equation}

1255:                    \label{eqn:cond_q_i_simple}

1256:                    I(X_i;Y) \ge q_i H(Y_i|X_i,Y).

1257:                \end{equation}

1258:

1259:                Finally, we address the question of the assignment of $q_i$: as can be seen from the derivation

1260:                step above, at each substitution we would like to maximize the $q_i$. Therefore, maximizing

1261:                \eqref{eqn:cond_q_i_simple} while keeping $q_i \le 1$ yields \eqref{eqn:q_i_assgn_simple}.

1262:        \end{proof}

1263:

1264: \end{comment}

1265:

1266:

1267: \subsection{Discussion}

1268: To demonstrate the usefulness of the explicit EAF-based achievable rate of theorem \ref{thm:achieve_N_result_2} we

1269: compare it with the DAF-based method of

1270: \cite[theorem 3.1]{XieKumar:2005} for the two-relay case.

1271: For this scenario there are five possible DAF setups, and the maximum of the five resulting rates is taken as the

1272: DAF-based rate:

1273: \begin{eqnarray*}

1274:     R^{DAF} & = & \sup_{p(x,x_1,x_2)} \max \left\{R_1, R_2, R_{12}, R_{21}, R_G \right\}\\

1275:     R_1 & = & \max_{x_2\in \mX_2} \min\left\{I(X; Y_1|X_1,x_2), I(X; Y|X_1, x_2) + I(X_1; Y|x_2) \right\}\\

1276:     R_2 & = & \max_{x_1\in \mX_1} \min\left\{I(X; Y_2|X_2,x_1), I(X; Y|X_2, x_1) + I(X_2; Y|x_1) \right\}\\

1277:     R_{12} & = & \min\left\{I(X; Y_1|X_1, X_2), I(X; Y_2|X_1, X_2) + I(X_1; Y_2|X_2), I(X; Y|X_1, X_2)+ I(X_1; Y|X_2) + I(X_2; Y)  \right\}\\

1278:     R_{21} & = & \min\left\{I(X; Y_2|X_1, X_2), I(X; Y_1|X_1, X_2) + I(X_2; Y_1|X_1), I(X; Y|X_1, X_2)+ I(X_2; Y|X_1) + I(X_1; Y)  \right\}\\

1279:     R_G & = & \min \left\{I(X; Y_1|X_1, X_2), I(X; Y_2|X_1, X_2), I(X,X_1,X_2; Y) \right\},

1280: \end{eqnarray*}

1281: where $R_1$ is the rate obtained when only relay 1 is active, $R_2$ is the rate obtained when only relay 2 is active,

1282: $R_{12}$ is the rate obtained when relay 1 decodes first and relay 2 decodes second and $R_{21}$ is

1283: the rate obtained when this order is reversed.

1284:  $R_G$ is the rate obtained when both relays form one group\footnote{In fact, since we take the supremum over all p.d.f.'s

1285:  $p(x,x_1,x_2)$ we do not need to explicitly include $R_1$ and $R_2$ in the maximization, but

1286:  they are included here to provide a complete presentation.}.

1287: Now, as in the single-relay case, DAF is limited by the worst source-relay link. Therefore, if

1288: \begin{equation}

1289:     \label{eqn:DAF_inequality}

1290:      R^{PTP} >

1291:         \max_{p(x|x_1,x_2), (x_1,x_2) \in \mX_1 \times \mX_2} \big\{I(X;Y_1|x_1,x_2), I(X;Y_2|x_1,x_2)\big\},

1292: \end{equation}

1293: where $  R^{PTP} = \max_{p(x|x_1,x_2), (x_1,x_2) \in \mX_1 \times \mX_2} I(X; Y|x_1,x_2)$

1294: is the point-to-point rate,

1295: then it is better not to use \cite[theorem 3.1]{XieKumar:2005} at all, but rather set the relays to transmit

1296: the symbol pair $(x_1,x_2) \in \mX_1 \times \mX_2$ such that the point-to-point rate is maximized.

1297: However, the rate obtained using corollary \ref{corr:achieve_N_result_1} for the two-relay case is given by

1298: \begin{eqnarray*}

1299:     R^{TS-EAF} & \le &  \sup_{p(x)p(x_1)p(x_2)} I(X; Y|X_1,X_2) + q_1(1-q_2) I(X;Y_1|X_1,X_2,Y)  \\

1300:     &  & \phantom{xxxxxxxxxxxxxxx}  +(1-q_1)q_2I(X;Y_2|X_1,X_2,Y) + q_1q_2I(X;Y_1,Y_2|X_1,X_2,Y),

1301: \end{eqnarray*}

1302: where $q_1$ and $q_2$ are positive and determined according to \eqref{eqn:q_i_assgn_simple}.

1303: This expression can, in general, be greater than

1304: %$\max_{p(x|x_1,x_2), (x_1,x_2) \in \mX_1 \times \mX_2} I(X; Y|x_1,x_2)$

1305: $R^{PTP}$

1306: even when

1307: \eqref{eqn:DAF_inequality} holds, for channels where the relay to destination links are very good.

1308:  Hence, this explicit achievable expression provides an easy way to improve upon the

1309: DAF-based achievable rates when the source-to-relay links are very noisy.

1310:

1311: To demonstrate this, consider the channel given in table \ref{table:channel_for_example} over binary RVs

1312: $X$, $X_1$, $X_2$, $Y$, $Y_1$ and $Y_2$. The channel

1313: \begin{table}

1314:        \caption{$p(y,y_1,y_2|x,x_1,x_2)$ for the EAF example.}

1315:         \label{table:channel_for_example}

1316:        \begin{tabular}[h!]{|c||c|c|c|c|c|c|c|c|}

1317:                \hline

1318:                $(x,x_1,x_2)$& \multicolumn{7}{c}{$p(y,y_1,y_2|x,x_1,x_2)$} &\\

1319:                \cline{2-9}

1320:                & 000 & 001 & 010 & 011 & 100  & 101 & 110 & 111\\

1321:               \hline \hline

1322:              000 &  8.047314e-2  &  1.948360e-1 &   2.041506e-1  &  4.523933e-2 &

1323:                  2.423322e-1  &  7.057734e-3 &   1.310053e-1  &  9.490483e-2\\

1324:

1325:              001 &  8.601616e-1  &  6.643713e-2  &  1.662897e-2  &  1.937227e-2 &

1326:                  1.859104e-2  &  1.741020e-2 &   8.833169e-4  &  5.154431e-4 \\

1327:

1328:              010 &  3.131504e-1  &  1.821840e-1  &  5.618147e-2  &  1.522841e-1 &

1329:                  5.290856e-2  &  1.555570e-1 &   3.214581e-2  &  5.558854e-2  \\

1330:

1331:              011 &  5.183921e-3  &  3.704625e-1  &  1.641795e-2  &  2.208356e-1 &

1332:                  1.660775e-3  &  2.355928e-1  &  9.590170e-4  &  1.488874e-1 \\

1333:

1334:              100 &  8.116746e-3  &  8.139504e-3  &  9.387860e-2  &  1.736515e-2 &

1335:                  1.039350e-1  &  7.308714e-3  &  7.612555e-1  &  7.612563e-7\\

1336:

1337:              101 &  4.824126e-2  &  1.196128e-1  &  1.705739e-1  &  7.127199e-2 &

1338:                  4.631349e-2  &  1.955324e-1  &  1.928693e-1  &  1.555848e-1\\

1339:

1340:              110 &  9.367321e-2  &  1.248830e-1  &  1.873302e-1  &  6.161358e-2 &

1341:                  5.827773e-2  &  1.906660e-1  &  1.589616e-1  &  1.245946e-1\\

1342:

1343:              111 &  9.141272e-7  &  9.141263e-1  &  7.618061e-3  &  3.435473e-2 &

1344:                  7.974830e-4  &  4.117531e-2  &  9.302643e-4  &  9.969457e-4\\

1345:            \hline

1346:        \end{tabular}

1347: \end{table}

1348: distribution was constructed under the independence constraint

1349: \[

1350:     p(y,y_1,y_2|x,x_1,x_2) = p(y_1|x,x_1,x_2) p(y_2|x,x_1,x_2) p(y|x,x_1,x_2,y_1,y_2),

1351: \]

1352: i.e. given the channel inputs, the two relay outputs are independent.

1353: This channel is characterized by noisy source-relay links, while

1354: the link from relay $1$ to the destination has low noise. Therefore, DAF is inferior to the point-to-point

1355: transmission but EAF is able to exceed this rate, by giving up a small amount of rate on the direct link (compared

1356: to the point-to-point rate) and gaining more rate through the relays. The numerical evaluation of the

1357: rates for this channel produces\footnote{The resulting rates were obtained by optimizing for the rates with

1358: random initial input distributions. The optimization was repeated $50$ times for each rate and the maximum resulting rate

1359: was recorded. The m-files used for this evaluation are available at {\tt http://cn.ece.cornell.edu}.}

1360: \begin{eqnarray*}

1361: R^{PTP}  & = & 0.2860323,\\

1362: R^{DAF} & = & 0.2408629,\\

1363: R^{TS-EAF} & = & 0.2924798,

1364: \end{eqnarray*}

1365: where the optimal distributions that achieve these rates are summarized in tables \ref{table:opt_DAF_dist} and

1366: \ref{table:opt_EAF_dist}.

1367: \begin{table}

1368: \centering

1369: \begin{minipage}{5cm}

1370: \centering

1371:     \caption{Optimal distribution for DAF}

1372:     \label{table:opt_DAF_dist}

1373:     \vspace{-0.2cm}

1374:        \begin{tabular}[!h]{|c||c|}

1375:                \hline

1376:                $(x,x_1,x_2)$ & $p(x,x_1,x_2)$ \\

1377:                 \hline \hline

1378:                 000 & 5.698189907239905e-009\\

1379:                 001 & 5.259061814752764e-017\\

1380:                 010 & 4.301809992760095e-009\\

1381:                 011 & 4.424193267301109e-001\\

1382:                 100 & 6.792096128437060e-009\\

1383:                 101 & 4.740938235494830e-017\\

1384:                 110 & 3.207903771562940e-009\\

1385:                 111 & 5.575806532698892e-001\\

1386:                \hline

1387:        \end{tabular}

1388: \end{minipage}

1389: \phantom{xxxxxxxxx}

1390: \begin{minipage}{5cm}

1391:     \centering

1392:     \caption{Optimal distribution for EAF}

1393:     \label{table:opt_EAF_dist}

1394:         \vspace{-0.2cm}

1395:        \begin{tabular}[!h]{|c|}

1396:                \hline

1397:             $\Pr(X = 0)  = 4.3752093552645e-001$\\

1398: %            \hline

1399:             $\Pr(X_1 = 0) =1.9388669163312e-001 $\\

1400: %            \hline

1401:             $\Pr(X_2 = 0) = 1.000000000000000e-009$\\

1402:             \hline

1403:        \end{tabular}

1404: \end{minipage}

1405:

1406: \end{table}

1407: The optimal DAF distribution fixes both $X_1$ and $X_2$ to $'1'$ and sets the probability of $X$ to be

1408: $\Pr(X = 0) = 0.442419$, as expected for the case where the relays limit the achievable rate. For the EAF, the

1409: useless relay $2$ is fixed to $1$, to facilitate transmission with the useful relay $1$. In accordance, we

1410: obtain time sharing proportions of $q_1 = 0.156947$ and $q_2 \approx 0$ for relay $1$ and relay $2$ respectively.

1411: We note that in this scenario, we actually have that even the single-relay TS-EAF outperforms the two-relay DAF.

1412:

1413:

1414:

1415: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

1416: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

1417: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

1418: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

1419: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

1420: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

1421: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

1422:

1423:

1424:

1425:

1426:

1427:

1428:

1429:

1430:

1431:

1432:

1433:

1434: \section{The Gaussian Relay Channel}

1435: \label{sec:Gauss_relay}

1436:

1437: In this section we investigate the application of estimate-and-forward with time-sharing

1438: to the Gaussian relay channel. For this channel, the common practice is to use Gaussian codebooks and

1439: Gaussian quantization at the relay. The rate in Gaussian scenarios where coded modulation is applied, is usually

1440: analyzed by applying DAF at the relay. In this section we show that when considering coded modulation, one should select the

1441: relay strategy according to the channel condition: Gaussian selection seems a good choice when the SNR at the relay

1442: is low and DAF appears to be superior when the relay enjoys high SNR conditions. However, for

1443: intermediate SNR there is much room

1444: for optimizing the estimation mapping at the relay.

1445:

1446:

1447: In the following we first recall the Gaussian relay channel with a Gaussian codebook, and

1448: then we consider the Gaussian relay channel under a BPSK modulation constraint. Since we focus on the mapping

1449: at the relay we consider here the Gaussian relay channel with an orthogonal relay of finite

1450: capacity $C$, also considered in

1451: \cite{Goldsmith:2006}. This scenario is depicted in figure \ref{fig:Gauss_relay}.

1452:

1453: \begin{figure}[ht]

1454:      \epsfxsize=0.6\textwidth \leavevmode\centering\epsffile{Gaussian_Relay_Channel.eps}

1455:     \caption{The Gaussian relay channel with a finite capacity noiseless relay link between the relay and the

1456:     destination.}

1457:     \label{fig:Gauss_relay}

1458: \end{figure}

1459:

1460: Here $Y_1 = g \cdot X + N_1$ is the channel output at the relay, $Y = X + N$ is the channel output at the receiver, which decodes

1461: the message based on $(Y^n, \hY_1^n)$. Let $\mW = \left\{1,2,...,2^{nR}\right\}$ denote the source message set, and let

1462: the source have an average power constraint $P$:

1463: \[

1464:     \frac{1}{n}\sum_{i=1}^n x_i^2(w) \le P, \qquad \forall w \in \mW.

1465: \]

1466: The relay signal $\hY_1^n$ is transmitted to the destination through a finite-capacity noiseless link of

1467: capacity $C$. For this scenario the expressions of \cite[theorem 6]{CoverG:79} specialize to

1468: \begin{subequations}

1469: \begin{eqnarray}

1470:     \label{eqn:rate_Gauss}

1471:     R & \le & I(X;Y,\hY_1)\\

1472:     \label{eqn:constraint_Gauss}

1473:     \mbox{subject to } C & \ge & I(\hY_1;Y_1|Y),

1474: \end{eqnarray}

1475: \end{subequations}

1476: with the Markov chain $X,Y - Y_1 - \hY_1$.

1477:

1478: We also consider in this section the DAF method whose information rate is given by (see \cite[theorem 1]{CoverG:79})

1479: \[

1480:     R_{DAF} = \min \left\{I(X;Y_1), I(X;Y)+C\right\},

1481: \]

1482: and the upper bound of \cite[theorem 3]{CoverG:79}:

1483: \[

1484:     R_{upper} = \min\left\{I(X;Y)+C, I(X;Y,Y_1)\right\}.

1485: \]

1486: We note that although these expressions were derived for the finite, discrete alphabets case, following the argument

1487: in \cite[remark 30]{Kramer:2005}, they also hold for the Gaussian case.

1488:

1489: \subsection{The Gaussian Relay Channel with Gaussian Codebooks}

1490: When $X \sim \mN(0,P)$, i.i.d., then the channel outputs at the relay and the receiver are jointly Normal RVs:

1491: \[

1492:     \left(

1493:         \begin{array}{c}

1494:         y\\

1495:         y_1

1496:         \end{array} \right) \sim \mN\left( \left( \begin{array}{c}

1497:                                                         0\\ 0 \end{array}

1498:                                                         \right) , \left( \begin{array}{cc}

1499:                                                                             P + \sigD & gP \\

1500:                                                                                 gP    & g^2P + \sigR \end{array} \right) \right).

1501: \]

1502: The compression is achieved by adding to $Y_1$ a zero mean independent Gaussian RV, $N_Q$:

1503: \begin{equation}

1504:     \label{eqn:def_qaussian_quant}

1505:     \hY_1 = Y_1 + N_Q, \qquad N_Q \sim \mN(0, \sigQ).

1506: \end{equation}

1507: We refer to the assignment \eqref{eqn:def_qaussian_quant} as Gaussian-quantization estimate-and-forward (GQ-EAF).

1508: Evaluating the expressions \eqref{eqn:rate_Gauss} and \eqref{eqn:constraint_Gauss}

1509: with assignment \eqref{eqn:def_qaussian_quant} results in  (see also \cite{Goldsmith:2006}):

1510: %\begin{eqnarray*}

1511: %    I(X;Y)  & = & \log(1+P) +

1512: %    I(Y_1;\hY_1|Y)  & = & \log\left(1 + \frac{1}{\sigQ} + \frac{gP}{\sigQ(P+1)}  \right)\\

1513: %    I(X;\hY_1|Y)    & = & \log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right)\\

1514: %\end{eqnarray*}

1515: \begin{subequations}

1516: \begin{eqnarray}

1517:     \label{eqn:rate_Gauss_evaluated}

1518:     I(X;Y, \hY_1)   & = & \frac{1}{2}\log_2\left(1+P + \frac{g^2P}{1 + \sigQ}  \right)\\

1519:         \label{eqn:constraint_Gauss_evaluated}

1520:     I(Y_1;\hY_1|Y)  & = & \frac{1}{2}\log_2\left(1  + \frac{1+P+g^2P}{\sigQ(P+1)}  \right).

1521: \end{eqnarray}

1522: \end{subequations}

1523: The feasibility condition \eqref{eqn:constraint_Gauss} yields

1524: \[

1525:     \sigQ\ge  \frac{1+P+g^2P}{(2^{2C}  - 1)(P+1)},

1526: \]

1527: and because maximizing the rate \eqref{eqn:rate_Gauss_evaluated} requires minimizing $\sigQ$, the resulting

1528: GQ-EAF rate expression is

1529: \[

1530:     R  \le \frac{1}{2} \log_2\left(1+P + \frac{g^2P}{1 + \frac{1+P+g^2P}{(2^{2C}  - 1)(P+1)}}  \right).

1531: \]

1532: Now, when using Gaussian quantization at the relay

1533: %(as it is the most efficient way to compress $Y_1$, the Gaussian channel output at the relay {\Huge reference} ),

1534: it is

1535: obvious that time sharing does not help: we need the minimum $\sigQ$ in order to maximize

1536: the rate. This minimum is

1537: obtained only when the entire capacity of the relay link is dedicated to the transmission of the (minimally)

1538: quantized $Y_1$.

1539: However, when we consider the Gaussian relay channel with coded modulation, the situation is quite different, as

1540: we show in the remainder of this section.

1541:

1542:

1543:

1544:

1545:

1546: \subsection{The Gaussian Relay Channel with Coded Modulation}

1547: Consider the Gaussian relay channel where $X$ is an equiprobable BPSK signal of amplitude $\sqrt{P}$:

1548: \begin{equation}

1549:     \label{eqn:def_PX}

1550:     \Pr(X = \sqrt{P}) = \Pr(X = -\sqrt{P}) = \frac{1}{2}.

1551: \end{equation}

1552: Under these conditions, the received symbols $(Y,Y_1)$ are no longer jointly Gaussian, but follow a Gaussian-mixture

1553: distribution:

1554: \begin{eqnarray*}

1555:     f(y,y_1) & = & \Pr(X = \sqrt{P})f(y,y_1|x = \sqrt{P}) + \Pr(X = -\sqrt{P})f(y,y_1|x = -\sqrt{P}) \\

1556:              & = & \frac{1}{2}\left(G_y(\sqrt{P},\sigD)G_{y_1}(g\sqrt{P} , \sigR) + G_y(-\sqrt{P},\sigD)G_{y_1}(-g\sqrt{P} , \sigR)\right),

1557: \end{eqnarray*}

1558: where

1559: \begin{equation}

1560:     \label{eqn:def_G}

1561:     G_x(a,b) \triangleq \frac{1}{\sqrt{2 \pi b}} e ^{-\frac{(x-a)^2}{2 b} }.

1562: \end{equation}

1563: Contrary to the Gaussian codebook case, where it is hard to identify a mapping $p(\hy_1|y_1)$ that will be superior to

1564:  Gaussian quantization (if indeed such a mapping exists), in this case it is a natural question to compare the

1565: Gaussian mapping of \eqref{eqn:def_qaussian_quant}, which induces a Gaussian-mixture distribution for $\hY_1$

1566: with other possible mappings. In the case of binary inputs it is natural to consider binary mappings

1567: for $\hY_1$. We can predict that such mappings will do well at high SNR on the source-relay link,

1568: when the probability of error for symbol-by-symbol detection at the relay is small, with a much smaller

1569: complexity than Gaussian quantization. We start by considering

1570:  two types of  hard-decision (HD) mappings:

1571: \begin{enumerate}

1572:     \item The first mapping is HD-EAF: The relay first makes a hard decision about every received $Y_1$ symbol,

1573:     determining whether it

1574:     is positive or negative, and then randomly decides if it is going to transmit this decision or transmit

1575:     an erasure symbol $E$ instead. The probability of transmitting an erasure, $1 - P_{\ners}$, is used to adjust the conference

1576:     rate such that the feasibility constraint is satisfied. Therefore, the conditional distribution $p(\hY_1|Y_1)$ is

1577:     given by:

1578:     \begin{subequations}

1579:         \begin{eqnarray}

1580:             \label{eqn:def_p_hy1_given_y1_HD_eq1}

1581:             p(\hY_1|Y_1 > 0) & = & \left\{

1582:                         \begin{array}{cl}

1583:                             P_{\ners} &, 1\\

1584:                             1 - P_{\ners} &, E

1585:                         \end{array}

1586:                     \right.\\

1587:             \label{eqn:def_p_hy1_given_y1_HD_eq2}

1588:             p(\hY_1|Y_1 \le 0) & = & \left\{

1589:                         \begin{array}{cl}

1590:                             P_{\ners} &, -1\\

1591:                             1 - P_{\ners} &, E

1592:                         \end{array}

1593:                     \right..

1594:         \end{eqnarray}

1595:     \end{subequations}

1596:     This choice is motivated by the time-sharing method considered

1597:     in section \ref{sec:timeshare_single}: after making a hard decision on the received symbol's sign --- positive

1598:     or negative, the relay applies TS to that decision so that the rate required to transmit the resulting random variable

1599:     is less than $C$. This facilitates transmission to the destination through the conference link.

1600:     Since the entropy of the sign decision is $1$, then when $C \ge 1$ we can transmit the sign decisions directly without using

1601:     an erasure. Therefore,

1602:     we expect that for values of $C$ in the range $C > 1$, this mapping

1603:     will not exceed the rate obtained for $C=1$. The focus is, therefore, on values of $C$ that are less than $1$.

1604:     The expressions for this assignment are given in appendix \ref{append:Gauss-deriv-HD-EAF}.

1605:

1606:     \item The second method is deterministic hard-decision. In this approach, we select a threshold $T$ such that the

1607:     range of $Y_1$ is partitioned into three regions: $Y_1 < -T, -T \le Y_1 \le T, Y_1 > T$. Then, according to the

1608:     value of each received $Y_1$ symbol, the corresponding $\hY_1$ is deterministically determined:

1609:     \begin{eqnarray}

1610:         \hY_1 = \left\{

1611:             \begin{array}{cl}

1612:                 1, & Y_1 > T\\

1613:                 E, & -T \le Y_1 \le T\\

1614:                 -1, & Y_1 < -T

1615:             \end{array}

1616:         \right..

1617:     \end{eqnarray}

1618:     The threshold $T$ is selected such that the achievable rate is maximized subject to satisfying

1619:     the feasibility constraint. We refer to this method as deterministic HD (DHD). Therefore, this is

1620:     another type of TS in which  the erasure probability is determined by the fraction of the time

1621:     the relay input is between $-T$ and $T$.

1622:     This method should be better than HD-EAF at high relay SNR since for HD-EAF, erasure is selected without

1623:     any regard to the quality of the decision---both good sign decisions and bad sign decisions are

1624:     erased with the same probability. In DHD, in contrast, the erased area is the area where the decisions have

1625:     low quality in the first place and all high quality decisions are sent. However, at low relay SNR and

1626:     small capacity for the relay-destination link, HD-EAF may perform better than DHD since the

1627:     erased area (i.e., the region between $-T$ and $+T$) for the DHD mapping has to be very large

1628:     to allow `squeezing' the estimate through the relay link,

1629:     while HD-EAF may require less compression of the HD output.

1630:     The expressions for evaluating the rate of the DHD assignment are given in appendix \ref{sec:expressions_DHD}.

1631: \end{enumerate}

1632:

1633: We now examine the performance of each technique using numerical evaluation:

1634: first, we examine the achievable rates with HD-EAF. The expressions are evaluated for $\sigR = \sigD = 1$ and

1635: $P = 1$. For every pair of values $(g,C)$ considered, the maximum $P_{\ners}$ was selected. Figure \ref{fig:hard-decision-vs-g}

1636: depicts the achievable rate vs. $g$ for $ 0.4 \le C \le 2$, together with the upper bound and the decode-and-forward rate.

1637: \begin{figure}[h]

1638:     \centering

1639:     \scalebox{0.7}{\includegraphics{Hard_decision_vs_g.eps}}

1640:     \caption{Information rate with BPSK and hard decision EAF mapping at the relay vs. relay channel gain $g$,

1641:         for different values of $C$.}

1642:     \label{fig:hard-decision-vs-g}

1643:     \vspace{-0.2cm}

1644: \end{figure}

1645: As can be observed from figure \ref{fig:hard-decision-vs-g}, the information rate of HD-EAF increases with $C$

1646: until $C = 1$ and then remains constant.

1647: It is also seen that for small values of $g$, HD-EAF is better than DAF. This region of $g$  increases with $C$,

1648: and for $C \ge 1$ the crossover value of $g$

1649: is approximately $1.71$. However, even for $g = 2$, DAF is only $2.5\%$ better than HD-EAF.

1650:

1651: Next, we examine DHD: as can be seen from figure \ref{fig:DHD-vs-g}, for small values of $C$, DAF exceeds

1652: the information rate of DHD for values of $g$ greater than $1$, but for $C \ge 0.8$, DHD is superior to

1653: DAF, and in fact DAF approaches DHD from below. Another phenomenon evident from the

1654: figure (esp. for $C = 0.8$), is the existence of a threshold: for low values of $C$ there is some $g$ at which the DHD rate

1655: exhibits a jump.

1656: \begin{figure}[!h]

1657:     \centering

1658:     \scalebox{0.69}{\includegraphics{DHD_vs_g.eps}}

1659:     \caption{Information rate with BPSK, for deterministic hard decision at the relay vs. relay channel gain $g$, for

1660:     different values of $C$.}

1661:     \label{fig:DHD-vs-g}

1662: \end{figure}

1663: \begin{figure}

1664:     \centering

1665:     \scalebox{0.69}{\includegraphics{DHD_Explanation.eps}}

1666:     \caption{$I(\hY_1;Y_1|Y)$ and $I(X;\hY_1,Y)$ vs. Threshold $T$ for $(g,C) = (0.4,0.8)$ (left) and

1667:         $(g,C) = (1.4,0.8)$ (right). The bold solid line represents $I(\hY_1;Y_1|Y)$, the bold dashed line represents $C = 0.8$,

1668:         $I(X;Y,\hY_1)$ is represented by the dash-dot line and the resulting information rate is depicted with the solid line. }

1669:     \label{fig:DHD-Explanation}

1670: \end{figure}

1671: This can be explained by looking at figure \ref{fig:DHD-Explanation}, which depicts

1672: the values of $I(X;\hY_1,Y)$ and $I(\hY_1;Y_1|Y)$ vs. the threshold $T$: the bold-solid graph of

1673: $I(\hY_1;Y_1|Y)$ can intersect the bold-dashed horizontal line representing $C$ at two values of $T$. We also note that

1674: for small $T$ the value of $I(X;\hY_1,Y)$ is generally greater than for large $T$. Now, the jump can be explained as follows: as

1675: shown in appendix \ref{sec:HDH-Explanation}, for small $T$ and $g$, $I(\hY_1;Y_1|Y)$ is bounded from below.

1676: Now, if this bound value is greater than $C$ then the intersection will occur only at a large value of $T$, hence

1677: the small rate. When $g$ increases, the value of $I(\hY_1;Y_1|Y)$ for small $T$ decreases accordingly, until

1678: at some $g$ it intersects $C$ for a small $T$ as well as for a large $T$, as indicated by the arrow in the

1679: right-hand part

1680: of figure \ref{fig:DHD-Explanation}. This allows us to obtain the

1681: rates in the region of small $T$ which are in general higher than the rates for large $T$ and this

1682: is the source of the jump in the achievable rate.

1683:

1684:

1685:

1686:

1687:

1688: \FloatBarrier

1689:

1690:

1691:

1692:

1693:

1694:

1695:

1696:

1697:

1698: \subsection{Time-Sharing Deterministic Hard-Decision (TS-DHD)}

1699: It is evident from the above numerical evaluation that neither of the two mappings, HD-EAF and DHD, is universally better than

1700: the other: when $g$ is small and $C$ is less than $1$, then HD-EAF performs better than DHD, since the region erased by DHD is too large,

1701: and when $g$ increases, DHD performs better than HD-EAF since it erases only the low quality information. It is therefore natural to consider

1702: a third mapping which combines both aspects of binary mapping at the relay, namely deterministically erasing low quality information and

1703: then randomly gating the resulting discrete variable in order to allow its transmission over the conference link.

1704: This hybrid mapping is given in the following equation:

1705:     \begin{subequations}

1706:     \label{eqn:def_TS-DHD}

1707:         \begin{eqnarray}

1708:             \label{eqn:def_TS-DHD_eq1}

1709:             p(\hY_1|Y_1 > T) & = & \left\{

1710:                         \begin{array}{cl}

1711:                             P_{\ners} &, 1\\

1712:                             1 - P_{\ners} &, E

1713:                         \end{array}

1714:                     \right.\\

1715:             \label{eqn:def_TS-DHD_eq2}

1716:             p(\hY_1 = E \;|\;|Y_1| \le T) & = & 1\\

1717:             \label{eqn:def_TS-DHD_eq3}

1718:             p(\hY_1|Y_1 < -T) & = & \left\{

1719:                         \begin{array}{cl}

1720:                             P_{\ners} &, -1\\

1721:                             1 - P_{\ners} &, E

1722:                         \end{array}

1723:                     \right..

1724:         \end{eqnarray}

1725:     \end{subequations}

1726: In this mapping, the region $|Y_1| \le T$ is always erased, and the complement region is erased with probability $P_{\ers} = 1- P_{\ners}$.

1727: Of course, now both $T$ and $P_{\ers}$ have to be optimized. The expressions for TS-DHD can be found in appendix \ref{appndx:expressions_TS_DHD}.

1728: Figure \ref{fig:compare_HD-EAF_DHD_TS-DHD} compares the performance of

1729: DHD, HD-EAF and TS-DHD. As can be seen, the hybrid method enjoys the benefits of both types of mappings and is the superior method.

1730: \begin{figure}[!h]

1731:     \centering

1732:     \scalebox{0.69}{\includegraphics{Compare_DHD_HD-EAF_TS-DHD.eps}}

1733:     \caption{Information rate with BPSK, for  HD-EAF, DHD and TS-DHD at the relay vs. relay channel gain $g$, for

1734:     different values of $C$.}

1735:     \label{fig:compare_HD-EAF_DHD_TS-DHD}

1736: \end{figure}

1737:

1738:

1739: Next, figure \ref{fig:compare-HD-EAF-GQ-EAF} compares the performance of TS-DHD, GQ-EAF, and DAF.

1740: \begin{figure}[!h]

1741:     \centering

1742:     \scalebox{0.67}{\includegraphics{compare_GQ_and_TS-DHD.eps}}

1743:     \caption{Information rate with BPSK, for DAF,  TS-DHD and GQ-EAF at the relay vs. relay channel gain $g$, for

1744:     different values of $C$.}

1745:     \label{fig:compare-HD-EAF-GQ-EAF}

1746: \end{figure}

1747: As can be seen from the figure, Gaussian quantization is not always the optimal choice: for $C = 0.6$ (the lines with

1748: diamond-shaped markers) we have that

1749: GQ-EAF is the best method for $g < 1.05$, for $1.05 < g < 1.55$ TS-DHD is the best method and for $g>1.55$

1750: DAF achieves the highest rate.

1751: For $C = 1$ (x-shaped markers) TS-DHD is superior to both GQ-EAF and DAF for $g > 0.9$ and  for $C = 2$, GQ-EAF is the superior method for all $g \le 2$.

1752: This suggests that for the practical Gaussian relay scenario, where the modulation constraint is taken into account, there is

1753: room to optimize the mapping at the relay since the choice of Gaussian quantization is not always optimal.

1754:

1755: Lastly, figure \ref{fig:DAF-EAF-Regions} depicts the regions in the $g$--$C$ plane in which each of the methods considered here is superior,

1756: in a similar manner to  \cite[figure 2]{Goldsmith:2006}\footnote{The block shapes are due to the step-size of $0.2$ in the values of $g$ and $C$ used

1757: for evaluating the rates. In the final version we will present an evaluation over a finer grid (such an evaluation

1758: requires several weeks to complete).}.

1759: \begin{figure}[!h]

1760:     \centering

1761:     \scalebox{0.67}{\includegraphics{Regions_figure_GQ_and_DHD.eps}}

1762:     \caption{The best cooperation strategy (out of DAF, TS-DHD and GQ-EAF)

1763:         for the Gaussian relay channel with BPSK transmission.}

1764:     \label{fig:DAF-EAF-Regions}

1765: \end{figure}

1766: As can be observed from the figure, in the noisy region of small $g$ and also in the region of very large $C$,

1767: GQ-EAF is superior, and in the

1768: strong relay region of medium-to-high $g$ and medium-to-high $C$, TS-DHD is the superior method.

1769:  DAF is superior for small $C$ and high $g$.

1770: %In the transition region where $C \in [0.6,1.8]$ and $g \in [1,2]$, the two hard decision methods, HD-EAF and DHD are

1771: %superior.

1772: In a sense, the TS-DHD method is a hybrid method between the DAF which makes a hard-decision on the

1773: entire block and GQ-EAF which makes a soft decision on every symbol; therefore, it is superior in the transition region

1774: between the region where DAF is distinctly better, and the region where GQ-EAF is distinctly superior.

1775:

1776: \FloatBarrier

1777:

1778:

1779: \subsection{When the SNR on the Direct Link Approaches $0$ ($\sigD \rightarrow \infty$)}

1780: In this subsection we analyze the relaying strategies discussed in this section as the SNR on the direct link $ X - Y $

1781: approaches zero. Because TS-DHD is a hybrid method combining

1782: both DHD and HD-EAF, we analyze the behavior of the components rather than the hybrid, to gain more insight.

1783: This analysis is particularly useful when trying to numerically evaluate the rates, since as the direct-link SNR goes to zero,

1784:    the computer's numerical accuracy does not allow us to obtain the rates numerically using the general expressions.

1785: %  Therefore, in order to examine the behavior

1786: %of the three EAF relay mappings considered in this section at low SNR on the direct link, it is required to derive analytical approximations

1787: %to the rate expressions.

1788:

1789: First we note that

1790: when the SNR of the direct link $ X - Y $ approaches $0$ we have that $I(X;Y) \rightarrow 0$ as well.

1791: To see this we write

1792: \begin{eqnarray*}

1793:     I(X;Y)  & = & h(Y) - h(Y|X)\\

1794:             & = & h(Y) - h(X + N|X)\\

1795:             & = & h(Y) - h(N),

1796: \end{eqnarray*}

1797: with $h(Y) = -\int_{-\infty}^{\infty}f(y) \log_2(f(y)) dy$, and from \eqref{eqn:f_Y_HC}

1798: \begin{eqnarray*}

1799:     f(y)& = & \frac{1}{2}\left(G_y(\sqrt{P},\sigD) + G_y(-\sqrt{P},\sigD)\right)\\

1800:         & = & \frac{1}{2}\left( \frac{1}{\sqrt{2 \pi \sigD}}e^{-\frac{(y-\sqrt{P})^2}{2\sigD}}

1801:             +\frac{1}{\sqrt{2 \pi \sigD}} e^{-\frac{(y+\sqrt{P})^2}{2\sigD}}\right)\\

1802:         & = & \frac{1}{\sqrt{2 \pi \sigD}}e^{-\frac{y^2}{2\sigD}}\left(\frac{1}{2} e^{\frac{y\sqrt{P}}{\sigD}}

1803:             +\frac{1}{2} e^{-\frac{y\sqrt{P}}{\sigD}}\right)e^{-\frac{P}{2\sigD}}\\

1804:         & = & \frac{1}{\sqrt{2 \pi \sigD}}e^{-\frac{y^2}{2\sigD}}

1805:              \cosh\left(\frac{y\sqrt{P}}{\sigD}\right)e^{-\frac{P}{2\sigD}}\\

1806:         &\stackrel{\sigD \rightarrow \infty}{\approx} & \frac{1}{\sqrt{2 \pi \sigD}}e^{-\frac{y^2}{2\sigD}}\\

1807:         & \triangleq & G_y(0,\sigD),

1808: \end{eqnarray*}

1809: where the approximation is in the sense that for small $|y|$ we have $\cosh\left(\frac{y\sqrt{P}}{\sigD}\right) \approx 1$, and for large $|y|$ the factor $e^{-\frac{y^2}{2\sigD}}$

1810: drives the entire expression to zero (the remaining factor satisfies $e^{-\frac{P}{2\sigD}} \rightarrow 1$),

1811: for $\sigD \rightarrow \infty$.

1812: This approximation reflects the intuitive notion that as the variance increases to infinity, the two-component, symmetric Gaussian

1813: mixture resembles more and more a zero-mean Gaussian RV with the same variance.

1814: Therefore, for low SNR, the output is very close to a zero-mean Normal

1815: RV with variance $\sigD$, and $h(Y) \approx h(N)$,\footnote{For $\sigma = 20$ we have that $\int_{-\infty}^{\infty} |f_Y(y) - G_y(0,\sigD)|dy < 0.001$,

1816: for $\sigma = 55$, $h(Y) - h(N) \approx 0.001$ and for $\sigma = 200$, $h(Y) - h(N) < 0.0001$.} hence

1817: \[

1818:     I(X;Y) \stackrel{\sigD \rightarrow \infty}{\longrightarrow} 0.

1819: \]

1820: Note that the upper bound and the decode-and-forward rate in this case are both equal to

1821: \[

1822:     R_{DAF} = R_{upper} = \min\left\{C,I(X;Y_1)\right\}.

1823: \]

1824:

1825: Now, let us evaluate the rate for HD-EAF as the SNR goes to zero. From \eqref{eqn:rate_Gauss}:

1826: \[

1827:     R \le I(X;Y,\hY_1) = I(X;\hY_1) + I(X;Y|\hY_1),

1828: \]

1829: and

1830: \begin{eqnarray*}

1831:     I(X;Y | \hY_1) & = & h(Y|\hY_1) - h(Y | X, \hY_1)\\

1832:                    & = & \Pr(\hY_1 = 1) h(Y|\hY_1 = 1) + \Pr(\hY_1 = E) h(Y| \hY_1 = E) +

1833:                         \Pr(\hY_1 = -1) h(Y|\hY_1 = -1) - h(N).

1834: \end{eqnarray*}

1835: Using  appendix \ref{append:Gauss-deriv}, equations \eqref{eqn:cond_entropy_hy1_is_1} -- \eqref{eqn:cond_f_y1_pos},

1836: we have

1837: \begin{eqnarray*}

1838:     h(Y|\hY_1 = 1)  & = & -\int_{y = -\infty}^{\infty} f_{Y|\hY_1}(y|\hy_1 = 1) \log_2 \left(f_{Y|\hY_1}(y|\hy_1 = 1)\right) dy,\\

1839:     f_{Y|\hY_1}(y|\hy_1 = 1)  & = & \frac{f_{Y,Y_1}(y,y_1>0)P_{\ners}}{\Pr(Y_1>0)P_{\ners}} = \frac{f_{Y,Y_1}(y,y_1>0)}{\Pr(Y_1>0)},\\

1840:     f_{Y,Y_1}(y,y_1>0)      & = & \frac{1}{2}\left( f_{Y,Y_1|X}(y,y_1>0|x = \sqrt{P}) + f_{Y,Y_1|X}(y,y_1>0|x = -\sqrt{P}) \right)\\

1841:                     & = & \frac{1}{2}\left( G_y(\sqrt{P},\sigD) \Pr(Y_1>0|X = \sqrt{P}) + G_y(-\sqrt{P},\sigD) \big(1 - \Pr(Y_1>0|X = \sqrt{P})\big)\right)\\

1842:                     & = & \frac{1}{\sqrt{2 \pi \sigD}}e^{-\frac{y^2}{2\sigD}}\left(\frac{1}{2} e^{\frac{y\sqrt{P}}{\sigD}}\Pr(Y_1>0|X = \sqrt{P})

1843:             +\frac{1}{2} e^{-\frac{y\sqrt{P}}{\sigD}}\big(1-\Pr(Y_1>0|X = \sqrt{P})\big)\right)e^{-\frac{P}{2\sigD}}\\

1844:                     & = & \frac{1}{\sqrt{2 \pi \sigD}}e^{-\frac{y^2}{2\sigD}}

1845:                     \left(\frac{\left(\frac{1}{2}-\delta\right) e^{\frac{y\sqrt{P}}{\sigD}}

1846:             +\left(\frac{1}{2}+\delta\right) e^{-\frac{y\sqrt{P}}{\sigD}}}{2}\right)e^{-\frac{P}{2\sigD}}\\

1847:                     & = & \frac{1}{\sqrt{2 \pi \sigD}}e^{-\frac{y^2}{2\sigD}}

1848:                     \left(\frac{1}{2}\cosh\left(\frac{y\sqrt{P}}{\sigD}\right)

1849:             -\delta \sinh\left(\frac{y\sqrt{P}}{\sigD}\right)\right)e^{-\frac{P}{2\sigD}}\\

1850:                     & \stackrel{(a)}{\approx} & \frac{1}{2} G_y(0,\sigD),

1851: \end{eqnarray*}

1852: when $\sigD \rightarrow \infty$ and $\delta \in \left[-\frac{1}{2},\frac{1}{2}\right]$ is selected such that

1853: $\Pr(Y_1>0|X = \sqrt{P}) = \frac{1}{2} - \delta$.

1854: The approximation in (a) is because for small $|y|$, $\sinh\left(\frac{y\sqrt{P}}{\sigD}\right) \approx 0$ and

1855: $\cosh\left(\frac{y\sqrt{P}}{\sigD}\right) \approx 1$, and for large $|y|$, both

1856: $ e^{-\frac{y^2}{2\sigD}}\sinh\left(\frac{y\sqrt{P}}{\sigD}\right) \rightarrow 0$ and

1857: $ e^{-\frac{y^2}{2\sigD}}\cosh\left(\frac{y\sqrt{P}}{\sigD}\right) \rightarrow 0$.

1858: %Note that for the symmetric case we consider here $\delta = 0$.

1859: Hence

1860: \begin{eqnarray*}

1861:     h(Y|\hY_1 = 1) & \approx & -\int_{y = -\infty}^{\infty} \frac{G_y(0,\sigD)}{2\Pr(Y_1>0)} \log_2 \left(\frac{G_y(0,\sigD)}{2\Pr(Y_1>0)}\right) dy\\

1862:                 & = & -\frac{1}{2\Pr(Y_1>0)}\int_{y = -\infty}^{\infty} G_y(0,\sigD)

1863:                     \left[\log_2 \left(G_y(0,\sigD)\right) - \log_2 \left(2\Pr(Y_1>0)\right)\right] dy\\

1864:                 & = & \frac{1}{2\Pr(Y_1>0)} \left[h(N) + \log_2 \left(2\Pr(Y_1>0)\right)\right],

1865: \end{eqnarray*}

1866: and using $\Pr(Y_1 > 0) = \Pr(Y_1 \le 0) = \frac{1}{2}$ and $h(Y|\hY_1 = 1) = h(Y| \hY_1 = -1)$, we obtain

1867: \begin{eqnarray*}

1868:     h(Y|\hY_1) & \approx & \frac{1}{2}P_{\ners}h(N) + (1 - P_{\ners})h(N) + \frac{1}{2}P_{\ners} h(N) \\

1869:                     & = &    h(N).

1870: \end{eqnarray*}

1871: Therefore, at low SNR, $Y$ and $\hY_1$ become independent.

1872: Then, $I(X;Y | \hY_1)  =  h(Y|\hY_1)  - h(N) \approx 0$ and the information rate becomes (see

1873: appendix \ref{appndx:appndxHD-EAF-highSNR})

1874: \begin{eqnarray*}

1875:     R \le I(X;\hY_1) & = & H(\hY_1) - H(\hY_1|X)\\

1876: %                    & = & H\left(\frac{1}{2}P_{\ners}, 1 - P_{\ners} ,\frac{1}{2}P_{\ners}\right)

1877: %                            - H\left(P_1 P_{\ners}, 1 - P_{\ners}, (1-P_1)P_{\ners}\right)\\

1878: %                    & = & -P_{\ners} \log_2\left(\frac{1}{2}P_{\ners}\right) -(1 - P_{\ners})\log_2(1 - P_{\ners})+  P_1 P_{\ners} \log_2(P_1 P_{\ners})\\

1879: %                    &   & \quad     +(1 - P_{\ners})\log_2(1 - P_{\ners})  +  (1-P_1)P_{\ners}\log_2((1-P_1)P_{\ners})\\

1880: %                    & = & -P_{\ners} \log_2\left(P_{\ners}\right) +P_{\ners}  +  P_1 P_{\ners} \log_2(P_1) + P_1 P_{\ners} \log_2(P_{\ners})\\

1881: %                    &   & \quad       +  (1-P_1)P_{\ners}\log_2(1-P_1) + (1-P_1)P_{\ners}\log_2(P_{\ners}) \\

1882: %                    & = &  P_{\ners}(1  +  P_1  \log_2(P_1) +  (1-P_1)\log_2(1-P_1) ) \\

1883:                     & = &  P_{\ners}(1  -H ( P_1  ,1-P_1 )),

1884: \end{eqnarray*}

1885: where $H(\cdot)$ is the discrete entropy for the specified discrete distribution and $P_1 = \Pr(Y_1 > 0 | X = \sqrt{P})$.

1886: Now, consider the feasibility condition $C \ge I(Y_1;\hY_1|Y)$:

1887: \begin{eqnarray*}

1888:     I(Y_1;\hY_1|Y)  & = & H(\hY_1|Y) - H(\hY_1|Y_1,Y)\\

1889:                     & \stackrel{(a)}{\approx} & H(\hY_1) - H(\hY_1|Y_1)\\

1890: %                    & = & H\left(\frac{1}{2}P_{\ners}, 1 - P_{\ners} ,\frac{1}{2}P_{\ners}\right) -

1891: %                        H(P_{\ners},1-P_{\ners})\\

1892: %                    & = & - 2 \frac{1}{2}P_{\ners} \log_2\left(\frac{1}{2}P_{\ners}\right)

1893: %                        - (1 - P_{\ners}) \log_2\left(1 - P_{\ners}\right) + P_{\ners} \log_2(P_{\ners})\\

1894: %                    &   & \quad        + (1 - P_{\ners}) \log_2\left(1 - P_{\ners}\right)\\

1895:                     & = &  P_{\ners},

1896: \end{eqnarray*}

1897: where (a) follows from the independence of $Y$ and $\hY_1$ at low SNR, see appendix \ref{appndx:appndxHD-EAF-highSNR}.

1898: Therefore, for low SNR, we set $P_{\ners} = \min\left\{ C,1\right\}$

1899: and the rate becomes

1900: \[

1901:     R \le \min\left\{ C,1\right\}( 1 - H ( P_1  ,1-P_1 )).

1902: \]

1903:

1904:

1905: For the GQ-EAF we first approximate $f(Y,\hY_1)$ at low SNR starting with \eqref{eqn:joint_y_hy1_gq_eaf}:

1906: \begin{eqnarray*}

1907:     f_{Y,\hY_1}(y,\hy_1) & = & \frac{1}{2}\left(G_y(\sqrt{P},\sigD)G_{\hy_1}(g\sqrt{P},\sigR+\sigQ) +

1908:             G_y(-\sqrt{P},\sigD)G_{\hy_1}(-g\sqrt{P},\sigR+\sigQ) \right)\\

1909:             & = & \frac{1}{\sqrt{2 \pi \sigD}} e^{-\frac{y^2}{2\sigD}}

1910:                     \left(\frac{1}{2} G_{\hy_1}(g\sqrt{P},\sigR+\sigQ) e^{\frac{y\sqrt{P}}{\sigD}}+

1911:                        \frac{1}{2} G_{\hy_1}(-g\sqrt{P},\sigR+\sigQ) e^{\frac{-y\sqrt{P}}{\sigD}} \right)e^{-\frac{P}{2\sigD}}\\

1912:             & \approx & G_y(0,\sigD)f_{\hY_1}(\hy_1),

1913: \end{eqnarray*}

1914: as $e^{\pm \frac{y\sqrt{P}}{\sigD}} \approx 1$ in the region when $G_{\hy_1}$ is significant, for both $X = \sqrt{P}$

1915: or $X = -\sqrt{P}$.

1916: We conclude that as the direct SNR approaches 0, $Y$ and $\hY_1$ become independent.

1917: Now, the rate is given by:

1918: \begin{eqnarray}

1919:     R & \le & I(X;Y,\hY_1) \nonumber \\

1920:       &  =  & h(Y,\hY_1) - h(Y,\hY_1|X) \nonumber \\

1921:       &  \approx  & h(Y) + h(\hY_1) - h(X+N, gX+N_1+N_Q|X) \nonumber \\

1922:       &  =  & h(Y) + h(\hY_1) - h(N, N_1+N_Q|X)\nonumber \\

1923:       &  =  & h(Y) - h(N|X) + h(\hY_1) - h(N_1 + N_Q|X)\nonumber \\

1924:       &  =  & I(X;Y) + I(X;\hY_1)\nonumber \\

1925:       & \approx & I(X;\hY_1)\nonumber \\

1926:       \label{eqn:GQ-EAF-at-low-SNR}

1927:       & = & h(\hY_1) - h(N_1+N_Q).

1928: \end{eqnarray}

1929: The  feasibility condition becomes:

1930: \begin{eqnarray}

1931:     C & \ge & I(\hY_1;Y_1|Y) \nonumber\\

1932:         & = & h(\hY_1|Y) - h(\hY_1|Y,Y_1) \nonumber\\

1933:     \label{eqn:cond_C_lowSNR_GQ}

1934:         & \approx & h(\hY_1) - h(N_Q),

1935: \end{eqnarray}

1936: with

1937: \[

1938:     f_{\hY_1}(\hy_1) = \frac{1}{2}\left[G_{\hy_1}(g\sqrt{P},\sigR+\sigQ) + G_{\hy_1}(-g\sqrt{P},\sigR+\sigQ)\right].

1939: \]

1940:

1941: For DHD, as $\sigD \rightarrow \infty$ we have

1942: \begin{eqnarray*}

1943:     I(X;\hY_1,Y) & = & I(X;Y)  + I(X;\hY_1|Y) \\

1944:         & \approx & I(X;\hY_1|Y)\\

1945:         & = & H(\hY_1|Y) - H(\hY_1|Y,X)\\

1946:         & \stackrel{(a)}{\approx} & H(\hY_1) - H(\hY_1|X)\\

1947:         & = & I(X;\hY_1),

1948: \end{eqnarray*}

1949: where (a) follows from the independence of $Y$ and $Y_1$ as $\sigD \rightarrow \infty$ and the fact that

1950: $\hY_1$ is a deterministic function of $Y_1$, combined with the fact that given $X$, $Y_1$ and $Y$ are independent.

1951: The feasibility condition becomes

1952: \[

1953:     C  \ge  H(\hY_1|Y) \approx  H(\hY_1).

1954: \]

1955: Because $I(X;\hY_1)$ is not a monotone function of $T$ we have to optimize over $T$ to find the actual rate.

1956:

1957: As can be seen from the expression for HD-EAF, when the SNR on the direct link decreases, the capacity of the

1958: conference link acts as a scaling factor on the rate of the binary channel from the source to the relay.

1959: \begin{figure}[!h]

1960:     \centering

1961:     \scalebox{0.7}{\includegraphics{DAF_DHD_GQEAF_HDEAF_at_LOWSNR.eps}}

1962:     \caption{Information rate with DAF, DHD, HD-EAF and GQ-EAF vs.

1963:         relay channel gain $g$, for different values of $C$, at low SNR on the direct link.}

1964:     \label{fig:low-direct-snr}

1965: \end{figure}

1966: In figure \ref{fig:low-direct-snr} we plotted the information rate for DHD, HD-EAF, GQ-EAF and DAF (which coincides with

1967: the upper bound). Comparing the three EAF strategies we note that DHD, which at intermediate SNR on the source-relay channel performs well for $C \ge 0.8$,

1968: has the worst performance at low SNR up to $C = 1.2$. At $C = 1.2$, DHD becomes the best technique out of the three.

1969: For $C < 1.2$ and high SNR on the

1970: source-relay channel, HD-EAF outperforms both DHD and GQ-EAF. For low SNR on the source-relay channel, GQ-EAF is again superior.

1971:

1972: \subsection{Discussion}

1973: We make the following observations:

1974: \begin{itemize}

1975:     \item As noted at the beginning of this section, for low SNR on the source-relay link,

1976:     GQ-EAF outperforms TS-DHD. To see why, consider the

1977:     distribution of $Y_1$:

1978:     \begin{eqnarray*}

1979:         f_{Y_1}(y_1) & = & G_{y_1}(0,\sigR) \cosh\left(\frac{g \sqrt{P} y_1}{\sigR}\right) e^{-\frac{g^2P}{2\sigR}}\\

1980:         &  \stackrel{g \rightarrow 0}{\approx} & G_{y_1}(0,\sigR) \left(1 - \frac{g^2P}{2\sigR}\right),

1981:     \end{eqnarray*}

1982:     where the approximation is obtained using the first order Taylor expansion, and the fact that for large

1983:     values of $Y_1$, $G_{y_1}(0,\sigR)$ dominates the expression. Therefore, as $g \rightarrow 0$, $Y_1$

1984:     approaches a zero-mean Gaussian RV: $Y_1 \stackrel{\mathcal{D}}{\rightarrow} \mN(0,\sigR)$.

1985:     As discussed in \cite[ch. 13.1]{cover-thomas:it-book},

1986:     the closer the reconstruction variable is to the original variable, the better the quantization performance is expected to be. Therefore

1987:     it is natural to expect that GQ will perform better at low relay link SNR.

1988:

1989:     \item At the other extreme, as $g \rightarrow \infty$, consider the DAF strategy:

1990:     as $g \rightarrow \infty$, we have that

1991:     \begin{eqnarray*}

1992:         h(Y_1) & = & -\int_{y_1 = -\infty}^{\infty}

1993:             \frac{1}{2}\left[G_{y_1}(g\sqrt{P},\sigR) + G_{y_1}(-g\sqrt{P},\sigR)\right]\times\\

1994:             &  & \qquad \qquad \qquad

1995:             \log_2\left(\frac{1}{2}\left[G_{y_1}(g\sqrt{P},\sigR) + G_{y_1}(-g\sqrt{P},\sigR)\right]

1996:             \right)dy_1\\

1997:         & \stackrel{g \rightarrow \infty}{\approx}&  1 - \int_{y_1 = -\infty}^{\infty} \frac{1}{2}G_{y_1}(g\sqrt{P},\sigR)

1998:             \log_2 G_{y_1}(g\sqrt{P},\sigR) dy_1 \\

1999:         &   & \qquad \qquad \qquad - \int_{y_1 = -\infty}^{\infty} \frac{1}{2}G_{y_1}(-g\sqrt{P},\sigR)

2000:             \log_2 G_{y_1}(-g\sqrt{P},\sigR) dy_1\\

2001:         & = & 1 + h(N_1),

2002:     \end{eqnarray*}

2003:     and therefore,

2004:     \[

2005:         I(X;Y_1)  = h(Y_1) -  h(Y_1|X) \approx 1 + h(N_1) -  h(N_1) = 1 = H(X).

2006:     \]

2007:     Hence,

2008:     \[

2009:         R_{DAF}  = \min\left\{I(X;Y_1), I(X;Y)+C\right\} = \min \left\{1, I(X;Y)+C \right\},

2010:     \]

2011:     which is the maximal rate. Therefore, as $g \rightarrow \infty$ DAF provides the optimal rate.

2012:

2013:     \item We can expect that at intermediate SNR, methods that balance between the soft-decision per symbol of GQ-EAF and

2014:     the hard-decision on the entire codeword of DAF, will be superior to both.

2015:     Furthermore, we believe that as the SNR decreases, increasing the

2016:     cardinality of $\hY_1$ accordingly will improve the performance.

2017: \end{itemize}

2018:

2019:

2020:

2021:

2022:

2023:

2024:

2025:

2026:

2027:

2028:

2029:

2030: \section{Multi-Step Cooperative Broadcast Application}

2031: \label{sec:application_multi_step}

2032: % In relaying we first need to find a common knowledge that both the receiver and the

2033: % relay share. The relay helps the receiver by refining this common knowledge. In the DAF method

2034: % the common knowledge is the set of messages $\mW$. In our new relay method the common knowledge is the

2035: % set $\stypm(Y_1|\xvec_2^m)$. Since this set can always be used as common knowledge for conferencing, we

2036: % can apply the same idea used in theorem \ref{thm:main_thm} to generate common knowledge in multi-step conferencing for

2037: % cooperative broadcast.

2038:

2039: In this section we consider the cooperative broadcast (BC) scenario. In this scenario, one transmitter communicates with two receivers. In its most

2040: general form, the transmitter sends three independent messages: a common message intended for both receivers and two private messages,

2041: one for each receiver, where all three messages are encoded into a single channel codeword $X^n$.

2042: Each receiver gets a noisy version of the codeword, $Y_1^n$

2043: at $\Rgood$ and $Y_2^n$ at $\Rbad$. After reception, the receivers exchange messages in a $K$-cycle conference over noiseless

2044: conference links  of finite capacities $C_{12}$ and $C_{21}$.

2045: Each conference message is based on the channel output at each receiver and the conference messages previously received

2046: from the

2047: other receiver, in a similar manner to the conference defined by Willems in~\cite{Willems:83} for the cooperative MAC.

2048: After conferencing, each receiver decodes its message.

2049: This scenario is depicted in figure

2050: \ref{fig:three_msg_bc}. This setup was studied in \cite{DraperFK:03} for the

2051: single common message case over the independent BC (i.e. $p(\yvec_1,\yvec_2|\xvec) = \prod_{i=1}^n p(y_{1,i}|x_i)p(y_{2,i}|x_i)$),

2052: and in \cite{RonSer:2005} for the general setup with a single cycle of conferencing.

2053: \begin{figure}[h]

2054:     \centering

2055:     \scalebox{0.6}{\includegraphics{Broadcast_Channel_ThreeMSG.eps}}

2056:     \caption{The broadcast channel with cooperating receivers. The encoder sends three messages, a common message $W_0$,  a private message to $\Rgood$,

2057:     $W_1$, and a private message to $\Rbad$, $W_2$. $\hat{W}_0$ and $\hat{\hat{W}}_0$ are the estimates of $W_0$ at

2058:     $\Rgood$ and $\Rbad$ respectively.}

2059:     \label{fig:three_msg_bc}

2060: \end{figure}

2061:

2062:     \subsection{Definitions}

2063:

2064:     We use the standard definition for the discrete memoryless general broadcast channel

2065:     given in \cite{Cover:98}.

2066:     We define a cooperative coding scheme as follows:

2067:     \begin{definition}

2068:         {\em A $\left(C_{12}, C_{21} \right)$-admissible $K$-cycle conference} consists of the following elements:

2069:         \begin{enumerate}

2070:             \item $K$ message sets from $\Rgood$ to $\Rbad$, denoted by

2071:                 $\mW_{12}^{(1)}$, $\mW_{12}^{(2)}$,...,$\mW_{12}^{(K)}$, and $K$ message sets from $\Rbad$ to $\Rgood$,

2072:                 denoted by $\mW_{21}^{(1)}$, $\mW_{21}^{(2)}$,...,$\mW_{21}^{(K)}$.

2073:                 Message

2074:                 set $\mW_{12}^{(k)}$ consists of $2^{nR_{12}^{(k)}}$ messages and message

2075:                 set $\mW_{21}^{(k)}$ consists of $2^{nR_{21}^{(k)}}$ messages.

2076:             \item $K$ mapping functions, one for each conference step from $\Rgood$ to $\Rbad$:

2077:                 \[

2078:                     h_{12}^{(k)}: \mY_1^n \times \mW_{21}^{(1)} \times \mW_{21}^{(2)} \times ... \times

2079:                                 \mW_{21}^{(k-1)} \mapsto \mW_{12}^{(k)},

2080:                 \]

2081:                 and $K$ mapping functions, one for each conference step from $\Rbad$ to $\Rgood$:

2082:                 \[

2083:                     h_{21}^{(k)}: \mY_2^n \times \mW_{12}^{(1)} \times \mW_{12}^{(2)} \times ... \times

2084:                         \mW_{12}^{(k)} \mapsto \mW_{21}^{(k)},

2085:                 \]

2086:                 where $k = 1,2,...,K$.

2087:         \end{enumerate}

2088: %        Let $R_{12}^{(k)} = \frac{1}{n}\log_2\left( ||\mW_{12}^{(k)}|| \right)$, and

2089: %        $R_{21}^{(k)} = \frac{1}{n}\log_2\left( ||\mW_{21}^{(k)}|| \right)$. Then

2090:     The conference rates satisfy:

2091:         \[

2092:             C_{12} = \sum_{k = 1}^K R_{12}^{(k)}, \qquad C_{21} = \sum_{k = 1}^K R_{21}^{(k)}.

2093:         \]

2094:     \end{definition}

2095:     \begin{definition}

2096:         {\em A $(2^{nR_0},2^{nR_1},2^{nR_2},n,C_{12},C_{21},K)$ code} for the general broadcast channel with

2097:         a common message and two independent private messages, consists of three sets of source messages,

2098:         $\mM_0 = \left\{1, 2,...,2^{nR_0}\right\}$, $\mM_1 = \left\{1, 2,...,2^{nR_1}\right\}$ and

2099:                 $\mM_2 = \left\{1, 2,...,2^{nR_2}\right\}$,

2100: %        three conference message sets,

2101: %        \begin{eqnarray*}

2102: %            \mW_{21}^a &  = & \Big\{1,2,...,2^{n R_{21}^a} \Big\},\\

2103: %            \mW_{12}   &  = & \Big\{1,2,...,2^{n R_{12}}   \Big\},\\

2104: %            \mW_{21}^b &  = & \Big\{1,2,...,2^{n R_{21}^b} \Big\},

2105: %        \end{eqnarray*}

2106:         a mapping function at the transmitter,

2107:         \[

2108:            f: \mM_0 \times \mM_1 \times \mM_2 \mapsto \mX^n,

2109:         \]

2110: %        three relay functions,

2111: %        \begin{eqnarray*}

2112: %             &h_{21}^a:&  \mY_2^n  \mapsto \mW_{21}^a,\\

2113: %             &h_{12}:  &  \mW_{21}^a \times \mY_1^n \mapsto \mW_{12},\\

2114: %             &h_{21}^b:&  \mW_{12} \times \mY_2^n \mapsto \mW_{21}^b,

2115: %        \end{eqnarray*}

2116: %        with

2117: %        \[

2118: %            R_{21}^a  \le  C_{21}^a, \quad   R_{12}  \le  C_{12} \;\;\mbox{    and     }\;\;  R_{21}^b  \le  C_{21}^b ,

2119: %        \]

2120: %        where $C_{21}^a = \alpha C_{21}$ and $C_{21}^b = \left(1 - \alpha\right) C_{21}$;

2121:         a $\left(C_{12}, C_{21} \right)$-admissible $K$-cycle conference,

2122:         and two decoders,

2123:         \begin{eqnarray*}

2124:             & g_1: & \mW_{21}^{(1)} \times \mW_{21}^{(2)}\times ... \times \mW_{21}^{(K)} \times \mY_1^n \mapsto \mM_0 \times \mM_1, \\

2125:             & g_2: & \mW_{12}^{(1)} \times \mW_{12}^{(2)}\times ... \times \mW_{12}^{(K)} \times \mY_2^n \mapsto \mM_0 \times \mM_2.

2126:         \end{eqnarray*}

2127:     \end{definition}

2128:     \begin{definition}

2129:         The {\em average probability of error} is defined as

2130:         the average probability that at least one of the receivers does not decode its message pair correctly:

2131:         \[

2132:             \Pe = \Pr\left(g_1\left(W_{21}^{(1)}, W_{21}^{(2)}, ..., W_{21}^{(K)}, Y_1^n\right)\ne (M_0,M_1) \mbox{ or }

2133:                 g_2\left(W_{12}^{(1)}, W_{12}^{(2)}, ...,W_{12}^{(K)}, Y_2^n\right) \ne (M_0,M_2)\right),

2134:         \]

2135:         where we assume that each message is selected uniformly and independently over its respective message set.

2136:     \end{definition}

2137: %    \begin{definition}

2138: %        Let $\typm(A)$ denote the set of $\delta$-weakly typical sequences of length $m$ generated

2139: %        by the distribution $p_A(a)$ on $\mA$

2140: %        %the i.i.d. distribution $\prod_{l=1}^m p(a_l)$, $a_l \in \mA$,

2141: %        as defined in \cite[ch. 3]{cover-thomas:it-book}.

2142: %        For $\avec_0^m \in \typm(A)$, define the set $\ttyp(A,\avec_0^m)$ to be the set of all

2143: %        typical vectors $\avec^n \in \typ(A)$ such that their first $m$ elements

2144: %        satisfy $\avec^m = \avec_0^m$.

2145: %    \end{definition}

2146:

2147:

2148:

2149:

2150:

2151:

2152:

2153:

2154:

2155:

2156:

2157:

2158:

2159:

2160:

2161:

2162:

2163: \subsection{The Cooperative Broadcast Channel with Two Independent and One Common Message}

2164: \label{sec:multi-step-general-bc}

2165: We first present the general result for the cooperative broadcast scenario with a $K$-cycle conference.

2166: Define $\hYvec_1 = \left(\hY_1^{(1)}, \hY_1^{(2)},..., \hY_1^{(K)} \right)$ and

2167: $\hYvec_2 = \left(\hY_2^{(1)}, \hY_2^{(2)},..., \hY_2^{(K)} \right)$. Let $R_1$ and $R_2$ be the private rates to $\Rgood$ and

2168: $\Rbad$ respectively, and let $R_0$ denote the rate of the common information. Then, the following rate triplets are achievable:

2169:

2170: \begin{theorem}

2171:     \label{thm:multi-step-general-bc}

2172:     \it

2173:         Consider the general broadcast channel $\left(\mX, p(y_1,y_2|x), \mY_1 \times \mY_2\right)$ with cooperating

2174:         receivers, having noiseless conference

2175:     links of finite capacities $C_{12}$ and $C_{21}$ between them. Let the receivers hold a conference that

2176:     consists of $K$ cycles. Then, any rate triplet $(R_0, R_1, R_2)$ satisfying

2177:     \begin{subequations}

2178:         \begin{eqnarray}

2179:             R_0 & \le & \min\left\{I\left(W;Y_1,\hYvec_2\right), I\left(W;\hYvec_1,Y_2\right) \right\}\\

2180:             R_1 & \le & I(U;Y_1, \hYvec_2|W)\\

2181:             R_2 & \le & I(V;\hYvec_1, Y_2|W)\\

2182:             R_1 + R_2 & \le & I(U;Y_1, \hYvec_2|W) + I(V;\hYvec_1, Y_2|W) - I(U;V|W),

2183:         \end{eqnarray}

2184:     \end{subequations}

2185:     subject to,

2186:     \begin{subequations}

2187:         \begin{eqnarray}

2188:         \label{eqn:c12_constr_multi_step}

2189:             C_{12}  & \ge & I(Y_1; \hYvec_1, \hYvec_2|Y_2)\\

2190:         \label{eqn:c21_constr_multi_step}

2191:             C_{21}  & \ge & I(Y_2; \hYvec_2, \hYvec_1|Y_1),

2192:         \end{eqnarray}

2193:     \end{subequations}

2194:     for some joint distribution

2195:         \begin{eqnarray}

2196:         \label{eqn:distributions}

2197:          &  & p\left(w,u,v,x,y_1,y_2,\hy_1^{(1)}, \hy_1^{(2)},...,\hy_1^{(K)},\hy_2^{(1)}, \hy_2^{(2)},...,\hy_2^{(K)}\right) =\nonumber\\

2198:          &  & \phantom{xxx} p(w,u,v,x) p(y_1,y_2|x) p\left(\hy_1^{(1)}|y_1\right) p\left(\hy_2^{(1)}|y_2,\hy_1^{(1)}\right)\cdot\cdot\cdot

2199:                 p\left(\hy_1^{(k)}|y_1,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)\times\nonumber\\

2200:          &  &  \phantom{xxx} p\left(\hy_2^{(k)}|y_2,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)

2201:                 \cdot\cdot\cdot p\left(\hy_1^{(K)}|y_1,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(K-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(K-1)}\right)\nonumber\\

2202:          &  &  \phantom{xxx} \times p\left(\hy_2^{(K)}|y_2,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(K)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(K-1)}\right),

2203:     \end{eqnarray}

2204:     is achievable.

2205:     The cardinalities of the $k$'th auxiliary random variables are bounded by:

2206:     \begin{eqnarray*}

2207:         ||\mhY_1^{(k)}|| & \le & ||\mY_1|| \times \prod_{l=1}^{k-1} ||\mhY_1^{(l)}|| \times \prod_{l=1}^{k-1} ||\mhY_2^{(l)}|| + 1, \qquad

2208:             \qquad k = 1,2,...,K\\

2209:         ||\mhY_2^{(k)}|| & \le & ||\mY_2|| \times \prod_{l=1}^{k} ||\mhY_1^{(l)}|| \times \prod_{l=1}^{k-1} ||\mhY_2^{(l)}|| + 1,

2210:             \qquad \qquad k = 1,2,...,K.

2211:     \end{eqnarray*}

2212: \end{theorem}

2213:

2214:

2215: \begin{proof}

2216:     \subsubsection{Overview of Strategy}

2217:     The coding strategy is based on combining the BC code construction of \cite{ElGamalM:81}, after incorporating the common message into the

2218:     construction, with the $K$-cycle conference of

2219:     \cite{Kaspi:85}. The transmitter constructs a broadcast code to split the rate between the three message sets. This

2220:     is done independently of the relaying scheme.

2221:     Each receiver generates its conference messages according to the construction of \cite{Kaspi:85}.

2222:     After $K$ cycles of conferencing

2223:     each receiver decodes its information based on its channel output and the conference messages received from the other receiver.

2224:

2225:     \subsubsection{Code Construction at The Transmitter}

2226:     \begin{itemize}

2227:     \item

2228:     Fix all the distributions in \eqref{eqn:distributions}. Fix $\eps > 0$ and let $n > 1$. Let $\delta > 0$ be a positive number whose

2229:     value is determined in the following steps.

2230:     Let $R(W) = \min\Big\{I\left(W;Y_1,\hYvec_2\right), I\left(W;\hYvec_1,Y_2\right) \Big\}$. Let $S_{[W]\delta}^{(n)}$ denote the

2231:     set of all $\wvec \in \mW^n$ sequences such that $\wvec \in \stypd(W)$ and $\stypd(U,V|\wvec)$ is non-empty, as defined in

2232:     \cite[corollary 5.11]{YeungBook}. From \cite[corollary 5.11]{YeungBook} we  have that

2233:     $||S_{[W]\delta}^{(n)}|| \ge 2^{n(H(W)-\phi)}$, where $\phi \rightarrow 0$ as $\delta \rightarrow 0$ and $n \rightarrow \infty$.

2234:

2235:     \item Pick $2^{n(R(W) - \eps)}$ sequences from $S_{[W]\delta}^{(n)}$ in a uniform and independent manner according to

2236:     \[

2237:         \Pr(\wvec) = \left\{

2238:                 \begin{array}{cl}

2239:                     \frac{1}{||S_{[W]\delta}^{(n)}||} & ,\wvec \in S_{[W]\delta}^{(n)}\\

2240:                     0   & ,\mbox{otherwise}.

2241:                 \end{array}

2242:             \right.

2243:     \]

2244:     Label these sequences with $l \in \mM_0 \triangleq \left\{1,2,...,2^{n(R(W)-\eps)}\right\}$.

2245:

2246:     \item For each sequence $\wvec(l)$, $l \in \mM_0$, consider the set $\stypdp(U|\wvec(l))$, where $\delta' = \delta\max\left\{||\mU||, ||\mV||\right\}$.

2247:     Since the sequences $\wvec \in \mW^n$ are selected such that $\stypd(U,V|\wvec(l))$ is non-empty and since

2248:     $(\uvec, \vvec) \in \stypd(U,V|\wvec(l))$ implies  $\uvec \in \stypdp(U|\wvec(l))$, then also $\stypdp(U|\wvec(l))$ is non-empty, and by

2249:     \cite[theorem 5.9]{YeungBook}, $||\stypdp(U|\wvec(l))|| \ge 2^{n(H(U|W) - \psi)}$,

2250:     $\psi \rightarrow 0$ as $\delta' \rightarrow 0$ and $n \rightarrow \infty$.

2251:

2252:     \item For each $l \in \mM_0$ pick $2^{n(I(U;Y_1,\hYvec_2|W)-\eps)}$ sequences in a uniform and independent manner from $\stypdp(U|\wvec(l))$ according

2253:     to

2254:     \[

2255:         \Pr(\uvec|l) =  \left\{

2256:             \begin{array}{cl}

2257:                 \frac{1}{||\stypdp(U|\wvec(l))||} & ,\uvec \in \stypdp(U|\wvec(l))\\

2258:                 0 & , \mbox{otherwise}.

2259:             \end{array}

2260:         \right.

2261:     \]

2262:     Label these sequences with $\uvec(i|l)$, $i \in \mZ_1 \triangleq \left\{1,2,...,2^{n(I(U;Y_1,\hYvec_2|W)-\eps)}\right\}$. Similarly,

2263:     pick $2^{n(I(V;\hYvec_1, Y_2|W)-\eps)}$ sequences in a uniform and independent manner from $\stypdp(V|\wvec(l))$ according

2264:     to

2265:     \[

2266:         \Pr(\vvec|l) =  \left\{

2267:             \begin{array}{cl}

2268:                 \frac{1}{||\stypdp(V|\wvec(l))||} & ,\vvec \in \stypdp(V|\wvec(l))\\

2269:                 0 & , \mbox{otherwise}.

2270:             \end{array}

2271:         \right.

2272:     \]

2273:     Label these sequences with $\vvec(j|l)$, $j \in \mZ_2 \triangleq \left\{1,2,...,2^{n(I(V;\hYvec_1, Y_2|W)-\eps)}\right\}$.

2274:     $\delta$ is selected such that $||S_{[W]\delta}^{(n)}|| \ge 2^{n(R(W)-\eps)}$, and $\forall l \in \mM_0$ we have

2275:     that $||\stypdp(U|\wvec(l))|| \ge 2^{n(I(U;Y_1,\hYvec_2|W)-\eps)}$ and

2276:     $||\stypdp(V|\wvec(l))|| \ge 2^{n(I(V;\hYvec_1, Y_2|W)-\eps)}$.

2277:

2278:

2279:     \item Partition the set $\mZ_1$ into $2^{nR_1}$ subsets $B_{w_1}$,  $w_1 \in \mM_1 = \left\{1,2,...,2^{nR_1}\right\}$, let \\

2280:     $B_{w_1} = \Big[(w_1 - 1)2^{n(I(U;Y_1,\hYvec_2|W)- R_1 - \eps)} + 1, w_1 2^{n(I(U;Y_1,\hYvec_2|W)- R_1 - \eps)} \Big]$. Similarly partition

2281:     the set $\mZ_2$ into $2^{nR_2}$ subsets $C_{w_2}$,  $w_2 \in \mM_2 = \left\{1,2,...,2^{nR_2}\right\}$, let \\

2282:     $C_{w_2} = \left[(w_2 - 1)2^{n(I(V;\hYvec_1,Y_2|W)- R_2 - \eps)} + 1, w_2 2^{n(I(V;\hYvec_1,Y_2|W)- R_2 - \eps)} \right]$.

2283:

2284:     \item For each triplet $(l,w_1,w_2)$ consider the set

2285:     \[

2286:         \mD(w_1,w_2|l) \triangleq \left\{(m_1,m_2): m_1 \in B_{w_1}, m_2 \in C_{w_2}, \left(\uvec(m_1|l), \vvec(m_2|l)\right)

2287:             \in \stypdp(U,V|\wvec(l)) \right\}.

2288:     \]

2289:     By \cite[lemma on pg. 121]{ElGamalM:81}, we have that taking $n$ large enough we can make

2290:     $\Pr\left(||\mD(w_1,w_2|l)||  = 0\right) \le \eps$ for any $\eps > 0$, as long as

2291:     \begin{subequations}

2292:         \begin{eqnarray}

2293:         \label{eqn:R1_cond_lemma}

2294:             R_1  & \le & I(U;Y_1,\hYvec_2|W)\\

2295:         \label{eqn:R2_cond_lemma}

2296:             R_2 & \le & I(V;\hYvec_1,Y_2|W)\\

2297:         \label{eqn:R1_and_R2_cond_lemma}

2298:             R_1 + R_2 & \le & I(U;Y_1,\hYvec_2|W) + I(V;\hYvec_1,Y_2|W) - I(U;V|W).

2299:         \end{eqnarray}

2300:     \end{subequations}

2301:     Note that the individual rate constraints are required to guarantee that the sets $B_{w_1}$ and $C_{w_2}$ are non-empty.

2302:

2303:     \item For each $l \in \mM_0$, we pick a unique pair of $(m_1(w_1,w_2,l), m_2(w_1,w_2,l)) \in \mD(w_1,w_2|l)$,

2304:     $(w_1,w_2) \in \mM_1 \times \mM_2$.  The transmitter generates the codeword $\xvec(l,w_1,w_2)$ according to\\

2305:     $p(\xvec(l,w_1,w_2)) = \prod_{i=1}^n p(x_i|u_i(m_1(w_1,w_2,l)),v_i(m_2(w_1,w_2,l)),w_i(l))$.

2306:     When transmitting the triplet $(l,w_1,w_2)$ the transmitter outputs $\xvec(l,w_1,w_2)$.

2307:     \end{itemize}

2308:

2309:         \subsubsection{Codebook Generation at the Receivers}

2310:         \begin{itemize}

2311:             \item For the first conference step from $\Rgood$ to $\Rbad$, $\Rgood$ generates a codebook

2312:                 with $2^{nR_{12}'^{(1)}}$ codewords indexed by

2313:                 $z_{12}^{(1)}\in \mZ_{12}^{(1)} = \left\{1,2,...,2^{nR_{12}'^{(1)}}\right\}$ according to the distribution

2314:                 $p\left(\hy_1^{(1)}\right)$:

2315:                 $p\left(\hyvec_1^{(1)}(z_{12}^{(1)})\right) = \prod_{i=1}^n p\left(\hy_{1,i}^{(1)}(z_{12}^{(1)})\right)$.

2316:                 $\Rgood$ uniformly and independently partitions the message set

2317:                 $\mZ_{12}^{(1)}$ into $2^{nR_{12}^{(1)}}$ subsets indexed by

2318:                 $w_{12}^{(1)} \in \mW_{12}^{(1)} = \left\{1,2,...,2^{nR_{12}^{(1)}}\right\}$. Denote these subsets

2319:                 with $\mS_{12,w_{12}^{(1)}}^{(1)}$.

2320:             \item For the first conference step from $\Rbad$ to $\Rgood$, $\Rbad$ generates a codebook with

2321:                 $2^{nR_{21}'^{(1)}}$ codewords indexed by $z_{21}^{(1)} \in \mZ_{21}^{(1)} = \left\{1,2,..., 2^{nR_{21}'^{(1)}}\right\}$

2322:                 for each codeword $\hyvec_1^{(1)}(z_{12}^{(1)})$, $z_{12}^{(1)} \in \mZ_{12}^{(1)}$, in an i.i.d.

2323:                 manner according to

2324:                 $p\left(\hyvec_2^{(1)}(z_{21}^{(1)}|z_{12}^{(1)})\right)=  \prod_{i=1}^n p\left(\hy_{2,i}^{(1)}(z_{21}^{(1)}|z_{12}^{(1)})\Big|\hy^{(1)}_{1,i}(z_{12}^{(1)})\right)$.

2325: %                $z_{21}^{(1)} \in \mZ_{21}^{(1)}$.

2326:                 $\Rbad$ uniformly and independently partitions the message set $\mZ_{21}^{(1)}$

2327:                  into $2^{nR_{21}^{(1)}}$ subsets indexed by

2328:                 $w_{21}^{(1)} \in \mW_{21}^{(1)} = \left\{1,2,...,2^{nR_{21}^{(1)}}\right\}$. Denote these subsets

2329:                 with $\mS_{21,w_{21}^{(1)}}^{(1)}$.

2330:             \item For the $k$'th conference step from $\Rgood$ to $\Rbad$, $\Rgood$ considers each combination of

2331:                 $z_{12}^{(1)},z_{12}^{(2)},...,z_{12}^{(k-1)}$,

2332:                 $z_{21}^{(1)},z_{21}^{(2)},...,z_{21}^{(k-1)}$. For each combination, $\Rgood$ generates a codebook with $2^{nR_{12}'^{(k)}}$

2333:                 messages indexed by $z_{12}^{(k)} \in \mZ_{12}^{(k)} = \left\{1,2,...,2^{nR_{12}'^{(k)}}\right\}$,

2334:                 according to the distribution

2335:                  $p\left(\hy_1^{(k)}|\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)$.

2336:                 $\Rgood$ uniformly and independently partitions the message set

2337:                 $\mZ_{12}^{(k)}$ into $2^{nR_{12}^{(k)}}$ subsets indexed by

2338:                 $w_{12}^{(k)} \in \mW_{12}^{(k)} = \left\{1,2,...,2^{nR_{12}^{(k)}}\right\}$. Denote these subsets

2339:                 with $\mS_{12,w_{12}^{(k)}}^{(k)}$.

2340:             \item The codebook for the $k$'th conference step from $\Rbad$ to $\Rgood$ is generated in a parallel manner for each combination

2341:                 of $z_{12}^{(1)},z_{12}^{(2)},...,z_{12}^{(k)}$, $z_{21}^{(1)},z_{21}^{(2)},...,z_{21}^{(k-1)}$.

2342:

2343:

2344:         \end{itemize}

2345:

2346:

2347:

2348:

2349:

2350:

2351:     \subsubsection{Decoding and Encoding at $\Rgood$ at the $k$'th Conference Cycle ($k \le K$) for Transmission

2352:         Block $i$}

2353:     \label{sec:DecEncMultiStepRgood}

2354:         $\Rgood$ first needs to decode the message $z_{21}^{(k-1)}$ sent from $\Rbad$ at the $(k-1)$'th cycle.

2355:         To that end, $\Rgood$ uses $w_{21}^{(k-1)}$, the index received from $\Rbad$ at the $(k-1)$'th conference

2356:         step. In

2357:         decoding $z_{21}^{(k-1)}$ we assume that all the previous $z_{21}^{(1)},z_{21}^{(2)},...,z_{21}^{(k-2)}$

2358:         were correctly decoded at $\Rgood$. We denote the $\hyvec_2^{(k)}$ sequences corresponding to

2359:         $z_{21}^{(1)},z_{21}^{(2)},...,z_{21}^{(k-2)}$ by\\

2360:          $\hyvec_2(1), \hyvec_2(2), ...,\hyvec_2(k-2)$, and

2361:         similarly define $\hyvec_1(1), \hyvec_1(2) ,..., \hyvec_1(k-1)$.

2362:         \begin{itemize}

2363:             \item $\Rgood$ first generates the set $\mL_1(k-1)$ defined by:

2364:                 \begin{eqnarray*}

2365:                     &  & \mL_1(k-1) = \bigg\{z_{21}^{(k-1)} \in  \mZ_{21}^{(k-1)} :

2366:                         \Big(\hyvec_2^{(k-1)}(z_{21}^{(k-1)}|z_{12}^{(1)},z_{12}^{(2)},...,z_{12}^{(k-1)},z_{21}^{(1)},z_{21}^{(2)},...,z_{21}^{(k-2)}),\\

2367:                     &  & \phantom{xxxxxxxxxxxxx} \hyvec_1(1),\hyvec_1(2),...,\hyvec_1(k-1),\hyvec_2(1),\hyvec_2(2),...,\hyvec_2(k-2),\yvec_1(i)\Big)\in \styp\bigg\}.

2368:                 \end{eqnarray*}

2369:             \item $\Rgood$ then looks for a unique $z_{21}^{(k-1)} \in \mZ_{21}^{(k-1)}$ such that

2370:                     $z_{21}^{(k-1)} \in \mL_1(k-1) \bigcap \mS_{21,w_{21}^{(k-1)}}^{(k-1)}$. If there is none or

2371:                     there is more than one, an error is declared.

2372:             \item From an argument similar to \cite{Kaspi:85}, the probability of error can be made arbitrarily small

2373:                 by taking $n$ large enough as long as

2374:                 \[

2375:                     R_{21}'^{(k-1)} < I\left(\hY_2^{(k-1)};Y_1\big| \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},

2376:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-2)}\right) + R_{21}^{(k-1)} - \eps.

2377:                 \]

2378:                 Here, $k > 1$, since for the first conference message from $\Rgood$ to $\Rbad$ no

2379:                 decoding takes place.

2380:         \end{itemize}

2381:         In generating the $k$'th conference message to $\Rbad$, it is assumed that all the previous $k-1$ messages from

2382:         $\Rbad$ were decoded correctly.

2383:         \begin{itemize}

2384:             \item $\Rgood$ looks for a message $z_{12}^{(k)} \in \mZ_{12}^{(k)}$ such that

2385:                 \begin{eqnarray*}

2386:                     & & \Big(\hyvec_1^{(k)}(z_{12}^{(k)}|z_{12}^{(1)},z_{12}^{(2)},...,z_{12}^{(k-1)},z_{21}^{(1)},z_{21}^{(2)},...,z_{21}^{(k-1)}),\\

2387:                     & & \phantom{xxxx}    \hyvec_1(1),\hyvec_1(2),...,\hyvec_1(k-1),\hyvec_2(1),\hyvec_2(2),...,\hyvec_2(k-1),\yvec_1(i) \Big) \in \styp.

2388:                 \end{eqnarray*}

2389:                 From the argument in \cite{Kaspi:85}, the probability that such a sequence exists can be made arbitrarily close to $1$

2390:                 by taking $n$ large enough as long as

2391:                 \[

2392:                     R_{12}'^{(k)} > I\left(\hY_1^{(k)};Y_1\Big|\hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},\hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)} \right) + \eps.

2393:                 \]

2394:             \item $\Rgood$ looks for the partition of $\mZ_{12}^{(k)}$ into which $z_{12}^{(k)}$ belongs. Denote the index of this

2395:                 partition with $w_{12}^{(k)}$.

2396:             \item $\Rgood$ transmits $w_{12}^{(k)}$ to $\Rbad$ through the conference link.

2397:         \end{itemize}

2398:

2399:     \subsubsection{Decoding and Encoding at $\Rbad$ at the $k$'th Conference Step ($k \le K$) for Transmission

2400:         Block $i$}

2401:     \label{sec:DecEncMultiStepRbad}

2402:         Using similar arguments to section \ref{sec:DecEncMultiStepRgood}, we obtain the following

2403:         rate constraints:

2404:         \begin{itemize}

2405:             \item Decoding $z_{12}^{(k)}$ at $\Rbad$ can be done with an arbitrarily small

2406:                 probability of error by taking $n$ large enough as long as

2407:                 \[

2408:                     R_{12}'^{(k)} < I\left(\hY_1^{(k)};Y_2\big| \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},

2409:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right) + R_{12}^{(k)} - \eps.

2410:                 \]

2411:             \item Encoding $z_{21}^{(k)}$ can be done with an arbitrarily small probability of error

2412:                 by taking $n$ large enough as long as

2413:                 \[

2414:                     R_{21}'^{(k)} > I\left(\hY_2^{(k)};Y_2\Big|\hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k)},

2415:                                 \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)} \right) + \eps.

2416:                 \]

2417:         \end{itemize}

2418:

2419:

2420:     \subsubsection{Combining All Conference Rate Bounds}

2421:     \label{sec:combining_bounds_general}

2422:         First consider the bounds on $R_{12}'^{(k)}$, $k = 1,2,...,K$:

2423:         \begin{eqnarray*}

2424:         &  &    I\left(\hY_1^{(k)};Y_1\Big|\hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},\hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)} \right) + \eps

2425:                 < R_{12}'^{(k)} < \\

2426:         &  &  \phantom{xxxxx}I\left(\hY_1^{(k)};Y_2\big| \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},

2427:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right) + R_{12}^{(k)} - \eps.

2428:         \end{eqnarray*}

2429:         This can be satisfied only if

2430:         \begin{eqnarray*}

2431:         &  &  I\left(\hY_1^{(k)};Y_2\big| \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},

2432:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right) + R_{12}^{(k)} - \eps >  \\

2433:         &  &  \phantom{xxxxx}  I\left(\hY_1^{(k)};Y_1\Big|\hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},\hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)} \right) + \eps\\

2434:         & \Rightarrow  &   R_{12}^{(k)}  >  H\left(\hY_1^{(k)} \big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},

2435:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right) \\

2436:         &  &  \phantom{xxxxx}  -H\left(\hY_1^{(k)}\Big|Y_1,\hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},\hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)} \right) + 2\eps\\

2437:         &   &  \phantom{xxx} =  I\left(\hY_1^{(k)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},

2438:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right) +2\eps.

2439:         \end{eqnarray*}

2440:         Hence

2441:         \begin{eqnarray}

2442:             C_{12} & = & \sum_{k = 1}^K R_{12}^{(k)} \nonumber\\

2443:                    & \ge & \sum_{k = 1}^K \bigg(I\left(\hY_1^{(k)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},

2444:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right) +2\eps\bigg)\nonumber\\

2445:                    & = & \sum_{k = 1}^{K} \bigg[I\left(\hY_1^{(k)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},

2446:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right)\nonumber\\

2447:                    &   & \phantom{xxxxxx} + I\left(\hY_2^{(k)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k)},

2448:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right)\bigg] + 2K\eps \nonumber\\

2449: %                   &   & \phantom{xxxxxxxxxx} +I\left(\hY_1^{(K)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K-1)},

2450: %                        \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)}\right)  \nonumber\\

2451: %                   &   & \phantom{xxxxxxxxxxxxxx} + I\left(\hY_2^{(K)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)},

2452: %                        \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)}\right) + 2K\eps \nonumber\\

2453:                    & = &  \sum_{k = 1}^{K} I\left(\hY_1^{(k)},\hY_2^{(k)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},

2454:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right) + 2K\eps \nonumber\\

2455: %                   &   & \phantom{xxxxxxx} +I\left(\hY_1^{(K)}, \hY_2^{(K)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K-1)},

2456: %                        \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)}\right) + 2K\eps\nonumber\\

2457:                    \label{eqn:constr_c12_general_bc}

2458:                    & = &   I\left( \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)},

2459:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K)};Y_1\big| Y_2\right) +2K\eps,

2460:         \end{eqnarray}

2461:         and similarly

2462:         \begin{equation}

2463:         \label{eqn:constr_c21_general_bc}

2464:             C_{21} \ge I\left( \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)},

2465:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K)};Y_2\big| Y_1\right) +2K\eps.

2466:         \end{equation}

2467:          This provides the rate constraints on the conference auxiliary variables of \eqref{eqn:c12_constr_multi_step} and

2468:          \eqref{eqn:c21_constr_multi_step}.

2469:

2470:          \subsubsection{Decoding at $\Rgood$}

2471:          $\Rgood$ uses $\yvec_1(i)$ and $\hyvec_2^{(1)},\hyvec_2^{(2)},...,\hyvec_2^{(K)}$ received from $\Rbad$, to decode $(l_i,w_{1,i})$ as follows:

2472:          \begin{itemize}

2473:             \item $\Rgood$ looks for a unique message $l \in \mM_0$ such that

2474:             \[

2475:                 \big(\wvec(l),\yvec_1(i),\hyvec_2^{(1)},\hyvec_2^{(2)},...,\hyvec_2^{(K)}\big) \in \styp.

2476:             \]

2477:             From the point-to-point channel capacity theorem (see \cite{ElGamalM:81}), this can be done with an arbitrarily

2478:             small probability of error by taking $n$ large enough as long as

2479:             \begin{equation}

2480:             \label{eqn:constr_r0_decode_Rgood}

2481:                 R_0 \le I(W;Y_1,\hYvec_2).

2482:             \end{equation}

2483:             Denote the decoded message by $\hat{l}_i$. Now $\Rgood$ decodes $w_{1,i}$ by looking for a unique $k \in \mZ_1$

2484:             such that

2485:             \[

2486:                 \big(\uvec(k|\hat{l}_i),\wvec(\hat{l}_i),\yvec_1(i),\hyvec_2^{(1)},\hyvec_2^{(2)},...,\hyvec_2^{(K)}\big) \in \styp.

2487:             \]

2488:             If a unique such $k$ exists, then denote the decoded index with $\hat{k}=k$. Now $\Rgood$ looks for the partition of $\mZ_1$ into which $\hat{k}$ belongs and sets $\hw_{1,i}$

2489:             to be the index of that partition: $\hat{k} \in B_{\hw_{1,i}}$.

2490:             Similarly to  the proof in \cite[ch 14.6.2]{cover-thomas:it-book}, assuming successful decoding

2491:             of $l_i$, the probability of error can be made arbitrarily small by taking $n$ large enough as long as

2492:             \[

2493:                 \frac{1}{n}\log_2||\mZ_1|| \le I(U;Y_1,\hYvec_2|W),

2494:             \]

2495:             which is satisfied by construction.

2496:

2497:          \end{itemize}

2498:

2499:          \subsubsection{Decoding at $\Rbad$}

2500:          Repeating similar steps for decoding at $\Rbad$ we get that decoding $l_i$ can be done with an arbitrarily

2501:          small probability of error by taking $n$ large enough as long as

2502:          \begin{equation}

2503:          \label{eqn:constr_r0_decode_Rbad}

2504:             R_0 \le I(W; \hYvec_1,Y_2),

2505:          \end{equation}

2506:          and assuming successful decoding of $l_i$, decoding $w_{2,i}$ with an arbitrarily small probability of error

2507:          requires that

2508:          \[

2509:                 \frac{1}{n}\log_2||\mZ_2|| \le I(V;\hYvec_1,Y_2|W),

2510:          \]

2511:          which again is satisfied by construction.

2512:

2513:          Finally, collecting \eqref{eqn:R1_cond_lemma}, \eqref{eqn:R2_cond_lemma},

2514:          \eqref{eqn:R1_and_R2_cond_lemma}, \eqref{eqn:constr_r0_decode_Rgood} and \eqref{eqn:constr_r0_decode_Rbad} gives

2515:          the achievable rate constraints of theorem \ref{thm:multi-step-general-bc}, and \eqref{eqn:constr_c12_general_bc}

2516:          and \eqref{eqn:constr_c21_general_bc} give the conference rate constraints of the theorem.

2517: \end{proof}

2518:

2519:

2520:

2521:

2522:

2523:

2524:

2525:

2526:

2527:

2528:

2529: \subsection{The Cooperative Broadcast Channel with a Single Common Message}

2530: \label{sec:multi-step-single-common-message}

2531:

2532: In the single common message cooperative broadcast scenario,

2533: a single transmitter sends a message to two receivers encoded in a single channel codeword

2534: $X^n$. % where the superscript $n$ denotes the length of a vector.

2535: \begin{figure}[ht]

2536:      \epsfxsize=0.6\textwidth \leavevmode\centering\epsffile{Broadcast_Channel_Common.eps}

2537:     \caption{The broadcast channel with cooperating receivers, for the single common message case.

2538:         $\hat{W}$ and $\hat{\hat{W}}$ are the estimates of $W$ at $\Rgood$ and $\Rbad$ respectively.}

2539:     \label{fig:broadcast-cooperation-common}

2540: \end{figure}

2541: This scenario is depicted in figure \ref{fig:broadcast-cooperation-common}.

2542: %The conference messages are

2543: %functions of $Y_1^n$ (at $\Rgood$), $Y_2^n$ (at $\Rbad$),

2544: %and the previous conference messages received from the

2545: %other decoder, as defined by Willems in~\cite{Willems:83}.

2546: After conferencing, each receiver decodes the message.

2547: For this setup we have the following upper bound:

2548: \begin{proposition}

2549:          \label{prop:common_upper}

2550:          {\it (\cite[theorem 6]{RonISIT05:05})}

2551:          {\it

2552:          Consider the general broadcast channel $(\mX, p(y_1,y_2|x), \mY_1 \times \mY_2)$ with cooperating

2553:          receivers having noiseless conference links of finite capacities $C_{12}$ and $C_{21}$ between them.

2554:          Then, for sending a common message to both receivers, any rate $R$ must satisfy

2555:          \[

2556:             R \! \le \!\sup_{p_X(x)} \! \min \! \Big\{I(X;Y_1) + C_{21}, I(X;Y_2) + C_{12}, I(X;Y_1,Y_2) \Big\}.

2557:          \]}

2558: \end{proposition}

2559: In \cite{RonISIT05:05} we also derived the following achievable rate for

2560: %the general broadcast channel with a single common message:

2561: this scenario:

2562: \begin{proposition}

2563:         \label{prop:achive_common_one_step}

2564:          {\it (\cite[theorem 5]{RonISIT05:05})} {\it

2565:          Assume the broadcast channel setup of proposition \ref{prop:common_upper}.

2566:          Then, for sending a common message to both receivers, any rate $R$ satisfying

2567:          \begin{subequations}

2568:              \begin{eqnarray}

2569:                  R & \le & \sup_{p_X(x)}\Big[ \max\Big\{ R_{12}(p_X(x)), R_{21}(p_X(x)) \Big\} \Big], \nonumber\\

2570:             \label{eqn:PrevResult1}

2571:                  R_{12}(p_X(x)) & \triangleq & \min \Big( I(X;Y_1) + C_{21} ,  \max\big\{I(X;Y_2),

2572:                         I(X;Y_2) - H(Y_1|Y_2,X) + \min\big(C_{12},H(Y_1|Y_2)\big)\big\} \Big),\phantom{xx}\\

2573:              \label{eqn:PrevResult2}

2574:                  R_{21}(p_X(x)) & \triangleq & \min \Big( I(X;Y_2) + C_{12} ,

2575:                         \max \big\{I(X;Y_1), I(X;Y_1)  - H(Y_2|Y_1,X) + \min\big(C_{21},H(Y_2|Y_1)\big)\big\} \Big),\phantom{xx}

2576:             \end{eqnarray}

2577:          \end{subequations}

2578: %         with the appropriate $C_{12} > H(Y_1|Y_2,X)$  or  $C_{21} > H(Y_2|Y_1,X)$ (the one used for the first cooperation step),

2579:          is achievable.}

2580: \end{proposition}

2581:

2582: Note that this rate expression

2583: depends only on the parameters of the problem and is, therefore, computable. In proposition

2584: \ref{prop:achive_common_one_step}  the achievable rate  increases linearly with the cooperation

2585: capacity. The downside of this method is that it %cannot be applied to any given conference capacity.

2586: produces a rate increase over the non-cooperative rate only for conference link capacities that exceed

2587: some minimum values.

2588: % The same limitation also exists in theorem \ref{thm:CEG_EAF}.

2589: % We note that \cite{DraperFK:03} presents a different approach for multi-step conference.~The approach of

2590: % \cite{DraperFK:03} generalizes \cite{CoverG:79} but still requires auxiliary random variables.

2591: % The work in \cite{DraperFK:03} is for the single common message case, and also uses the independent

2592: % broadcast channel. Here we derive the rate for the general broadcast channel with three messages, and

2593: % when specializing to the single common message case, we obtain that \cite[theorem 2]{DraperFK:03} also holds

2594: % for the general broadcast channel.

2595: %

2596: %  The motivation for deriving the new multi-step conference

2597: % is twofold: first we note that in the worst case we can always send information

2598: % about part of the received symbols and use this partial information to improve decoding. Second, assume

2599: % that the maximum rate in proposition \ref{prop:achive_common_one_step} is achieved when $\Rgood$

2600: % helps $\Rbad$ first, and then $\Rbad$ decodes and helps $\Rgood$

2601: % (this corresponds to $R_{12}(p(x))$ in equation (\ref{eqn:PrevResult1})).

2602: % The achievable rate for each of the receivers in this case is

2603: % \begin{eqnarray*}

2604: %     R_1  & \le & I(X;Y_1) + C_{21}, \\

2605: %     R_2  & \le & I(X;Y_2) - H(Y_1|Y_2,X) + C_{12},

2606: % \end{eqnarray*}

2607: % for some $p_X(x)$ on $\mX$ and as long as $H(Y_1|Y_2,X) \le C_{12} \le H(Y_1|Y_2)$.

2608: % Now, if $R_1 \le R_2$ then $\min(R_1,R_2) = R_1$ which is the optimal rate,

2609: % since proposition \ref{prop:common_upper} asserts that

2610: % the non-cooperative rate to $\Rgood$

2611: % cannot be increased by more than $C_{21}$.

2612: % However, if after the conference we have that $R_1 > R_2$, this implies that we helped

2613: % $\Rgood$ too much and helped $\Rbad$ too little. We could then increase the rate by

2614: % helping $\Rbad$ more and helping $\Rgood$ less. This is achieved with the multi-step conference.

2615:

2616:

2617:

2618:

2619:

2620:

2621:

2622:

2623:

2624: % \subsection{A Multi-Step Conference for the Broadcast Channel with a Single Common Message}

2625: Specializing the three independent messages result to the single common message case we obtain the

2626: following achievable rate with a $K$-cycle conference for the general BC with a single common message:

2627:

2628: \begin{corollary}

2629:     \label{corr:single-coomon-message-with-multi-step}

2630:     \it

2631:     Consider the general broadcast channel with cooperating receivers, having noiseless conference

2632:     links of finite capacities $C_{12}$ and $C_{21}$ between them. Let the receivers hold a conference that

2633:     consists of $K$ cycles. Then, any rate $R$ satisfying

2634:     \begin{equation}

2635:         R \le \max \left\{R_{12}, R_{21} \right\},

2636:     \end{equation}

2637:     is achievable.

2638:

2639:     Here $R_{12}$ is defined as follows:

2640:     \begin{equation}

2641:         R_{12} = \sup_{p_X(x), \alpha \in [0,1]} \min \left\{ R_1, R_2 \right\},

2642:     \end{equation}

2643:     with

2644:     \begin{subequations}

2645:     \begin{eqnarray}

2646:         \label{eqn:R_1}

2647:         R_1 & = & I\left(X;Y_1,\hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)}\right) + \alpha C_{21},\\

2648:         \label{eqn:R_2}

2649:         R_2 & = & I\left(X;Y_2,\hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)}\right),

2650:     \end{eqnarray}

2651:     \end{subequations}

2652:     subject to

2653:     \begin{subequations}

2654:     \begin{eqnarray}

2655:         C_{12} & \ge & I\left(Y_1; \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)},\hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)}\Big|Y_2\right),\\

2656:         (1-\alpha)C_{21} & \ge & I\left(Y_2; \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)},\hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)}\Big|Y_1\right),

2657:     \end{eqnarray}

2658:     \end{subequations}

2659:     for the joint distribution

2660:     \begin{eqnarray*}

2661:          &  & p\left(x,y_1,y_2,\hy_1^{(1)}, \hy_1^{(2)},...,\hy_1^{(K)},\hy_2^{(1)}, \hy_2^{(2)},...,\hy_2^{(K-1)}\right) =\\

2662:          &  & \phantom{xxx} p(x) p(y_1,y_2|x) p\left(\hy_1^{(1)}|y_1\right) p\left(\hy_2^{(1)}|y_2,\hy_1^{(1)}\right)\cdot\cdot\cdot

2663:                 p\left(\hy_1^{(k)}|y_1,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)\times\\

2664:          &  &  \phantom{xxx} p\left(\hy_2^{(k)}|y_2,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)

2665:                 \cdot\cdot\cdot p\left(\hy_2^{(K-1)}|y_2,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(K-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(K-2)}\right)\\

2666:          &  &  \phantom{xxx} \times p\left(\hy_1^{(K)}|y_1,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(K-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(K-1)}\right).

2667:     \end{eqnarray*}

2668:     The cardinalities of the $k$'th auxiliary random variables are bounded by:

2669:     \begin{eqnarray*}

2670:         ||\mhY_1^{(k)}|| & \le & ||\mY_1|| \times \prod_{l=1}^{k-1} ||\mhY_1^{(l)}|| \times \prod_{l=1}^{k-1} ||\mhY_2^{(l)}|| + 1, \qquad

2671:             \qquad k = 1,2,...,K\\

2672:         ||\mhY_2^{(k)}|| & \le & ||\mY_2|| \times \prod_{l=1}^{k} ||\mhY_1^{(l)}|| \times \prod_{l=1}^{k-1} ||\mhY_2^{(l)}|| + 1,

2673:             \qquad \qquad k = 1,2,...,K-1.

2674:     \end{eqnarray*}

2675:     $R_{21}$ is defined in a parallel manner to $R_{12}$, with $\Rbad$ performing the first conference step, and the appropriate change

2676:     in the probability chain.

2677:

2678: \end{corollary}

2679:

2680:

2681:

2682: \bigskip

2683: The proof of corollary \ref{corr:single-coomon-message-with-multi-step} is provided in appendix \ref{appndx:prof_corollary_single_common}.

2684: \smallskip

2685:

2686: We note that \cite[theorem 2]{DraperFK:03} presents a similar result for this scenario, under the constraint that the memoryless

2687: broadcast channel can be decomposed as $p(\yvec_1,\yvec_2|\xvec) = \prod_{i=1}^n p(y_{1,i}|x_i)p(y_{2,i}|x_i)$, and

2688: considering the sum-rate of the conference. Here we show that the same achievable rate expressions hold

2689: for the general memoryless broadcast channel.

2690: A recent result appears in \cite{Shlomo_BZ}, where

2691: the single common message case for a Gaussian BC is considered.

2692: In the multi-cycle conference considered in this section, we let the auxiliary RVs follow a more

2693: general chain than that of \cite{Shlomo_BZ} --- which results in a larger achievable rate.

2694:

2695: \subsection{A Single-Cycle Conference with TS-EAF}

2696: Consider the case where only a single cycle of conferencing between the receivers is allowed.

2697:  Specializing corollary \ref{corr:single-coomon-message-with-multi-step} to the single-cycle case

2698:  we obtain

2699: \begin{subequations}

2700:  \begin{eqnarray}

2701:     \label{eqn:two-step_TSEAF-R_1}

2702:     R_1 & = & I(X;Y_1) + C_{21}\\

2703:     \label{eqn:two-step_TSEAF-R_2}

2704:     R_2 & = & I(X;Y_2, \hY_1^{(1)})\\

2705:     \label{eqn:two-step_TSEAF-C12}

2706:     C_{12} & \ge & I(Y_1; \hY_1^{(1)}|Y_2),

2707:  \end{eqnarray}

2708: \end{subequations}

2709: and the TS-EAF assignment is

2710: \[

2711:     p(\hy_1^{(1)}|y_1) = \left\{

2712:         \begin{array}{cl}

2713:             q_1, & \hy_1^{(1)} = y_1\\

2714:             1-q_1, & \hy_1^{(1)} = \Omega \notin \mY_1.

2715:         \end{array}

2716:     \right.

2717: \]

2718: Applying the TS-EAF assignment to \eqref{eqn:two-step_TSEAF-C12} and \eqref{eqn:two-step_TSEAF-R_2} we obtain

2719: \begin{eqnarray*}

2720:     C_{12} & \ge & I(Y_1; \hY_1^{(1)}|Y_2)\\

2721:         & = & H(Y_1|Y_2) - H(Y_1|Y_2, \hY_1^{(1)})\\

2722:         & = & H(Y_1|Y_2) - q_1 H(Y_1|Y_2, Y_1) - (1-q_1)H(Y_1|Y_2)\\

2723:         & = & q_1 H(Y_1|Y_2)\\

2724:     R_2 & = & I(X;Y_2, \hY_1^{(1)}) \\

2725:         & = & I(X;Y_2) + H(X|Y_2) - H(X|Y_2,\hY_1^{(1)})\\

2726:         & = & I(X;Y_2) + H(X|Y_2) - (1-q_1) H(X|Y_2) - q_1 H(X|Y_2,Y_1)\\

2727:         & = & I(X;Y_2) + q_1 I(X;Y_1|Y_2).

2728: \end{eqnarray*}

2729: Maximizing $R_2$ requires maximizing $q_1 \in [0,1]$. Therefore setting $q_1 = \left[\frac{C_{12}}{H(Y_1|Y_2)}\right]^*$, we

2730: obtain $R_2 = I(X;Y_2) + \left[\frac{C_{12}}{H(Y_1|Y_2)}\right]^* I(X;Y_1|Y_2)$. Combining with $R_1$ we have

2731: that the rate when $\Rbad$ decodes first is given by

2732: \[

2733:     R_{12} = \min \left\{I(X;Y_1) + C_{21}, I(X;Y_2) + \left[\frac{C_{12}}{H(Y_1|Y_2)}\right]^* I(X;Y_1|Y_2)\right\},

2734: \]

2735: and by a symmetric argument we can obtain $R_{21}$. We conclude that the rate for the single-cycle conference with TS-EAF is given by

2736: \begin{eqnarray*}

2737:     R & = &  \sup_{p(x)} \max\left\{ R_{12}, R_{21} \right\},\\

2738:     R_{12} & = & \min \left\{I(X;Y_1) + C_{21}, I(X;Y_2) + \left[\frac{C_{12}}{H(Y_1|Y_2)}\right]^* I(X;Y_1|Y_2)\right\}\\

2739:     R_{21} & = & \min \left\{I(X;Y_1) + \left[\frac{C_{21}}{H(Y_2|Y_1)}\right]^* I(X;Y_2|Y_1), I(X;Y_2) + C_{12}\right\}.

2740: \end{eqnarray*}

2741: We note that this rate is always better than the point-to-point rate and also better than the joint-decoding rate of

2742: proposition \ref{prop:achive_common_one_step} (whenever cooperation can provide a rate increase).

2743: However, as in proposition \ref{prop:achive_common_one_step}, at least one receiver has to satisfy the Slepian-Wolf

2744: condition for the full cooperation rate to be

2745: achieved. We also note that using TS-EAF with more than two steps does not improve upon this result.

2746:

2747: Finally, we demonstrate the results of proposition \ref{prop:achive_common_one_step} and corollary \ref{corr:single-coomon-message-with-multi-step} through

2748: a symmetric BC example: consider the symmetric broadcast channel where $\mY_1 = \mY_2 = \mY$ and

2749:               \[

2750:                   p_{Y_1|Y_2,X}(a|b,x) = p_{Y_2|Y_1,X}(a|b,x),

2751:               \]

2752:               for any $(a,b) \in \mY \times \mY$ and $x \in \mX$. Let $C_{21} = C_{12} = C$.

2753:               For this scenario we have that $R_{12} = R_{21}$ in corollary \ref{corr:single-coomon-message-with-multi-step} and

2754:               also $R_{12}(p_X(x)) = R_{21}(p_X(x))$ in proposition \ref{prop:achive_common_one_step}. The resulting rate is depicted in

2755:               figure \ref{fig:compare:ft_and_ts} for a fixed probability $p(x)$.

2756:                 \begin{figure}[htb]

2757:                          \centering

2758:                          \scalebox{0.60}{\includegraphics{Two-step-TS_vs_JT.eps}}

2759:                          \caption{\small The achievable rate $R$ vs. conference capacity $C$,

2760:                               for proposition \ref{prop:common_upper} (dashed-dot),

2761:                               proposition \ref{prop:achive_common_one_step} (dashed) and corollary \ref{corr:single-coomon-message-with-multi-step} (solid),

2762:                               for the symmetric broadcast channel.}

2763:                          \label{fig:compare:ft_and_ts}

2764:                   \end{figure}

2765:               We can see that for this case, time-sharing exceeds joint-decoding for all values of $C$. Both methods meet the upper

2766:               bound at $C = H(Y_1|Y_2)$. We note that this is a corrected version of the figure in \cite{ron:ISIT06}.

2767:

2768:

2769:

2770: \begin{comment}

2771:             \subsection{A Three-Step Conference with TS-EAF Relaying Example}

2772:             Consider the following three-step conference:

2773:             \begin{enumerate}

2774:                 \item $\Rbad$  transmits information at rate $C_{21}^a$ to $\Rgood$.

2775:                 \item $\Rgood$ transmits information at rate $C_{12}$ to $\Rbad$.

2776:                 \item $\Rbad$ decodes and sends information at rate $C_{21}^b$ to $\Rgood$.

2777:             \end{enumerate}

2778:             Lastly, $\Rgood$ decodes. We set $C_{21}^a + C_{21}^b = C_{21}$.

2779:             We note that in the following we

2780:             analyze this order of conference, whose rate is denoted by $R_{212}$. However, since we can choose the order that yields

2781:             the highest rate, repeating the same considerations we derive symmetric expressions

2782:             for the same scheme with the roles of $\Rgood$ and $\Rbad$ switched.

2783:             %Theorem \ref{thm:achieve-three-steps}

2784:             Corollary \ref{corr:three-steps-special}

2785:             stated below considers, therefore, both possible orders.

2786:             % and in addition a two-step conference based on theorem

2787:             % \ref{thm:main_thm},

2788:             % that is used if the capacities of the conference links are not large enough for applying the decoding

2789:             % scheme of appendix \ref{sec:decoding-at-Rx2}, or if the rate increase from the three-step conference is too little.

2790:             % The two-step conference will

2791:             % be described in appendix \ref{sec:two-step}. Theorem \ref{thm:achieve-three-steps} selects the configuration that results in the highest rate.

2792:

2793:

2794:             %\subsubsection{Specializing Corollary \ref{corr:single-coomon-message-with-multi-step} to Three Steps}

2795:             With three conference steps, the expressions in corollary \ref{corr:single-coomon-message-with-multi-step} specializes to the following

2796:             \begin{subequations}

2797:             \label{eqn:specialize_single_to_three_steps}

2798:                 \begin{eqnarray}

2799:                     R_1    &  =  & I\big(X;Y_1,\hY_2^{(1)}\big) + (1-\alpha)C_{21}\\

2800:                 \label{eqn:R2_specialize_three_steps}

2801:                     R_2    &  =  & I\big(X;\hY_1^{(1)},Y_2\big)\\

2802:                 \label{eqn:C12_three_steps_special}

2803:                     C_{12} & \ge & I(Y_1;\hY_1^{(1)}, \hY_2^{(1)}|Y_2)\\

2804:                 \label{eqn:C21_three_steps_special}

2805:                     \alpha C_{21} & \ge & I(Y_2;\hY_1^{(1)}, \hY_2^{(1)}|Y_1)\\

2806:                     p(\hy_1^{(1)},\hy_2^{(1)}|y_1,y_2) & = & p(\hy_2^{(1)}|y_2) p(\hy_1^{(1)}|y_1,\hy_2^{(1)}).

2807:                 \end{eqnarray}

2808:             \end{subequations}

2809:             Using TS-EAF, the assignment of $p(\hy_2^{(1)}|y_2)$ is

2810:             \[

2811:                 p(\hy_2^{(1)}|y_2) = \left\{

2812:                     \begin{array}{cl}

2813:                             q_2 &, \hy_2^{(1)} = y_2\\

2814:                             1-q_2 &, \hy_2^{(1)} = \Delta_2 \notin \mY_2.

2815:                     \end{array}

2816:                     \right.

2817:             \]

2818:             Now, as to the assignment of $p(\hy_1^{(1)}|y_1,\hy_2^{(1)})$, we need to set

2819:             \[

2820:                 p(\hy_1^{(1)}|y_1,\hy_2^{(1)} = y_2) = \left\{

2821:                     \begin{array}{cl}

2822:                             q_1' &, \hy_1^{(1)} = y_1\\

2823:                             1-q_1' &, \hy_1^{(1)} = \Delta_1 \notin \mY_1

2824:                     \end{array}

2825:                     \right.,\;\;

2826:                         p(\hy_1^{(1)}|y_1,\hy_2^{(1)} = \Delta_2) = \left\{

2827:                     \begin{array}{cl}

2828:                             q_1'' &, \hy_1^{(1)} = y_1\\

2829:                             1-q_1'' &, \hy_1^{(1)} = \Delta_1 \notin \mY_1.

2830:                     \end{array}

2831:                     \right.

2832:             \]

2833:             Now, let us examine the expressions in \eqref{eqn:specialize_single_to_three_steps} where $\hY_1^{(1)}$ is used:

2834:              first consider \eqref{eqn:R2_specialize_three_steps}, here we see that whether $\hY_2^{(1)}$ is $Y_2$ or

2835:              $\Delta_2$ does not matter. Therefore, what determines the rate is the probability $\Pr(\hY_1^{(1)}|Y_1)$ rather than

2836:              $\Pr(\hY_1^{(1)}|Y_1, \hY_2^{(1)})$.

2837:              Similarly for \eqref{eqn:C21_three_steps_special} we have that since $Y_1$ is given, then the actual value of

2838:              $\hY_1^{(1)}$ does not affect the value of the mutual information.

2839:             \begin{eqnarray*}

2840:                 I(Y_2;\hY_1^{(1)}, \hY_2^{(1)}|Y_1) & = & I(Y_2; \hY_2^{(1)}|Y_1) + I(Y_2;\hY_1^{(1)}|Y_1, \hY_2^{(1)})\\

2841:                     & = & I(Y_2; \hY_2^{(1)}|Y_1).

2842:             \end{eqnarray*}

2843:              Finally consider \eqref{eqn:C12_three_steps_special}:

2844:              \begin{eqnarray*}

2845:                 I(Y_1;\hY_1^{(1)}, \hY_2^{(1)}|Y_2) & = & H(Y_1 | Y_2) - H(Y_1 | \hY_1^{(1)}, \hY_2^{(1)},Y_2)\\

2846:                     & = & H(Y_1 | Y_2) - q_2 H(Y_1 | \hY_1^{(1)}, Y_2,Y_2) - (1-q_2) H(Y_1 | \hY_1^{(1)},\Delta_2,Y_2)\\

2847:                     & = & H(Y_1 | Y_2) - q_1' q_2 H(Y_1 | Y_1, Y_2,Y_2) - (1-q_1') q_2 H(Y_1 | \Delta_1, Y_2,Y_2) \\

2848:                     &   & \qquad \qquad - q_1'' (1-q_2) H(Y_1 | Y_1,\Delta_2,Y_2) - (1-q_1'')(1-q_2) H(Y_1 | \Delta_1,\Delta_2,Y_2) \\

2849:                     & = & H(Y_1 | Y_2) - q_1' q_2 H(Y_1 | Y_1,Y_2) - (1-q_1') q_2 H(Y_1 | Y_2) \\

2850:                     &   & \qquad \qquad - q_1'' (1-q_2) H(Y_1 | Y_1,Y_2) - (1-q_1'')(1-q_2) H(Y_1  | Y_2) \\

2851:                     & = & H(Y_1 | Y_2)  - ((1-q_1') q_2 +(1-q_1'')(1-q_2)) H(Y_1 | Y_2) \\

2852:                     & = & H(Y_1 | Y_2)  - \Pr(\hY_1^{(1)} = \Delta_1) H(Y_1 | Y_2)

2853:              \end{eqnarray*}

2854:             which depends only on $\Pr(\hY_1^{(1)} = Y_1)$. Therefore, it is enough to consider

2855:             \[

2856:                 p(\hy_1^{(1)}|y_1,\hy_2^{(1)}) = \left\{

2857:                     \begin{array}{cl}

2858:                             q_1 &, \hy_1^{(1)} = y_1\\

2859:                             1-q_1 &, \hy_1^{(1)} = \Delta_1 \notin \mY_1

2860:                     \end{array}

2861:                     \right.,

2862:             \]

2863:             independent of $\hY_2^{(1)}$.

2864:             Using this assignment we get for the three-steps conference equations \eqref{eqn:specialize_single_to_three_steps}

2865:             become

2866:             \begin{subequations}

2867:                 \begin{eqnarray}

2868:                 \label{eqn:C12_three_steps_special_explicit}

2869:                     C_{12} & \ge & q_1 H(Y_1|Y_2) \\

2870:                 \label{eqn:C21_three_steps_special_explicit}

2871:                     \alpha C_{21} & \ge & q_2 H(Y_2|Y_1),

2872:                 \end{eqnarray}

2873:             \end{subequations}

2874:             and since $q_1, q_2 \in [0,1]$ we obtain the following corollary:

2875:

2876:             \begin{corollary}

2877:             \label{corr:three-steps-special}

2878:                 For the general BC of corollary \ref{corr:single-coomon-message-with-multi-step}, and rate $R$ satisfying

2879:                 \[

2880:                     R = \sup_{p_X(x), \alpha \in [0,1]} \left\{R_1^{(21)}, R_2^{(21)}, R_1^{(12)}, R_2^{(12)}\right\}

2881:                 \]

2882:             \begin{subequations}

2883:                 \label{eqn:R1R2_specialize_three_steps_explicit}

2884:                 \begin{eqnarray}

2885:                     R_1^{(21)}    &  =  & I\big(X;Y_1\big) + \left[\frac{\alpha C_{21}}{H(Y_2|Y_1)}\right]^*I\big(X;Y_2|Y_1\big) + (1-\alpha)C_{21}\\

2886:                 \label{eqn:R2_specialize_three_steps_explicit}

2887:                     R_2^{(21)}    &  =  & I\big(X;Y_2) + \left[\frac{C_{12}}{H(Y_1|Y_2)} \right]^*I(X;Y_1|Y_2\big),

2888:                 \end{eqnarray}

2889:             \end{subequations}

2890:             and $R_1^{(12)}$ can be obtained from $R_2^{(21)}$ by switching $'1'$ and $'2'$, and similarly

2891:             $R_2^{(12)}$ can be obtained from $R_1^{(21)}$, and $p(x,y_1,y_2) = p(x)p(y_1,y_2|x)$.

2892:             \end{corollary}

2893:

2894:

2895:

2896:             As an example, consider the symmetric BC where $\mY_1 = \mY_2 = \mY$ and

2897:              \[

2898:                      p_{Y_1|Y_2,X}(a|b,x) = p_{Y_2|Y_1,X}(a|b,x),

2899:              \]

2900:              for any $a,b \in \mY \times \mY$ and $x \in \mX$. For $R_2^{(21)}$ to achieve the

2901:              full cooperation rate we need $C_{12} \ge H(Y_1|Y_2)$. For $R_2^{(12)}$ to achieve the full cooperation

2902:              rate we need:

2903:              \begin{eqnarray*}

2904:                 C_{21} \left(\frac{\alpha}{H(Y_2|Y_1)} + \frac{1-\alpha}{I(X;Y_2|Y_1)}\right) & \ge & 1 \\

2905:                 C_{21} \left(\frac{\alpha I(X;Y_2|Y_1) + (1-\alpha)H(Y_2|Y_1)}{H(Y_2|Y_1)I(X;Y_2|Y_1)} \right) & \ge  & 1\\

2906:                 C_{21} \left(\frac{ H(Y_2|Y_1) - \alpha H(Y_2|Y_1,X) }{H(Y_2|Y_1)I(X;Y_2|Y_1)} \right) & \ge  & 1\\

2907:                 C_{21}  & \ge  & H(Y_2|Y_1)\frac{I(X;Y_2|Y_1)}{ H(Y_2|Y_1) - \alpha H(Y_2|Y_1,X) }

2908:              \end{eqnarray*}

2909:              Now when $\alpha < 1$ then

2910:

2911: \end{comment}

2912:

2913:

2914:

2915:

2916:

2917:

2918:

2919:

2920:

2921: \begin{comment}

2922:           \subsubsection{An Alternative Achievable Rate for a Three-Step Conference}

2923:           With three steps we can achieve the following rate

2924:

2925:               \begin{theorem}

2926:               \label{thm:achieve-three-steps}

2927:                   {\it

2928:                    Assume the broadcast channel setup of proposition \ref{prop:common_upper}.

2929:                    Then, for sending a common message to both receivers, any rate $R$ satisfying

2930:           %         $R \le  \sup_{p(x),\alpha}\Big[ \max\Big\{R_{12},R_{21}, R_{121}, R_{212} \Big\} \Big]$,

2931:                    {

2932:                    \setlength\arraycolsep{0mm}

2933:                    \begin{subequations}

2934:                        \begin{eqnarray}

2935:                            &  & R \le  \sup_{p_X(x),\alpha}\Big[ \max\Big\{R_{12},R_{21}, R_{121}, R_{212} \Big\} \Big], \nonumber\\

2936:                                   \label{eqn:thm_3step_3steps_rate}

2937:                            &  & R_{iji} \triangleq \min \Big( \max \Big\{I(X;Y_i), \;\; I(X;Y_i) -H(Y_j|Y_i,X) +

2938:                               \min\big(C_{ji} + \left[\frac{\alpha C_{ij}}{H(Y_i|Y_j)}\right]^*I(Y_j;Y_i),H(Y_j|Y_i) \big)\Big\},\nonumber\\

2939:                            &  &  \phantom{xxxxxxxxxxxxxxxxxxxxxx}     I(X;Y_j) + (1-\alpha)C_{ij}

2940:                                   +\left[\frac{\alpha C_{ij}}{H(Y_i|Y_j)}\right]^*I(X;Y_i|Y_j)\Big), \\

2941:                            \label{eqn:thm_3step_2steps_rate}

2942:                            &  &  R_{ji} \triangleq \min\Big( I(X;Y_j) + C_{ij},

2943:                                    I(X;Y_i) + \left[\frac{C_{ji}}{H(Y_j|Y_i)}\right]^*I(X;Y_j|Y_i) \Big),

2944:           %             &  & R_{212} \triangleq \min \Big(I(X;Y_1) + (1-\alpha)C_{21}

2945:           %                    +\frac{\alpha C_{21}}{H(Y_2)}I(X;Y_2|Y_1), \nonumber\\

2946:           %             &  &   \qquad \qquad  I(X;Y_2) -H(Y_1|Y_2,X) + \nonumber\\

2947:           %             &  &   \qquad \qquad \qquad \min\big(C_{12}+ \frac{\alpha C_{21}}{H(Y_2)}I(Y_1;Y_2),H(Y_1|Y_2) \big)\Big)\nonumber

2948:           %

2949:           %            &  & R_{212} \triangleq \min \Big(I(X;Y_1) + (1-\alpha)C_{21} +\frac{\alpha C_{21}}{H(Y_2)}I(X;Y_2|Y_1), \nonumber\\

2950:           %            &  &    \; I(X;Y_2) -H(Y_1|Y_2,X) + \min\big(C_{12}+KI(Y_1;Y_2),H(Y_1|Y_2) \big)\Big)\nonumber\\

2951:           %            &  & R_{121}  \triangleq  \min \Big(I(X;Y_2) + (1-\alpha)C_{12} +KI(X;Y_1|Y_2), \nonumber\\

2952:           %            &  & \; I(X;Y_1)-H(Y_2|Y_1,X) + \min\big(C_{21}+KI(Y_1;Y_2),H(Y_2|Y_1) \big)\Big)\nonumber\\

2953:                        \end{eqnarray}

2954:                   \end{subequations}

2955:                    }

2956:                    where $[x]^* \triangleq \min(x,1)$, $(i,j) = (1,2) \mbox{ or } (2,1)$ and $\alpha \in [0,1]$, is achievable.}

2957:               \end{theorem}

2958:           \smallskip

2959:

2960:           \noindent

2961:                    The proof of this theorem is provided in appendix \ref{sec:three-steps-proof}\footnote{

2962:                       In the proof we assume that $H(Y_1|Y_2)$ and $H(Y_2|Y_1)$ are positive

2963:                       since otherwise

2964:                       one of the receivers cannot receive information from the transmitter.

2965:                       However, we can incorporate the situation where at least one of these entropies is zero as

2966:                       a special case of the derivation.

2967:                       The expressions in \eqref{eqn:thm_3step_3steps_rate} and \eqref{eqn:thm_3step_2steps_rate} hold for

2968:                       any value of the entropy.}.

2969:

2970:               \subsection{An Example}

2971:               \label{sec:example_multi_step}

2972:               Consider the symmetric broadcast channel where $\mY_1 = \mY_2 = \mY$ and

2973:               \[

2974:                   p_{Y_1|Y_2,X}(a|b,x) = p_{Y_2|Y_1,X}(a|b,x),

2975:               \]

2976:               for any $a,b \in \mY \times \mY$ and $x \in \mX$. Let $C_{21} = C_{12} = C$.

2977:               For this scenario we have that $R_{121} = R_{212}$ and $R_{12} = R_{21}$,~so it is enough to consider

2978:               only $R_{212}$ and $R_{12}$. Consider first $\alpha$ small enough such that

2979:               $\frac{\alpha C}{H(Y_2|Y_1)} < 1$. Begin with $R_{212}$:

2980:               \begin{eqnarray}

2981:                   \negdista &  & R_{212}   =  \min\left(R_{212}', R_{212}''\right),\nonumber\\

2982:                   \negdista &  & R_{212}'  =  I(X;Y_1) + (1 - \alpha)C + \frac{\alpha C}{H(Y_2|Y_1)}I(X;Y_2|Y_1),\nonumber\\

2983:               \label{eqn:example_R212''}

2984:                   \negdista &  & R_{212}''  =  I(X;Y_2) - H(Y_1|Y_2,X) + C +\frac{\alpha C}{H(Y_2|Y_1)}I(Y_1;Y_2),

2985:               \end{eqnarray}

2986:                   in the region where

2987:                   $H(Y_1|Y_2,X) \le C + \frac{\alpha C}{H(Y_2|Y_1)}I(Y_1;Y_2)\! <\! H(Y_1|Y_2)$

2988:                   (otherwise we trivially get either

2989:                   the full cooperation bound for $R_{212}''$ or the non-cooperative rate).

2990:                   Next, we note that for a fixed $C$,

2991:                   $R_{212}'$ is a decreasing function of $\alpha$ and

2992:                   $R_{212}''$ is an increasing function of $\alpha$.

2993:                   Therefore the optimal value of $\alpha$ that maximizes

2994:                   the minimum of these two terms is the value for which both expressions are equal,

2995:                   subject to $\alpha \in [0,1]$. We also have due

2996:                   to the symmetry that $I(X;Y_1) = I(X;Y_2)$ and therefore equality implies

2997:                   \begin{eqnarray*}

2998:                      -\alpha C + \frac{\alpha C}{H(Y_2|Y_1)}I(X;Y_2|Y_1) & = & - H(Y_1|Y_2,X)

2999:                                + \frac{\alpha C}{H(Y_2|Y_1)}I(Y_1;Y_2)

3000:                   \end{eqnarray*}

3001:                   or

3002:                   \begin{equation}

3003:                   \label{eqn:example-alpah-C}

3004:                       \frac{\alpha C}{H(Y_2|Y_1)} = \frac{H(Y_1|Y_2,X)}{H(Y_2|Y_1) + I(Y_1;Y_2) - I(X;Y_2|Y_1)}.

3005:                   \end{equation}

3006:                   Note that $\frac{H(Y_1|Y_2,X)}{H(Y_2|Y_1) + I(Y_1;Y_2) - I(X;Y_2|Y_1)} < 1$, hence indeed

3007:                   $\left[\frac{\alpha C}{H(Y_2|Y_1)}\right]^* = \frac{\alpha C}{H(Y_2|Y_1)}$.

3008:                   Combining the constraint

3009:                   $\alpha \le 1$ with (\ref{eqn:example-alpah-C}), we obtain a lower bound on $C$:

3010:                   \[

3011:                       C \ge C_{\mbox{\scriptsize{min}}} \triangleq \frac{H(Y_2|Y_1) H(Y_1|Y_2,X)}{H(Y_2|Y_1) + I(Y_1;Y_2) - I(X;Y_2|Y_1) }.

3012:                   \]

3013:

3014:                   To obtain a rate increase over the non-cooperative rate,~we need to verify in

3015:                   \eqref{eqn:example_R212''} that

3016:                   \begin{equation}

3017:                   \label{eqn:example_rate_constr}

3018:                       C + \frac{\alpha C}{H(Y_2|Y_1)}I(Y_1;Y_2)\!>\! H(Y_1|Y_2,X),

3019:                   \end{equation}

3020:                   which implies

3021:                   \begin{equation}

3022:                   \label{eqn:exmaple-C-lower-bound}

3023:                       C >                \frac{H(Y_1|Y_2,X)}{1 + \frac{\alpha}{H(Y_2|Y_1)}I(Y_1;Y_2)}.

3024:                   \end{equation}

3025:                   Again, plugging the equality \eqref{eqn:example-alpah-C} into the right-hand side of

3026:                    \eqref{eqn:exmaple-C-lower-bound} we get a second lower bound on $C$:

3027:                    \begin{eqnarray*}

3028:                       C \ge C_0 \triangleq  \frac{H(Y_1|Y_2,X) (H(Y_2|Y_1) - I(X;Y_2|Y_1))}{H(Y_2|Y_1) + I(Y_1;Y_2) - I(X;Y_2|Y_1)}.

3029:                    \end{eqnarray*}

3030:                   Note that $C_{\mbox{\scriptsize min}} \ge C_0$, so it is enough to satisfy $C \ge C_{\mbox{\scriptsize{min}}}$.

3031:                   Lastly, plugging (\ref{eqn:example-alpah-C}) into the expression for $R_{212}''$ in

3032:                   (\ref{eqn:example_R212''}) results in

3033:                   \begin{eqnarray}

3034:                   \label{eqn:R212_example}

3035:                       R_{212} = R_{212}'' & = & I(X;Y_2) - H(Y_1|Y_2,X) + C +

3036:                                                \frac{I(Y_1;Y_2) H(Y_1|Y_2,X)}{H(Y_2|Y_1) + I(Y_1;Y_2) - I(X;Y_2|Y_1) },

3037:                   \end{eqnarray}

3038:                   for $C \ge C_{\mbox{\scriptsize{min}}}$.

3039:                    % satisfies (\ref{eqn:exmaple-C-lower-bound}).

3040:                   We note that $R_{212} = I(X;Y_1,Y_2)$ for

3041:                   \[

3042:                       C = H(Y_1|Y_2) - \frac{I(Y_1;Y_2) H(Y_1|Y_2,X)}{H(Y_2|Y_1) + I(Y_1;Y_2) - I(X;Y_2|Y_1) } \ge C_{\mbox{\scriptsize{min}}},

3043:                   \]

3044:                   namely we achieve the full cooperation bound when the capacity $C$ is {\em less} than the

3045:                   full cooperation capacity required by the Slepian-Wolf theorem

3046:                   (see \cite[theorem 14.4.1]{cover-thomas:it-book}). This cannot be achieved using theorem \ref{thm:CEG_EAF}

3047:                   or the approach in \cite{DraperFK:03}.

3048:

3049:                   Next, examine $R_{12}$: writing $R_{12}$ explicitly we have

3050:                   \begin{eqnarray*}

3051:                       R_{12}  & = & \min \left( R_{12}', R_{12}'' \right),\\

3052:                       R_{12}' & = & I(X;Y_1) + C,\\

3053:                       R_{12}''& = & I(X;Y_2) + \frac{C}{H(Y_1|Y_2)}I(X;Y_1|Y_2),

3054:                   \end{eqnarray*}

3055:                   in the region where $C < H(Y_1)$. Here, the equality $I(X;Y_2) = I(X;Y_1)$ implies that

3056:                   \begin{equation}

3057:                   \label{eqn:example-three-steps-two-step}

3058:                       R_{12} = I(X;Y_2) + \left[\frac{C}{H(Y_1|Y_2)}\right]^*I(X;Y_1|Y_2).

3059:                   \end{equation}

3060:                   We see that for $R_{12}$ the slope of the rate vs. $C$ is less than $1$, thus it is smaller

3061:                   than the slope of the rate for $R_{212}$.

3062:                   In our example we get that $R_{12} > R_{212}$

3063:                   for any value of $C$ in the range $0 \le C \le C_{\mbox{\scriptsize{min}}}$. In this range

3064:                   we also have that $\frac{C}{H(Y_1)} \le 1$.

3065:

3066:                   For proposition \ref{prop:achive_common_one_step}, applying the same

3067:                   assumptions, and following the same reasoning leading to equation (\ref{eqn:example-three-steps-two-step})

3068:                   yields the achievable rate

3069:                   \begin{equation}

3070:                   \label{eqn:R_twostep_example}

3071:                       R = \max\Big(I(X;Y_2), I(X;Y_2) - H(Y_1|Y_2,X) + C\Big),

3072:                   \end{equation}

3073:                   for $C \le H(Y_1 | Y_2)$.

3074:                   We see that $C = H(Y_1|Y_2)$ is required for full cooperation, and when $C < H(Y_1|Y_2,X)$

3075:                   we get the non-cooperative rate $I(X;Y_2)$. The comparison between proposition \ref{prop:achive_common_one_step} and

3076:                   theorem \ref{thm:achieve-three-steps} for a fixed $p_X(x)$ on $\mX$ is depicted in figure

3077:                   \ref{fig:Three-steps-comparison}, together with the upper bound of proposition

3078:                   \ref{prop:common_upper}, which for this case specializes to:

3079:                   \[

3080:                       R^{\mbox{\scriptsize upper}} = \min \left\{I(X;Y_2)+C, I(X;Y_1,Y_2) \right\}.

3081:                   \]

3082:

3083:                   \begin{figure}[htb]

3084:                          \centering

3085:                          \scalebox{0.60}{\includegraphics{Three-step-conference_new.eps}}

3086:                          \caption{\small The achievable rate $R$ vs. conference capacity $C$,

3087:                               for proposition \ref{prop:common_upper},

3088:                               proposition \ref{prop:achive_common_one_step} and theorem \ref{thm:achieve-three-steps},

3089:                               for the symmetric broadcast channel.}

3090:                          \label{fig:Three-steps-comparison}

3091:                   \end{figure}

3092:

3093: \end{comment}

3094:

3095: %        \subsection{Discussion}

3096: %        As can be easily observed in figure \ref{fig:Three-steps-comparison},

3097: %        when $C \ge C_{\mbox{\scriptsize{min}}}$ the three-step conference indeed provides

3098: %        a rate increase over the EAF-based two-step conference scheme of proposition \ref{prop:achive_common_one_step}.

3099: %        This can be seen by comparing the solid line when $C \ge C_{\mbox{\scriptsize{min}}}$ which

3100: %        represents the three-step conference and the dashed line that represents the EAF-based two-step

3101: %        conference. We see that both conferences result in rate expressions with slope of $1$ vs. C, but

3102: %        the three-step conference obtains the same rates for smaller values of $C$. This is because

3103: %        as long as the receiver that is first to decode is the limiting one

3104: %        (which is the case in the symmetric channel setup),

3105: %        then helping that receiver will increase the overall rate.

3106:

3107: %        Also note the benefits of the new relaying strategy of theorem \ref{thm:main_thm}, which provides a

3108: %        rate increase over the non-cooperative rate even when both the three-step

3109: %        conference and the EAF-based two-step conference are not able to provide that, due to the constraint on the

3110: %        feasible region resulting from the Wyner-Ziv compression strategy. The superiority of the

3111: %        three-step conference over the two-step conference is also evidenced by directly comparing

3112: %        the rate expressions in \eqref{eqn:R212_example} and \eqref{eqn:R_twostep_example}: we see

3113: %        that the rate expressions are identical except an additive and (in general) positive term that

3114: %        appears only in the three-step rate expression \eqref{eqn:R212_example}. This term is the rate increase of the

3115: %        three-step scheme over the two-step scheme.

3116:

3117:

3118:

3119: %        \begin{itemize}

3120: %            \item Talk about shlomo's paper

3121: %            \item explain that last thm is not easy to obtain from the general expression

3122: %        \end{itemize}

3123:

3124:

3125:

3126:

3127:

3128:

3129:

3130:

3131:

3132:

3133:

3134: \section{Conclusions}

3135: \label{sec:conclu}

3136:

3137: In this  paper we considered the EAF technique using time-sharing on the auxiliary RVs. We first showed that incorporating

3138: joint-decoding at the destination into the EAF technique results in a special case of the classic EAF of

3139: \cite[theorem 6]{CoverG:79}. We then used the time-sharing assignment of the auxiliary RVs to obtain an

3140: easily computable achievable rate for the multiple-relay case, which can be compared against the DAF-based results, to select the highest rate

3141: for any given scenario.

3142: Next, we showed that for the Gaussian relay channel with coded modulation, the Gaussian auxiliary RV

3143: assignment is not always optimal, and a TS-EAF implementing a per-symbol hard decision may sometimes perform better.

3144: Finally, we considered a third application of TS-EAF to the cooperative broadcast scenario with a multi-cycle

3145: conference. We first derived an achievable rate for the general channel, and then we specialized it to the single-cycle

3146: conference for which we obtained an explicit achievable rate. This rate is superior to the explicit expression that

3147: can be obtained with joint-decoding.

3148:

3149:

3150:

3151:

3152: \section{Acknowledgements}

3153: % The authors wish to thank Gerhard Kramer and Shlomo Shamai for discussions regarding this work.

3154: In the final version.

3155:

3156:

3157:

3158:

3159:

3160:

3161:

3162:

3163:

3164:

3165:

3166:

3167:

3168:

3169:

3170:

3171:

3172:

3173:

3174:

3175:

3176:

3177:

3178:

3179:

3180:

3181:

3182:

3183:

3184:

3185:

3186:

3187:

3188:

3189:

3190:

3191:

3192:

3193:

3194:

3195:

3196:

3197:

3198:

3199:

3200:

3201:

3202:

3203:

3204:

3205:

3206:

3207:

3208: \useRomanappendicesfalse

3209: \appendices

3210: \setcounter{equation}{0}

3211: \renewcommand{\theequation}{\thesection.\arabic{equation}}

3212:

3213:

3214:

3215:

3216: \section{Expressions for Section \ref{sec:Gauss_relay}}

3217: \label{append:Gauss-deriv}

3218:

3219: % We first recall the definition in equation \eqref{eqn:def_G} for $G_x(a,b)$:

3220: % \[

3221: %     G_x(a,b) = \frac{1}{\sqrt{2 \pi b}} e ^{-\frac{(x-a)^2}{2 b} }.

3222: % \]

3223: \subsection{Hard-Decision Estimate-and-Forward}

3224: \label{append:Gauss-deriv-HD-EAF}

3225: We evaluate $I(X;\hY_1,Y)$, with $p(\hY_1|Y_1)$ given by \eqref{eqn:def_p_hy1_given_y1_HD_eq1}

3226: and \eqref{eqn:def_p_hy1_given_y1_HD_eq2} using:

3227: \[

3228:     I(X;\hY_1,Y) = I(X;\hY_1) + I(X;Y|\hY_1).

3229: \]

3230: \begin{enumerate}

3231:     \item Evaluating $I(X;\hY_1)$:  Note that both $X$ and $\hY_1$ are discrete RVs,

3232:         therefore $I(X;\hY_1)$ can be evaluated using the

3233:         discrete entropies. The conditional distribution of $\hY_1$ given $X$ is given by:

3234:         \begin{equation}

3235:         \label{eqn:def_p_hy1_given_x}

3236:             p(\hY_1|X=\sqrt{P}) = \left\{

3237:                                         \begin{array}{cr}

3238:                                         P_1 \cdot P_{\ners}, &              1\\

3239:                                         1-P_{\ners}, &                      E\\

3240:                                         (1-P_1) P_{\ners},&                 -1

3241:                                         \end{array}

3242:                                     \right.

3243:         \end{equation}

3244:         where

3245:         \[

3246:             P_1 = \Pr(Y_1 > 0 | X = \sqrt{P}).

3247:         \]

3248:         $p(\hY_1|X=-\sqrt{P})$ can be obtained from $p(\hY_1|X=\sqrt{P})$ by switching $1$ and $-1$ in \eqref{eqn:def_p_hy1_given_x}.

3249:

3250:     \item Evaluating $I(X;Y|\hY_1)$: write first

3251:         \[

3252:             I(X;Y|\hY_1) = h(Y| \hY_1) - h(Y | \hY_1, X),

3253:         \]

3254:         and we note that

3255:         \[

3256:             h(Y | \hY_1, X) = h(X + N | \hY_1 , X) = h(N | \hY_1 , X) = h(N) = \frac{1}{2} \log_2 (2 \pi e \sigD).

3257:         \]

3258:         Using the definition of conditional entropy we write

3259:         \[

3260:             h(Y| \hY_1) = p(\hY_1 = 1) h(Y| \hY_1 = 1) + p(\hY_1 = E) h(Y | \hY_1 = E) + p(\hY_1 = -1) h(Y | \hY_1 = -1),

3261:         \]

3262:         $p(\hY_1)$ can be obtained by combining \eqref{eqn:def_PX} and \eqref{eqn:def_p_hy1_given_x} which results in

3263:         \begin{equation}

3264:         \label{eqn:appndx_p_hy1}

3265:             p(\hY_1) = \left\{

3266:                         \begin{array}{cr}

3267:                             \frac{1}{2}P_{\ners}, & 1\\

3268:                             1 - P_{\ners} , & E\\

3269:                             \frac{1}{2}P_{\ners}, & -1

3270:                         \end{array}

3271:                     \right.,

3272:         \end{equation}

3273:         and we note that $h(Y | \hY_1 = E) = h(Y)$, since erasure is equivalent to no prior information.

3274:         Finally we note that by definition

3275:         \begin{eqnarray}

3276:             h(Y) & = &  -\int_{y = -\infty}^{\infty} f(y) \log_2(f(y)) dy, \nonumber\\

3277:             f(Y) & = & \Pr(X = \sqrt{P}) f(Y | X = \sqrt{P}) + \Pr(X = -\sqrt{P}) f(Y | X = -\sqrt{P})\nonumber\\

3278:             \label{eqn:f_Y_HC}

3279:                  & = & \frac{1}{2}\left(G_y(\sqrt{P},\sigD) + G_y(-\sqrt{P},\sigD) \right),

3280:         \end{eqnarray}

3281:         where

3282:         \begin{equation}

3283:             \label{eqn:def_G}

3284:                 G_x(a,b) = \frac{1}{\sqrt{2 \pi b}} e ^{-\frac{(x-a)^2}{2 b} }.

3285:         \end{equation}

3286:         Next, we have

3287:         \begin{eqnarray}

3288:                         \label{eqn:cond_entropy_hy1_is_1}

3289:             h(Y|\hY_1 = 1) & = & -\int_{y = -\infty}^{\infty} f(y|\hy_1 = 1) \log_2(f(y|\hy_1 = 1)) dy\\

3290:             f(Y|\hY_1 = 1) & = & \frac{f(Y,\hY_1 = 1)}{\Pr(\hY_1 = 1)}\nonumber\\

3291:                         & = & \frac{f(Y,Y_1 > 0)P_{\ners}}{\Pr(Y_1 > 0)P_{\ners}}\nonumber\\

3292:             \label{eqn:cond_f_hy1_is_1}

3293:                         & = & \frac{f(Y,Y_1 > 0)}{\Pr(Y_1 > 0)},\\

3294:             f(Y,Y_1 > 0) & = & \Pr(X = \sqrt{P}) f(Y,Y_1 > 0 | X = \sqrt{P}) + \Pr(X = -\sqrt{P}) f(Y,Y_1 > 0 | X = -\sqrt{P})\nonumber\\

3295:             \label{eqn:cond_f_y1_pos}

3296:                         & = & \frac{1}{2}\left( f(Y,Y_1 > 0 | X = \sqrt{P}) + f(Y,Y_1 > 0 | X = -\sqrt{P})\right).

3297:         \end{eqnarray}

3298:         Using

3299:         \[

3300:                     f_{Y,Y_1}(y,y_1 | x )  = \mN\left(

3301:                     \left( \begin{array}{c}

3302:                             x\\ g \cdot x

3303:                         \end{array}\right),

3304:                         \left( \begin{array}{cc}

3305:                             \sigD & 0\\ 0 & \sigR

3306:                             \end{array}

3307:                             \right)

3308:                                     \right)  = G_y(x,\sigD)G_{y_1}(g\cdot x,\sigR),

3309:         \]

3310:         we obtain

3311:         \[

3312:                   f(Y,Y_1 > 0 | X )  =  \int_{y_1 = 0}^{\infty} f(y,y_1 | x ) dy_1 = G_y(x, \sigD)

3313:                         \int_{y_1 = 0}^{\infty} G_{y_1}(g \cdot x, \sigR) dy_1.

3314:         \]

3315: \end{enumerate}

3316:

3317:

3318: Next we need to evaluate $I(\hY_1;Y_1|Y) = h(Y_1|Y) - h(Y_1|Y, \hY_1)$:

3319: \begin{enumerate}

3320:         \item $h(Y_1|Y) = h(Y,Y_1) - h(Y)$. Here

3321:         \begin{eqnarray*}

3322:             h(Y,Y_1) & = & -\int_{y = -\infty}^{\infty}\int_{y_1 = -\infty}^{\infty} f(y,y_1) \log_2(f(y,y_1)) dy \;dy_1,\\

3323:             f(Y,Y_1) & = & \frac{1}{2}\left(f(Y,Y_1|X = \sqrt{P}) + f(Y,Y_1|X = -\sqrt{P})\right),\\

3324:             f(Y,Y_1|X ) & = & G_y(x,\sigD)G_{y_1}(g \cdot x ,\sigR).

3325:         \end{eqnarray*}

3326:

3327:         \item By the definition of conditional entropy we have

3328:          \[

3329:             h(Y_1|Y, \hY_1) = p(\hY_1 = 1) h(Y_1| Y, \hY_1 = 1) + p(\hY_1 = E) h(Y_1 |Y, \hY_1 = E) + p(\hY_1 = -1) h(Y_1 | Y, \hY_1 = -1),

3330:         \]

3331:         where $h(Y_1 |Y, \hY_1 = E) = h(Y_1 |Y )$,

3332:         and for $\hY_1 = 1$, for example, we have

3333:         \[

3334:             h(Y_1 |Y, \hY_1 = 1) = -\int_{y = -\infty}^{\infty} \int_{y_1 = -\infty}^{\infty}

3335:                 f(y,y_1|\hy_1 = 1)\log_2(f(y_1|y, \hy_1 = 1)) dy \; dy_1.

3336:         \]

3337:         Finally, we need to derive the distributions $f(y,y_1|\hy_1 = 1)$ and $f(y_1|y, \hy_1 = 1)$.

3338:         Begin with

3339:         \begin{eqnarray*}

3340:             &   & f_{Y,Y_1|\hY_1}(y,y_1|\hy_1 = 1) = \frac{f_{Y,Y_1,\hY_1}(y,y_1,\hy_1 = 1)}{\Pr(\hy_1 = 1)}\\

3341:             &   & \phantom{XXXXXX}\qquad  = \frac{f_{Y,Y_1,\hY_1}(y,y_1,y_1 > 0)P_{\ners}}{\Pr(y_1 > 0)P_{\ners}} = f(y,y_1|y_1 > 0) = \left\{

3342:                                                             \begin{array}{cl}

3343:                                                                 \frac{f_{Y,Y_1}(y,y_1)}{\Pr(Y_1>0)} , & y_1 > 0\\

3344:                                                                 0                           , & y_1 \le 0

3345:                                                             \end{array}

3346:                                                         \right.

3347:         \end{eqnarray*}

3348:         and due to the symmetry, $\Pr(Y_1 > 0)  = \Pr(Y_1 \le 0) = \frac{1}{2}$.

3349:         We also have

3350:         \begin{eqnarray*}

3351:         f(Y_1|Y , \hY_1 = 1) & =  & \frac{f(Y_1,Y|\hY_1 = 1) }{f(Y|\hY_1 = 1)}  =  \frac{f(Y_1,Y|Y_1 >0 ) }{f(Y|Y_1 > 0)} = \frac{\frac{f(Y_1,Y) }{\Pr(Y_1>0)} }{\frac{f(Y,Y_1 > 0)}{\Pr(Y_1>0)}} = \frac{f(Y_1,Y)}{f(Y,Y_1 > 0)}, \quad Y_1 > 0\\

3352:         f(Y_1|Y , \hY_1 = 1) & = & 0, \quad Y_1 \le 0.

3353:         \end{eqnarray*}

3354: \end{enumerate}

3355:

3356:

3357:

3358:

3359:

3360:

3361:

3362: \subsection{Evaluation of the Rate with DHD}

3363: \label{sec:expressions_DHD}

3364: We evaluate the achievable rate using $I(X;Y,\hY_1) = I(X;\hY_1) + I(X;Y|\hY_1)$.

3365: The distribution of $\hY_1$ is given by:

3366: \begin{eqnarray*}

3367:     \Pr(\hY_1 = 1)  =  \Pr(Y_1 > T) & = & \frac{1}{2}\left(\Pr(Y_1 > T | X = \sqrt{P}) + \Pr(Y_1 > T | X = -\sqrt{P}) \right)\\

3368:              & = & \frac{1}{2} \left(\int_{y_1 > T}G_{y_1}(g\sqrt{P},\sigR)dy_1 + \int_{y_1 > T}G_{y_1}(-g\sqrt{P},\sigR)dy_1\right)\\

3369:     \Pr(\hY_1 = E)  =  \Pr(|Y_1| \le T) & = & \frac{1}{2}\left(\Pr(|Y_1| \le T | X = \sqrt{P})

3370:                 + \Pr(|Y_1| \le T | X = -\sqrt{P}) \right)\\

3371:              & = & \frac{1}{2} \left(\int_{y_1 =-T}^T G_{y_1}(g\sqrt{P},\sigR)dy_1

3372:                 + \int_{y_1 =-T}^T G_{y_1}(-g\sqrt{P},\sigR)dy_1\right),

3373: \end{eqnarray*}

3374: and by symmetry, $\Pr(\hY_1 = 1) = \Pr(\hY_1 = -1)$ and $H(\hY_1|X = \sqrt{P}) = H(\hY_1|X = -\sqrt{P})$.

3375: Therefore, we need the conditional distribution $p(\hY_1|X = \sqrt{P})$:

3376: \begin{eqnarray*}

3377:     \Pr(\hY_1 = 1 | X = \sqrt{P}) & = & \Pr(Y_1 > T| X = \sqrt{P}) = \int_{y_1 > T}G_{y_1}(g\sqrt{P},\sigR)dy_1\\

3378:     \Pr(\hY_1 = -1 | X = \sqrt{P}) & = & \Pr(Y_1 < -T| X = \sqrt{P}) = \int_{y_1 <-T}G_{y_1}(g\sqrt{P},\sigR)dy_1\\

3379:     \Pr(\hY_1 = E | X = \sqrt{P}) & = & 1 - \Pr(\hY_1 = 1 | X = \sqrt{P}) - \Pr(\hY_1 = -1 | X = \sqrt{P}).

3380: \end{eqnarray*}

3381: This allows us to evaluate $I(X;\hY_1) = H(\hY_1) - H(\hY_1|X)$. For evaluating $I(X;Y|\hY_1)$ note that

3382: \[

3383:     h(Y|\hY_1,X) = h(X + N | \hY_1,X) = h(N |\hY_1,X) = h(N) = \frac{1}{2}\log_2(2 \pi e \sigD),

3384: \]

3385: and we need only to evaluate $h(Y|\hY_1)$: by definition

3386: \[

3387:     h(Y|\hY_1) = \Pr(\hY_1 = 1)h(Y|\hY_1 = 1) + \Pr(\hY_1 = E)h(Y|\hY_1=E) + \Pr(\hY_1 = -1)h(Y|\hY_1 = -1),

3388: \]

3389: and note that $h(Y|\hY_1=E) = h(Y)$. Finally,

3390: \begin{eqnarray*}

3391:     h(Y|\hY_1 = 1) & = &   -\int_{y=-\infty}^{\infty} f(y|\hy_1 = 1) \log_2 (f(y | \hy_1 = 1)) dy\\

3392:     f_{Y|\hY_1}(y|\hy_1 = 1) & = & f(y|y_1 >T) = \frac{f(y,y_1 >T)}{\Pr(Y_1 > T)}\\

3393:     f_{Y,Y_1}(y,y_1 >T) & = & \frac{1}{2}\left(f(y,y_1 >T | X = \sqrt{P}) + f(y,y_1 >T| X = -\sqrt{P})\right)\\

3394:          & = & \frac{1}{2}\left(G_y(\sqrt{P},\sigD)\Pr(Y_1>T|X = \sqrt{P}) + G_y(-\sqrt{P},\sigD)\Pr(Y_1>T|X = -\sqrt{P})\right).

3395: \end{eqnarray*}

3396:

3397: Evaluating $I(\hY_1;Y_1|Y)$ we have:

3398: \begin{eqnarray*}

3399:     I(\hY_1;Y_1|Y) & = & H(\hY_1|Y) - H(\hY_1|Y,Y_1) \\

3400:      & \stackrel{(a)}{=} & H(\hY_1|Y)\\

3401:      & = & H(\hY_1) + h(Y|\hY_1) - h(Y),

3402: \end{eqnarray*}

3403: where (a) is due to the deterministic mapping from $Y_1$ to $\hY_1$, and $h(Y)$ can be evaluated using

3404: \eqref{eqn:f_Y_HC}.

3405:

3406: \subsubsection{DHD when $T \rightarrow 0$}

3407: \label{sec:HDH-Explanation}

3408: As $T \rightarrow 0$ we have that $\Pr(\hY_1 = E) \rightarrow 0$ and $\hY_1$ converges in distribution to a

3409: Bernoulli RV with probability $\frac{1}{2}$. Therefore

3410: \begin{eqnarray*}

3411:     f(Y,\hY_1 = 1) & = & \frac{1}{2}\left(G_y(\sqrt{P},\sigD)\Pr(Y_1>T|X = \sqrt{P}) + G_y(-\sqrt{P},\sigD)\Pr(Y_1>T|X = -\sqrt{P})\right)\\

3412:         & \stackrel{T \rightarrow 0}{\approx} & \frac{1}{2}\left(G_y(\sqrt{P},\sigD)\Pr(Y_1>0|X = \sqrt{P})

3413:                 + G_y(-\sqrt{P},\sigD)\Pr(Y_1>0|X = -\sqrt{P})\right)\\

3414:         &  =  & \frac{1}{2}\left(G_y(\sqrt{P},\sigD)P_+

3415:                 + G_y(-\sqrt{P},\sigD)(1 - P_+)\right),

3416: \end{eqnarray*}

3417: where $P_+ = \Pr(Y_1>0|X = \sqrt{P})$. Now, letting $g \rightarrow 0$ we have that $P_+ \rightarrow \frac{1}{2}$ and

3418: therefore

3419: \begin{eqnarray*}

3420:     f(Y|\hY_1 = 1) & \stackrel{g \rightarrow 0, T \rightarrow 0}{\longrightarrow } & f(Y)\\

3421:     \Rightarrow h(Y|\hY_1 = 1) & \stackrel{g \rightarrow 0, T \rightarrow 0}{\longrightarrow }& h(Y).

3422: \end{eqnarray*}

3423: We conclude that as $g \rightarrow 0, T \rightarrow 0$, then $h(Y|\hY_1) \rightarrow h(Y)$ and therefore

3424: $I(Y_1;\hY_1|Y)$ becomes

3425: \[

3426:     I(Y_1;\hY_1|Y)  =  H(\hY_1) + h(Y|\hY_1) - h(Y)  \stackrel{g \rightarrow 0, T \rightarrow 0}{\longrightarrow } 1

3427: \]

3428: Using the continuity of $I(Y_1;\hY_1|Y)$ we conclude that for small values of $g$, as $T$ decreases then

3429: $I(Y_1;\hY_1|Y)$ is bounded from below. This implies that for small $g$ and small $C$ the feasibility

3430: is obtained only for large $T$, which in turn implies low rate.

3431:

3432:

3433:

3434:

3435:

3436:

3437: \subsection{Evaluating the Information Rate with TS-DHD}

3438: \label{appndx:expressions_TS_DHD}

3439: \subsubsection{Evaluating $I(X;Y,\hY_1)$}

3440: We first write

3441: \[

3442:     I(X;Y,\hY_1) = I(X;\hY_1) + I(X;Y|\hY_1).

3443: \]

3444: Evaluating $I(X;\hY_1) = H(\hY_1) - H(\hY_1|X)$ requires the marginal of $\hY_1$.

3445: Using the mapping defined in \eqref{eqn:def_TS-DHD} we find the marginal distribution of $\hY_1$:

3446: \[

3447:     \Pr(\hY_1) = \left\{

3448:         \begin{array}{cl}

3449:             1,  & (1-P_{\ers})\Pr(Y_1>T)\\

3450:             E,  & \Pr(|Y_1| \le T) + P_{\ers} \Pr(|Y_1|>T)\\

3451:             -1, & (1-P_{\ers})\Pr(Y_1 < -T)

3452:         \end{array}

3453:     \right.,

3454: \]

3455: where

3456: \begin{eqnarray*}

3457:     \Pr(Y_1 > T) = \Pr(Y_1 < -T) & = & \int_{y_1 = T}^{\infty} \frac{1}{2}\left[G_{y_1}(g\sqrt{P},\sigR)+ G_{y_1}(-g\sqrt{P},\sigR) \right]d y_1\\

3458:     \Pr(|Y_1| \le T) & = & \int_{y_1 = -T}^{T} \frac{1}{2}\left[G_{y_1}(g\sqrt{P},\sigR)+ G_{y_1}(-g\sqrt{P},\sigR) \right]d y_1.

3459: \end{eqnarray*}

3460: Also, due to symmetry we have that $H(\hY_1|X = \sqrt{P}) = H(\hY_1|X = -\sqrt{P})$, and therefore we need only to find the conditional

3461: $\Pr(\hY_1|X = \sqrt{P})$:

3462: \[

3463:     \Pr(\hY_1|X = \sqrt{P}) = \left\{

3464:         \begin{array}{cl}

3465:             1,  & (1-P_{\ers})\Pr(Y_1>T|X = \sqrt{P})\\

3466:             E,  & \Pr(|Y_1| \le T|X = \sqrt{P}) + P_{\ers} \Pr(|Y_1|>T|X = \sqrt{P})\\

3467:             -1, & (1-P_{\ers})\Pr(Y_1 < -T|X = \sqrt{P})

3468:         \end{array}

3469:     \right.,

3470: \]

3471: and we note that $f_{Y_1|X} (y_1 | x = \sqrt{P}) = G_{y_1}(g\sqrt{P},\sigR)$.

3472:

3473: Next, we need to evaluate $I(X;Y|\hY_1) = h(Y|\hY_1) - h(Y | \hY_1,X)$. We first note that

3474: \[

3475:     h(Y| \hY_1,X) = h(X + N|X,\hY_1) = h(N|X , \hY_1) = h(N) = \frac{1}{2}\log_2(2 \pi e \sigD).

3476: \]

3477: Lastly, we have

3478: \[

3479:     h(Y|\hY_1) = \Pr(\hY_1 = 1) h(Y|\hY_1 = 1) + \Pr(\hY_1 = E) h(Y|\hY_1 = E) + \Pr(\hY_1 = -1)h(Y|\hY_1 = -1).

3480: \]

3481: We note that $h(Y|\hY_1 = E) = h(Y)$ and that $h(Y|\hY_1 = 1)$ and $h(Y|\hY_1 = -1)$ are calculated exactly as in

3482: appendix \ref{sec:expressions_DHD} for the DHD case.

3483:

3484:

3485:

3486:

3487:

3488: \subsubsection{Evaluating $I(\hY_1;Y_1|Y)$}

3489: Begin by writing

3490: \begin{eqnarray*}

3491:     I(\hY_1;Y_1|Y) & = &  h(\hY_1|Y) - h(\hY_1|Y_1,Y) \\

3492:         & = & h(Y|\hY_1) + H(\hY_1) - h(Y) - h(\hY_1|Y_1)

3493: \end{eqnarray*}

3494: where we used the fact that given $Y_1$, $\hY_1$ is independent of $Y$. All the terms in the above expressions have been calculated

3495: in the previous subsection, except $h(\hY_1|Y_1)$:

3496: \begin{eqnarray*}

3497:     h(\hY_1|Y_1) & = & \Pr(Y_1 > T) h(\hY_1|Y_1 > T) + \Pr(|Y_1| \le T) h(\hY_1 \,|\, |Y_1| \le T) + \Pr(Y_1 < -T) h(\hY_1|Y_1 < -T)\\

3498:      & = & \Pr(Y_1 > T) H(P_{\ers},1 - P_{\ers}) +  \Pr(Y_1 < -T)H(P_{\ers},1 - P_{\ers}) \\

3499:      & = & \left(1 - \Pr(|Y_1| \le T)\right)H(P_{\ers},1 - P_{\ers}).

3500: \end{eqnarray*}

3501:

3502:

3503:

3504: \subsection{Gaussian-Quantization Estimate-and-Forward}

3505: Here the relay uses the assignment of equation \eqref{eqn:def_qaussian_quant}:

3506: \[

3507:     \hY_1 = Y_1 + N_Q, \qquad N_Q \sim \mN(0, \sigQ).

3508: \]

3509: We first evaluate

3510: \begin{eqnarray*}

3511:     I(X;Y,\hY_1) = h(Y,\hY_1) - h(Y,\hY_1|X):

3512: \end{eqnarray*}

3513: \begin{enumerate}

3514:     \item

3515:         \begin{eqnarray}

3516:             h(Y,\hY_1) & = & - \int_{y = -\infty}^{\infty} \int_{\hy_1 = -\infty}^{\infty}

3517:                 f_{Y,\hY_1}(y, \hy_1) \log_2(f_{Y,\hY_1}(y,\hy_1)) dy \; d\hy_1\nonumber\\

3518:         \label{eqn:joint_y_hy1_gq_eaf}

3519:             f_{Y,\hY_1}(y,\hy_1) & = & \frac{1}{2}\left(G_y(\sqrt{P},\sigD)G_{\hy_1}(g\sqrt{P},\sigR + \sigQ)

3520:                         +G_y(-\sqrt{P},\sigD)G_{\hy_1}(-g\sqrt{P},\sigR + \sigQ)\right).

3521:         \end{eqnarray}

3522:

3523:     \item We also have

3524:     \begin{eqnarray*}

3525:         h(Y,\hY_1|X) & = & h(X + N, gX + N_1 + N_Q|X)\\

3526:                     & = & h( N,  N_1 + N_Q|X)\\

3527:                     & = & h(N) + h(N_1 + N_Q)\\

3528:                     & = & \frac{1}{2}\log_2\left((2\pi e)^2\sigD (\sigR + \sigQ)\right).

3529:     \end{eqnarray*}

3530: \end{enumerate}

3531: Lastly we need to evaluate

3532: \[

3533:     I(\hY_1;Y_1|Y) = h(\hY_1|Y) - h(\hY_1 | Y_1,Y) = h(\hY_1,Y) - h(Y) - h(\hY_1 | Y_1,Y),

3534: \]

3535: where

3536: \[

3537:     h(\hY_1| Y_1, Y) = h(Y_1 + N_Q | Y_1,Y) = h(N_Q|Y_1,Y) = h(N_Q) = \frac{1}{2} \log_2(2 \pi e \sigQ).

3538: \]

3539:

3540: \subsection{Approximation of HD-EAF for $\sigD \rightarrow \infty$}

3541:      \label{appndx:appndxHD-EAF-highSNR}

3542:      Using \eqref{eqn:def_p_hy1_given_x} and \eqref{eqn:appndx_p_hy1} we can write

3543:         \begin{eqnarray*}

3544:             R \le I(X;\hY_1) & = & H(\hY_1) - H(\hY_1|X) \nonumber\\

3545:                     & = & H\left(\frac{1}{2}P_{\ners}, 1 - P_{\ners} ,\frac{1}{2}P_{\ners}\right)

3546:                             - H\left(P_1 P_{\ners}, 1 - P_{\ners}, (1-P_1)P_{\ners}\right) \nonumber\\

3547:                     & = & -P_{\ners} \log_2\left(\frac{1}{2}P_{\ners}\right) -(1 - P_{\ners})\log_2(1 - P_{\ners})+  P_1 P_{\ners} \log_2(P_1 P_{\ners})\nonumber\\

3548:                     &   & \quad     +(1 - P_{\ners})\log_2(1 - P_{\ners})  +  (1-P_1)P_{\ners}\log_2((1-P_1)P_{\ners})\nonumber\\

3549:                     & = & -P_{\ners} \log_2\left(P_{\ners}\right) +P_{\ners}  +  P_1 P_{\ners} \log_2(P_1) + P_1 P_{\ners} \log_2(P_{\ners})\nonumber\\

3550:                     &   & \quad       +  (1-P_1)P_{\ners}\log_2(1-P_1) + (1-P_1)P_{\ners}\log_2(P_{\ners}) \nonumber\\

3551:                     & = &  P_{\ners}(1  +  P_1  \log_2(P_1) +  (1-P_1)\log_2(1-P_1) ) \nonumber\\

3552:                     & = &  P_{\ners}(1  -H ( P_1  ,1-P_1 )).

3553:             \end{eqnarray*}

3554:         \begin{eqnarray*}

3555:             I(Y_1;\hY_1|Y)  & = & h(\hY_1|Y) - h(\hY_1|Y_1,Y)\\

3556:                     & \stackrel{(a)}{\approx} & H(\hY_1) - H(\hY_1|Y_1)\\

3557:                     & = & H\left(\frac{1}{2}P_{\ners}, 1 - P_{\ners} ,\frac{1}{2}P_{\ners}\right) -

3558:                         H(P_{\ners},1-P_{\ners})\\

3559:                     & = & - 2 \frac{1}{2}P_{\ners} \log_2\left(\frac{1}{2}P_{\ners}\right)

3560:                         - (1 - P_{\ners}) \log_2\left(1 - P_{\ners}\right) + P_{\ners} \log_2(P_{\ners})\\

3561:                     &   & \quad        + (1 - P_{\ners}) \log_2\left(1 - P_{\ners}\right)\\

3562:                     & = &  P_{\ners},

3563:         \end{eqnarray*}

3564: where in (a) we used the fact that $\hY_1$ and $Y$ are independent as $\sigD \rightarrow \infty$, and that given

3565: $Y_1$, $\hY_1$ is independent of $Y$.

3566:

3567:

3568:

3569:

3570:

3571:

3572:

3573:

3574: \setcounter{equation}{0}

3575: \section{Proof of Corollary \ref{corr:single-coomon-message-with-multi-step}}

3576: \label{appndx:prof_corollary_single_common}

3577: %\begin{proof}

3578:

3579: %    In the proof we combine channel coding and the multi-step

3580: %    conference proposed by Kaspi in \cite{Kaspi:85}.

3581: %

3582: %    Fix $n$, $\alpha \in [0,1]$, $p(x)$, and for $k = 1,2,...,K$, fix $p\left(\hy_1^{(k)}|y_1,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)$

3583: %    and\\

3584: %     $p\left(\hy_2^{(k)}|y_2,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)$.

3585: %

3586:     In the following we highlight only the modifications from the general broadcast result due to the application of

3587:     DAF to the last

3588:     conference step from $\Rgood$ to $\Rbad$, and the fact that we transmit a single message.

3589:

3590:     \subsubsection{Codebook Generation and Encoding at the Transmitter}

3591:         The transmitter generates $2^{nR}$ codewords $\xvec$ in an i.i.d. manner according to

3592:         $p(\xvec(w)) = \prod_{i=1}^n p(x_i(w))$, $w \in \mW = \left\{1,2,...,2^{nR}\right\}$. For transmission

3593:         of the message $w_i$ at time $i$ the transmitter outputs $\xvec(w_i)$.

3594:

3595:     \subsubsection{Codebook Generation at the $\Rgood$}

3596: %        \begin{itemize}

3597: %            \item For the first conference step from $\Rgood$ to $\Rbad$, $\Rgood$ generates a codebook with $2^{nR_{12}'^{(1)}}$ codeword denoted

3598: %                $\mZ_{12}^{(1)} = \left\{1,2,...,2^{nR_{12}'^{(1)}}\right\}$ according to the distribution

3599: %                $p\left(\hy_1^{(1)}\right)$:

3600: %                $p\left(\hyvec_1^{(1)}(z_{12}^{(1)})\right) = \prod_{i=1}^n p\left(\hy_{1,i}^{(1)}(z_{12}^{(1)})\right)$,

3601: %                $z_{12}^{(1)} \in \mZ_{12}^{(1)}$. $\Rgood$ then uniformly and independently partitions the codebook

3602: %                $\mZ_{12}^{(1)}$ into $2^{nR_{12}^{(1)}}$ subsets indexed by

3603: %                $w_{12}^{(1)} \in \mW_{12}^{(1)} = \left\{1,2,...,2^{nR_{12}^{(1)}}\right\}$. Denote these sets

3604: %                with $\mS_{12,w_{12}^{(1)}}^{(1)}$.

3605: %            \item For the first conference step from $\Rbad$ to $\Rgood$, $\Rbad$ generates a codebook

3606: %                for each codeword $\hyvec_1^{(1)}(z_{12}^{(1)})$, $z_{12}^{(1)} \in \mZ_{12}^{(1)}$ in an i.i.d.

3607: %                manner according to

3608: %                $p\left(\hyvec_2^{(1)}(z_{21}^{(1)}|z_{12}^{(1)})\right)                 \prod_{i=1}^n p\left(\hy_{2,i}^{(1)}(z_{21}^{(1)}|z_{12}^{(1)})\Big|\hy_{1,i}(z_{12}^{(1)})\right)$,

3609: %                $z_{21}^{(1)} \in \mZ_{21}^{(1)} = \left\{1,2,..., 2^{nR_{21}'^{(1)}}\right\}$.

3610: %                $\Rbad$ then uniformly and independently partitions the codebook $\mZ_{21}^{(1)}$

3611: %                 into $2^{nR_{21}^{(1)}}$ subsets indexed by

3612: %                $w_{21}^{(1)} \in \mW_{21}^{(1)} = \left\{1,2,...,2^{nR_{21}^{(1)}}\right\}$. Denote these sets

3613: %                with $\mS_{21,w_{21}^{(1)}}^{(1)}$.

3614: %            \item For the $k$'th conference step from $\Rgood$ to $\Rbad$, then for each combination of

3615: %                $z_{12}^{(1)},z_{12}^{(2)},...,z_{12}^{(k-1)}$, and \\

3616: %                $z_{21}^{(1)},z_{21}^{(2)},...,z_{21}^{(k-1)}$, $\Rgood$ generates a codebook with $2^{nR_{12}'^{(k)}}$

3617: %                messages denoted by $\mZ_{12}^{(k)} = \left\{1,2,...,2^{nR_{12}'^{(k)}}\right\}$,

3618: %                according to the distribution

3619: %                 $p\left(\hy_1^{(k)}|\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)$.

3620: %                 Then $\Rgood$ uniformly and independently partitions the codebook

3621: %                $\mZ_{12}^{(k)}$ into $2^{nR_{12}^{(k)}}$ subsets indexed by

3622: %                $w_{12}^{(k)} \in \mW_{12}^{(k)} = \left\{1,2,...,2^{nR_{12}^{(k)}}\right\}$. Denote these sets

3623: %                with $\mS_{12,w_{12}^{(k)}}^{(k)}$.

3624: %            \item The codebook for the $k$'th conference step from $\Rbad$ to $\Rgood$ is generated in a parallel manner

3625:

3626:              The $K$ conference steps from $\Rgood$ to $\Rbad$ are carried out exactly as in section \ref{sec:DecEncMultiStepRgood}.

3627:              The first $K-1$ steps from $\Rbad$ to $\Rgood$ are carried out as in section \ref{sec:DecEncMultiStepRbad}.

3628:              The $K$'th conference step from $\Rbad$ to $\Rgood$ is different from that of theorem \ref{thm:multi-step-general-bc},

3629:              as after the $K$'th step from $\Rgood$ to $\Rbad$, $\Rbad$ may decode the message

3630:              since $\Rbad$ received all the $K$ conference messages from $\Rgood$. Then, $\Rbad$ uses decode-and-forward for

3631:                 its $K$'th conference transmission to $\Rgood$. Therefore, $\Rbad$ simply partitions $\mW$ into $2^{n \alpha C_{21}}$

3632:                 subsets in a uniform and independent manner.

3633: %        \end{itemize}

3634:

3635:

3636:

3637:     \subsubsection{Encoding and Decoding at the $K$'th Conference Step from $\Rbad$ to $\Rgood$}

3638:         \begin{itemize}

3639: %            \item Encoding at $\Rgood$ at the $K$'th conference step is done as described in section

3640: %                \ref{sec:DecEncMultiStepRgood}.

3641:             \item Before the $K$'th conference step, $\Rbad$ decodes its message using its channel output and all the

3642:             $K$ conference messages received from $\Rgood$. This can be done with an arbitrarily small probability of error as long as \eqref{eqn:R_2} is satisfied.

3643:             \item  Having decoded its message, $\Rbad$ uses the decode-and-forward strategy to select the

3644:                 $K$'th conference message to $\Rgood$. The conference capacity allocated to this step is

3645:                 $R_{21}^{(K)} = \alpha C_{21}$.

3646:             \item Having received the $K$'th conference message from $\Rbad$, $\Rgood$ can now

3647:                 decode its message using the information received at the first $K-1$ steps,

3648:                 and combining it with the information from the last step using the decode-and-forward

3649:                 decoding rule. This gives rise to \eqref{eqn:R_1}.

3650:         \end{itemize}

3651:

3652:     \subsubsection{Combining All the Conference Rate Bounds}

3653:         The bounds on $R_{12}'^{(k)}$, $k = 1,2,...,K$ can be obtained as in section \ref{sec:combining_bounds_general}:

3654:         \begin{eqnarray*}

3655:             C_{12} & = & \sum_{k = 1}^K R_{12}^{(k)}\\

3656:                    & \ge &   I\left( \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)},

3657:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)};Y_1\big| Y_2\right) +2K\eps,

3658:         \end{eqnarray*}

3659:         and similarly

3660:         \[

3661:             (1-\alpha)C_{21} \ge I\left( \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)},

3662:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)};Y_2\big| Y_1\right) +2K\eps,

3663:         \]

3664:         where $(1-\alpha)C_{21}$ is the total capacity allocated to the first $K-1$ conference steps from $\Rbad$ to $\Rgood$.

3665:          This provides the rate constraints on the conference auxiliary variables.

3666: %\end{proof}

3667:

3668:

3669:

3670:

3671:

3672:

3673: \begin{comment}

3674:

3675:

3676:            \section{The Gaussian Relay Channel}

3677:            \label{sec:Gauss_appendix}

3678:

3679:            \subsection{The Motivating Example}

3680:

3681:            \begin{eqnarray*}

3682:                Y_1 & = & \sqrt{g}X + N_1\\

3683:                Y   & = & X + N_2

3684:            \end{eqnarray*}

3685:            let

3686:            \[

3687:                \hY_1 = Y_1 + \nQ = \sqrt{g}X + N_1 + \nQ.

3688:            \]

3689:            Where for this scenario we have $Y \sim \mathcal{NC}(0,(1+P))$,

3690:            $\left(\begin{array}{c} y_1 \\ y \end{array} \right) = \left(\begin{array}{c} \sqrt{g}X + N_1 \\ X + N_2 \end{array} \right) \sim

3691:            \mathcal{NC}\left(\left[\begin{array}{c} 0 \\ 0 \end{array} \right],\left[\begin{array}{cc} gP+1 & \sqrt{g}P \\ \sqrt{g}P & P+1 \end{array} \right] \right)$

3692:            \begin{eqnarray*}

3693:                I(X;Y) & = & h(Y) - h(Y|X)\\

3694:                       & = & \log(1+P) - h(N_2)\\

3695:                       & = & \log(1+P) - \log(1)\\

3696:                       & = & \log(1+P)\\

3697:                I(X;Y_1|Y) & = & h(Y_1|Y) - h(Y_1|Y,X)\\

3698:                           & = & h(Y_1|Y) - h(\sqrt{g}X+N_1|X,X+N_2)\\

3699:                           & = & h(Y_1|Y) - h(N_1|X,N_2)\\

3700:                           & = & h(Y_1|Y) - h(N_1)\\

3701:                           & = & h(Y_1|Y)\\

3702:                h(Y_1|Y)   & = & h(Y_1,Y) - h(Y)\\

3703:                           & = & \log((gP+1)(P+1)-gP^2) - \log(1+P)\\

3704:                           & = & \log(P+1 + gP^2+gP-gP^2) - \log(1+P)\\

3705:                           & = & \log\left(1 +\frac{gP}{1+P}\right)

3706:            \end{eqnarray*}

3707:            \begin{eqnarray*}

3708:                I(Y_1;\hY_1|Y)  & = & h(\hY_1|Y) - h(\hY_1|Y,Y_1)\\

3709:                                & = & h(\sqrt{g}X + N_1 + \nQ|X + N_2) - h(Y_1 + \nQ|Y,Y_1)\\

3710:                                & = & h(\sqrt{g}X + N_1 + \nQ,X + N_2) - h(X + N_2) - h(\nQ|Y,Y_1)\\

3711:                                & = & \log\left((gP + 1 + \sigQ)(P+1) - \left(\sqrt{g}P\right)^2 \right) - \log(P+1) - \log(\sigQ)\\

3712:                                & = & \log\left(gP^2 + gP + (1 + \sigQ)(P+1) - gP^2 \right) - \log(P+1) - \log(\sigQ)\\

3713:                                & = & \log\left(gP + (1 + \sigQ)(P+1)  \right) - \log(P+1) - \log(\sigQ)\\

3714:                                & = & \log\left(1 + \frac{1}{\sigQ} + \frac{gP}{\sigQ(P+1)}  \right)\\

3715:            \end{eqnarray*}

3716:            \begin{eqnarray*}

3717:                I(X;\hY_1|Y)    & = & h(\hY_1|Y) - h(\hY_1|Y,X)\\

3718:                                & = & \log\left(gP + (1 + \sigQ)(P+1)  \right) - \log(P+1) - h(\sqrt{g}X + N_1 + \nQ|X + N_2,X)\\

3719:                                & = & \log\left(1 + \sigQ + \frac{gP}{P+1}  \right)  - h( N_1 + \nQ|N_2,X)\\

3720:                                & = & \log\left(1 + \sigQ + \frac{gP}{P+1}  \right)  - h( N_1 + \nQ)\\

3721:                                & = & \log\left(1 + \sigQ + \frac{gP}{P+1}  \right)  - \log(1+\sigQ)\\

3722:                                & = & \log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right)\\

3723:            \end{eqnarray*}

3724:

3725:

3726:

3727:            \subsection{A Motivating Example: The Noiseless Relay Case}

3728:            \label{sec:motivation_gauss}

3729:            We consider the noiseless relay scenario considered in \cite{Goldsmith:2006}. For this scenario the received

3730:            signals are described by

3731:            \begin{eqnarray*}

3732:                y_1 & = & \sqrt{g}x + n_1\\

3733:                y   & = & x + n_2

3734:            \end{eqnarray*}

3735:            where $x,y_1,y,n_1,n_2 \in \mathbb{C}$, $g \in \mathbb{R}_+$,

3736:            \[  \left(

3737:                    \begin{array}{c}

3738:                        n_1 \\ n_2

3739:                    \end{array}

3740:                \right) \sim

3741:                    \mathcal{NC} \left(

3742:                        \left[

3743:                            \begin{array}{c}

3744:                            0 \\ 0

3745:                          \end{array}

3746:                        \right],

3747:                        \left[

3748:                    \begin{array}{cc}

3749:                            1 & 0 \\ 0 & 1

3750:                    \end{array}

3751:                        \right]

3752:                    \right),

3753:            \]

3754:            and the transmitter has an average power constraint $E\left\{|x|^2\right\} \le P$. It is further assumed that

3755:            all the nodes have perfect channel state information (CSI). The receivers have noiseless conference links

3756:            between them, with capacities $\alpha C$  from the relay to the destination, and $(1-\alpha)C$ from the

3757:            destination to the relay, $\alpha \in [0,1]$. In \cite{Goldsmith:2006} two cooperation strategies are considered:

3758:            one shot cooperation ($\alpha = 1$), which is the standard relay scenario, and an iterative cooperation scheme in which

3759:             the receiver first sends a message to the relay and then the relay sends a message back to the receiver.

3760:            The results in \cite{Goldsmith:2006} for the upper bound ($C_{os,CS}$), one-shot DAF ($R_{os,DF}$),

3761:            one-shot EAF ($R_{os,CF}$) and iterative ($R_i$) conferences are summarized below:

3762:            \begin{eqnarray}

3763:                C_{os,CS} & = & \min\left\{\log(1+(1+g)P), \log(1+P) + C \right\}\\

3764:                R_{os,DF} & = & \min\left\{\log(1+gP), \log(1+P)+C \right\}\\

3765:                R_{os,CF} %& = & \log\left(1+P+ \frac{gP(2^C-1)(P+1)}{2^C(P+1) + gP} \right)\\

3766:                          & = & \log\left(1+P+ \frac{gP}{1+ \frac{1+ P + gP}{(2^C-1)(P+1)}} \right)\\

3767:                          & = & \log(1+P) + \log\left(1 + \frac{gP}{(1+P)\left(1 + \frac{1+ P + gP}{(2^C-1)(P+1)}\right)} \right)\\

3768:                R_i       & = & \max_{0 \le \alpha \le 1} \min \left\{\log\left(1+gP + \frac{P}{1+N_i} \right), \log(1+P) +\alpha C \right\},\\

3769:                          &   & N_i = \frac{(1+g)P+1}{(2^{(1-\alpha)C} - 1)(gP+1)}.

3770:            \end{eqnarray}

3771:            We now evaluate the expression of corollary \ref{corr:single_relay_TAF}  for this scenario, assuming

3772:            $X \sim \mathcal{NC}(0,P)$:

3773:            \begin{eqnarray*}

3774:                R_{os,TF}  & = & I(X;Y|X_1) + \left[ \frac{I(X_1;Y)}{I(\hY_1;Y_1|X_1,Y)} \right]^* I(X;\hY_1|X_1,Y)\\

3775:                           & = & I(X;Y) + \left[ \frac{C}{I(Y_1;\hY_1|Y)} \right]^* I(X;\hY_1|Y)\\

3776:                           & = & \log(1+P) + \min\left\{1,\frac{C}{I(Y_1;\hY_1|Y)}\right\}\left(h(\hY_1|Y) - h(\hY_1|Y,X)\right)\\

3777:                           & = & \log(1+P) + \min \left\{\log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right) ,

3778:                           \frac{C}{\log\left(1 + \frac{1}{\sigQ} + \frac{gP}{\sigQ(P+1)}\right)}\log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right)\right\}\\

3779:                           & = & \min \left\{\log\left(1+P + \frac{gP}{1 + \sigQ}  \right)

3780:                           ,\log(1+P) +\frac{C}{\log\left(1 + \frac{1}{\sigQ} + \frac{gP}{\sigQ(P+1)}\right)}\log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right)\right\}\\

3781:            \end{eqnarray*}

3782:            We see that for one-shot TAF to be superior we need:

3783:            \[

3784:                \sigQ < \frac{1+ P + gP}{(2^C-1)(P+1)}

3785:            \]

3786:            but then

3787:            \begin{eqnarray*}

3788:                \log\left(1 + \frac{1}{\sigQ} + \frac{gP}{\sigQ(P+1)}\right) & = &  \log\left(1 + \frac{1}{\sigQ}\left(\frac{1+P+gP}{P+1}\right)\right)\\

3789:                        & > & \log \left( 1 + \frac{(2^C-1)(P+1)}{1+ P + gP}\left(\frac{1+P+gP}{P+1}\right)\right)\\

3790:                        & = & \log\left(2^C\right)\\

3791:                        & = & C.

3792:            \end{eqnarray*}

3793:            Hence, when $\sigQ > \frac{1+ P + gP}{(2^C-1)(P+1)}$, then EAF is better than TAF. When $\sigQ = \frac{1+ P + gP}{(2^C-1)(P+1)}$

3794:            both expressions are equivalent. When $\sigQ < \frac{1+ P + gP}{(2^C-1)(P+1)}$, then first we have

3795:            \[

3796:                \log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right) >

3797:                    \log\left(1 + \frac{gP}{(1+P)\left(1 + \frac{1+ P + gP}{(2^C-1)(P+1)}\right)} \right)

3798:            \]

3799:            So, for TAF to be better than EAF we need:

3800:            \[

3801:                \frac{C}{\log\left(1 + \frac{1}{\sigQ} + \frac{gP}{\sigQ(P+1)}\right)}\log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right)

3802:                        > \log\left(1 + \frac{gP}{(1+P)\left(1 + \frac{1+ P + gP}{(2^C-1)(P+1)}\right)} \right)

3803:            \]

3804:            or

3805:            \[

3806:                \frac{C}{\log\left(1 + \frac{gP}{(1+P)\left(1 + \frac{1+ P + gP}{(2^C-1)(P+1)}\right)} \right)}

3807:                        > \frac{\log\left(1 + \frac{1}{\sigQ} + \frac{gP}{\sigQ(P+1)}\right)}

3808:                            {\log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right)}

3809:            \]

3810:            we have that TAF is better than EAF.

3811:

3812:            \subsection{The General Gaussian Relay Channel}

3813:            The general form of the Gaussian relay channel is given by

3814:            \begin{eqnarray}

3815:                Y   & = & h X + h_0 X_1 + N,\\

3816:                Y_1 & = & h_1 X + N_1,

3817:            \end{eqnarray}

3818:            where $h$, $h_0$ and $h_1$ are fixed known channel gains, $N_1 \sim \mathcal{NC}(0,\sigR)$,

3819:            $N \sim \mathcal{NC}(0,\sigD)$, independent of $N_1$, $E\left\{|X|^2\right\} \le P$ and

3820:            $E\left\{|X_1|^2\right\} \le P_1$.

3821:

3822:            \subsection{TAF for the Gaussian Case}

3823:            Consider the following assignment of the auxiliary random variable of theorem \ref{thm:CEG_EAF}:

3824:            \begin{equation}

3825:                p(\hY_1|Y_1,X_1) = \left\{

3826:                        \begin{array}{cl}

3827:                            q &, \hY_1 = Y_1 + \nQ\\

3828:                            1-q & ,\hY_1 = \nQ

3829:                        \end{array}

3830:                    \right.

3831:            \end{equation}

3832:            where $\nQ \sim \mathcal{NC}(0,\sigQ)$ is independent of all other variables.

3833:            Under this assignment the feasibility condition of \eqref{eqn:EAF_feasible} becomes:

3834:            \begin{eqnarray*}

3835:                I(X_1;Y) & \ge & I(\hY_1;Y_1|X_1,Y) \\

3836:                         & = &   H(Y_1|X_1,Y) - H(Y_1|X_1,Y,\hY_1) \\

3837:                         & = &   H(Y_1|X_1,Y) - (1-q)H(Y_1|X_1,Y,\nQ) - q H(Y_1|X_1,Y,Y_1+\nQ)\\

3838:                         & = &   H(Y_1|X_1,Y) - (1-q)H(Y_1|X_1,Y) - q H(Y_1|X_1,Y,Y_1+\nQ)\\

3839:                         & = &   q (H(Y_1|X_1,Y) - H(Y_1|X_1,Y,Y_1+\nQ))\\

3840:                         & = &   q \Bigg(

3841:                                \log \left(\frac{(h_1^2\hP + \sigR ) (h^2 \hP + \sigD) - \left(h_1 h \hP\right)^2}{h^2 \hP + \sigD }\right)\\

3842:                         &   & \qquad  - \log \left( \frac{\left((h_1^2\hP + \sigR)(h^2 \hP + \sigD) - \left(h_1 h \hP\right)^2\right)\sigQ}

3843:                                        {(h^2 \hP + \sigD)(h_1^2 \hP + \sigR)  - (h h_1 \hP)^2 + (h^2 \hP + \sigD)\sigQ}\right)\Bigg)\\

3844:                         & = &   q   \log \left( \frac{(h^2 \hP + \sigD)(h_1^2 \hP + \sigR)  - (h h_1 \hP)^2 + (h^2 \hP + \sigD)\sigQ}

3845:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)\\

3846:                         & = &   q   \log \left( 1 + \frac{ (h^2 \hP + \sigD)(h_1^2 \hP + \sigR)  - (h h_1 \hP)^2 }

3847:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)\\

3848:                         & = &   q   \log \left( 1 + \frac{\sigR}{\sigQ} + \frac{ h_1^2 \hP \sigD   }

3849:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)

3850:            \end{eqnarray*}

3851:            Hence

3852:            \begin{eqnarray*}

3853:                q \le \frac{\log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)}{ \log \left( 1 + \frac{\sigR}{\sigQ} + \frac{ h_1^2 \hP \sigD   }

3854:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)}.

3855:            \end{eqnarray*}

3856:            Combining with the constraint $q \le 1$ we obtain

3857:            \begin{equation}

3858:                q \le \left[\frac{\log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)}{ \log \left( 1 + \frac{\sigR}{\sigQ} + \frac{ h_1^2 \hP \sigD   }

3859:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)}\right]^*.

3860:            \end{equation}

3861:            and the rate expression becomes

3862:            \begin{eqnarray*}

3863:                R & \le & I(X;Y,\hY_1|X_1)\\

3864:                    & = & I(X;Y|X_1) + I(X; \hY_1|X_1,Y)\\

3865:                    & = & I(X;Y|X_1) + H(X| X_1,Y) - H(X|X_1,Y,\hY_1)\\

3866:                    & = & I(X;Y|X_1) + H(X| X_1,Y) - (1-q) H(X|X_1,Y,\nQ) - q H(X|X_1,Y,Y_1+\nQ)\\

3867:                    & = & I(X;Y|X_1) + H(X| X_1,Y) - (1-q) H(X|X_1,Y) - q H(X|X_1,Y,Y_1+\nQ)\\

3868:                    & = & I(X;Y|X_1) + q I(X;Y_1 + \nQ|X_1,Y)

3869:            \end{eqnarray*}

3870:            hence we want to maximize $q$.

3871:

3872:            Assuming $X \sim \mathcal{NC}(0,\hP)$ and $X_1 \sim \mathcal{NC}(0,\hP_1)$, $X$ and $X_1$ independent,

3873:            we can evaluate the expression of corollary \ref{corr:single_relay_TAF} as follows:

3874:            \begin{eqnarray*}

3875:                R_{TF} & = & \log\left(1 + \frac{h^2\hP}{\sigD}\right) + q\log\left(1 + \frac{\sigD h_1^2 \hP}{(h^2\hP+\sigD)(\sigR + \sigQ)}\right)\\

3876:                       & = &  \min\left\{\log\left(1 + \frac{h^2\hP}{\sigD}\right) +\log\left(1 + \frac{\sigD h_1^2 \hP}{(h^2\hP+\sigD)(\sigR + \sigQ)}\right),\right.\\

3877:                       &   & \left.    \log\left(1 + \frac{h^2\hP}{\sigD}\right) + \log\left(1 + \frac{\sigD h_1^2 \hP}{(h^2\hP+\sigD)(\sigR + \sigQ)}\right)

3878:                                \frac{\log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)}{ \log \left( 1 + \frac{\sigR}{\sigQ} + \frac{ h_1^2 \hP \sigD   }

3879:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)} \right\}\\

3880:                       & = &  \min\left\{\log\left(1 + \frac{h^2\hP}{\sigD} + \frac{ h_1^2 \hP}{(\sigR + \sigQ)}\right),\right.\\

3881:                       &   & \left.    \log\left(1 + \frac{h^2\hP}{\sigD}\right) + \log\left(1 + \frac{\sigD h_1^2 \hP}{(h^2\hP+\sigD)(\sigR + \sigQ)}\right)

3882:                                \frac{\log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)}{ \log \left( 1 + \frac{\sigR}{\sigQ} + \frac{ h_1^2 \hP \sigD   }

3883:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)} \right\}

3884:            \end{eqnarray*}

3885:            \[

3886:                R_{Gauss} = \log\left(1 + \frac{h^2\hP}{\sigD} +  \frac{h_1^2 \hP}{\left(\sigR+\frac{\sigR(h^2\hP + \sigD) + h_1^2 \hP\sigD}{h_0^2 \hP_1}\right)}\right)

3887:            \]

3888:            Let $h_0^2\hP_1$ be large enough such that $q = 1$.

3889:            Then $R_{TF} > R_{Gauss}$ if

3890:            \[

3891:                \sigQ < \frac{\sigR(h^2\hP + \sigD) + h_1^2 \hP\sigD}{h_0^2 \hP_1}

3892:            \]

3893:            But, for $q = 1$ we need:

3894:            \begin{eqnarray*}

3895:                 \frac{h_0^2 \hP_1}{h^2\hP + \sigD} & > & \frac{\left(h^2 \hP + \sigD\right)\sigR + h_1^2 \hP \sigD }{\left(h^2 \hP + \sigD\right)\sigQ}\\

3896:                 \sigQ & > & \frac{\sigR (h^2 \hP + \sigD) + h_1^2 \hP \sigD }{h_0^2 \hP_1}

3897:            \end{eqnarray*}

3898:            So this does not work.

3899:            Therefore we conclude that if $\sigQ  >  \frac{\sigR (h^2 \hP + \sigD) + h_1^2 \hP \sigD }{h_0^2 \hP_1}$

3900:            then $R_{Gauss} > R_{TF}$, and if $\sigQ  =  \frac{\sigR (h^2 \hP + \sigD) + h_1^2 \hP \sigD }{h_0^2 \hP_1}$ then

3901:            $R_{Gauss} = R_{TF}$. Now consider

3902:            \[

3903:                    \sigQ < \frac{\sigR(h^2\hP + \sigD) + h_1^2 \hP\sigD}{h_0^2 \hP_1}

3904:            \]

3905:            compare $R_{TF}$ with

3906:            \[

3907:              R_{Gauss} =   \log\left(1 + \frac{h^2\hP}{\sigD}\right)

3908:                            + \log \left( 1 + \frac{h_1^2 \hP\sigD}{(\sigR+\sigW)(h^2\hP + \sigD)}\right)

3909:            \]

3910:            so we need

3911:            \[

3912:                \log \left( 1 + \frac{h_1^2 \hP\sigD}{\left(\sigR+\frac{\sigR(h^2\hP + \sigD) + h_1^2 \hP\sigD}{h_0^2 \hP_1}\right)(h^2\hP + \sigD)}\right) <

3913:                  \log\left(1 + \frac{\sigD h_1^2 \hP}{(h^2\hP+\sigD)(\sigR + \sigQ)}\right)

3914:                                \frac{\log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)}{ \log \left( 1 + \frac{\sigR}{\sigQ} + \frac{ h_1^2 \hP \sigD   }

3915:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)}

3916:            \]

3917:

3918:            %\begin{eqnarray*}

3919:            %    R_{TF}  & = & I(X;Y|X_1) + \left[ \frac{I(X_1;Y)}{H(Y_1|X_1,Y)} \right]^* I(X;Y_1|X_1,Y)\\

3920:            %            & = & \log\left(1 + \frac{h^2\hP}{\sigD}\right) + \min\left\{1,\frac{I(X_1;Y)}{H(Y_1|X_1,Y)}\right\}I(X;Y_1|X_1,Y)\\

3921:            %            & = & \log\left(1 + \frac{h^2\hP}{\sigD}\right) +\min\left\{ \log \left(\sigR + \frac{h_1^2\hP \sigD}{h^2 \hP + \sigD }\right)- \log(\sigR),

3922:            %                \frac{I(X_1;Y)}{h(Y_1|X_1,Y)}(h(Y_1|X_1,Y) - h(Y_1|X,X_1,Y))\right\}\\

3923:            %            & = & \log\left(1 + \frac{h^2\hP}{\sigD}\right) +\min\left\{ \log \left(1 + \frac{h_1^2\hP \sigD}{\sigR(h^2 \hP + \sigD) }\right),

3924:            %                \log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)

3925:            %                    - \frac{\log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)}{\log \left(\sigR + \frac{h_1^2\hP \sigD}{h^2 \hP + \sigD }\right)} \log(\sigR)\right\}

3926:            %\end{eqnarray*}

3927:

3928:            $\left(\begin{array}{c}

3929:                    h_1 X + N_1 \\ h X  + N \\ h_1 X + N_1 + \nQ

3930:                \end{array}\right) \sim \mathcal{NC}\left(\left[

3931:                                                \begin{array}{c}

3932:                                                    0 \\ 0 \\ 0

3933:                                                \end{array}

3934:                                                \right], \left[

3935:                                                            \begin{array}{ccc}

3936:                                                                h_1^2\hP + \sigR & h_1 h \hP       & h_1^2\hP + \sigR\\

3937:                                                                h_1 h \hP        & h^2 \hP + \sigD & h h_1 \hP\\

3938:                                                                h_1^2\hP + \sigR & h_1 h \hP       & h_1^2 \hP + \sigR + \sigQ

3939:                                                            \end{array}

3940:                                                            \right] \right)$

3941:            \begin{eqnarray*}

3942:                h(h_1 X + N_1, h X  + N, h_1 X + N_1 + \nQ) & = & \log\Big((h_1^2\hP + \sigR)(h^2 \hP + \sigD)(h_1^2 \hP + \sigR + \sigQ)

3943:                            + 2(h_1 h \hP)^2(h_1^2\hP + \sigR) \\

3944:                            &  & - (h_1^2\hP + \sigR)^2(h^2 \hP + \sigD) - (h_1 h \hP)^2(h_1^2\hP + \sigR)\\

3945:                            &  & - (h_1 h \hP)^2(h_1^2 \hP + \sigR + \sigQ)\Big)\\

3946:                            & = & \log\left(\left((h_1^2\hP + \sigR)(h^2 \hP + \sigD) - (h_1 h \hP)^2\right)\sigQ\right)\\

3947:                h(h X  + N, h_1 X + N_1 + \nQ) & = & \log\left((h^2 \hP + \sigD)(h_1^2 \hP + \sigR + \sigQ) - (h h_1 \hP)^2\right)\\

3948:                                               & = & \log\left((h^2 \hP + \sigD)(h_1^2 \hP + \sigR)  - (h h_1 \hP)^2 + (h^2 \hP + \sigD)\sigQ\right)\\

3949:                h(Y_1|X_1,Y,Y_1+\nQ) & = & h(h_1 X + N_1 | X_1, h X + h_0 X_1 + N, h_1 X + N_1 + \nQ)\\

3950:                                     & = & h(h_1 X + N_1 | X_1, h X  + N, h_1 X + N_1 + \nQ)\\

3951:                                     & = & h(h_1 X + N_1 | h X  + N, h_1 X + N_1 + \nQ)\\

3952:                                     & = & h(h_1 X + N_1, h X  + N, h_1 X + N_1 + \nQ) - h(h X  + N, h_1 X + N_1 + \nQ)\\

3953:                                     & = & \log \left( \frac{\left((h_1^2\hP + \sigR)(h^2 \hP + \sigD) - (h_1 h \hP)^2\right)\sigQ}

3954:                                        {(h^2 \hP + \sigD)(h_1^2 \hP + \sigR)  - (h h_1 \hP)^2 + (h^2 \hP + \sigD)\sigQ}\right)

3955:            \end{eqnarray*}

3956:            \begin{eqnarray*}

3957:                I(X;Y_1 + \nQ|X_1,Y)    & = & h(Y_1 + \nQ|X_1,Y) - h(Y_1 + \nQ|X,X_1,Y)\\

3958:                                        & = & \log\left(1 + \frac{\sigD h_1^2 \hP}{(h^2\hP+\sigD)(\sigR + \sigQ)}\right)\\

3959:                    h(Y_1 + \nQ|X_1,Y)  & = & h(h_1 X + N_1 + \nQ|X_1,h X + h_0 X_1 + N)\\

3960:                                        & = & h(h_1 X + N_1 + \nQ|X_1,h X  + N)\\

3961:                                        & = & h(h_1 X + N_1 + \nQ|h X  + N)\\

3962:                                        & = & h(h_1 X + N_1 + \nQ,h X  + N) - h(h X  + N)\\

3963:                                        & = & \log\left((h^2 \hP + \sigD)(h_1^2 \hP + \sigR)  - (h h_1 \hP)^2 + (h^2 \hP + \sigD)\sigQ\right)-\log(h^2\hP+\sigD)\\

3964:                                        & = & \log\left( \frac{\sigD h_1^2 \hP    + (h^2 \hP + \sigD)(\sigQ + \sigR)}{h^2\hP+\sigD}\right)\\

3965:                                        & = & \log\left( \sigQ + \sigR + \frac{\sigD h_1^2 \hP}{h^2\hP+\sigD}\right)\\

3966:                h(Y_1 + \nQ|X,X_1,Y)    & = & h(h_1 X + N_1 + \nQ|X_1,h X + h_0 X_1 + N,X)\\

3967:                                        & = & h( N_1 + \nQ|X_1,  N,X)\\

3968:                                        & = & h( N_1 + \nQ)\\

3969:                                        & = & \log(\sigR + \sigQ)

3970:            \end{eqnarray*}

3971:

3972:            \begin{eqnarray*}

3973:                h(Y|X_1)      & = & h(h X + h_0 X_1 + N|X_1)\\

3974:                            & = & h(h X + N|X_1)\\

3975:                            & = & h(h X + N)\\

3976:                            & = & \log(h^2\hP + \sigD)\\

3977:                h(Y|X_1,X)  & = & h(h X + h_0 X_1 + N|X_1,X)\\

3978:                            & = & h(N|X_1,X)\\

3979:                            & = & h(N)\\

3980:                            & = & \log(\sigD)\\

3981:                I(X;Y|X_1)  & = & h(Y|X_1) - h(Y|X_1,X)\\

3982:                            & = & \log\left(1 + \frac{h^2\hP}{\sigD}\right)\\

3983:                I(X_1;Y)    & = & h(Y) - h(Y|X_1)\\

3984:                            & = & \log(h^2 \hP + h_0^2 \hP_1 + \sigD) - \log(h^2\hP + \sigD)\\

3985:                            & = & \log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)\\

3986:            \end{eqnarray*}

3987:            \begin{eqnarray*}

3988:                h(Y_1|X_1,Y)& = & h(h_1 X + N_1 | X_1, h X + h_0 X_1 + N)\\

3989:                            & = & h(h_1 X + N_1 | X_1, h X + N)\\

3990:                            & = & h(h_1 X + N_1 | h X + N)\\

3991:                            & = & h(h_1 X + N_1 , h X + N) - h( h X + N)\\

3992:                            & = & \log \left((h_1^2\hP + \sigR)(h^2 \hP + \sigD) - \left(h_1 h \hP\right)^2\right) - \log\left(h^2 \hP + \sigD \right)\\

3993:                            & = & \log \left(\frac{h_1^2\hP(h^2 \hP + \sigD) + \sigR(h^2 \hP + \sigD) - \left(h_1 h \hP\right)^2}{h^2 \hP + \sigD }\right)\\

3994:                            & = & \log \left(\sigR + \frac{h_1^2\hP \sigD}{h^2 \hP + \sigD }\right)\\

3995:             I(X;Y_1|X_1,Y) & = & h(Y_1|X_1,Y)- h(Y_1|X,X_1,Y)\\

3996:                            & = & h(Y_1|X_1,Y)- h(N_1|X,X_1, N)\\

3997:                            & = & h(Y_1|X_1,Y)- h(N_1)\\

3998:                            & = & \log \left(\sigR + \frac{h_1^2\hP \sigD}{h^2 \hP + \sigD }\right)- \log(\sigR).

3999:            \end{eqnarray*}

4000:

4001:            $\left(\begin{array}{c} h_1 X + N_1 \\ h X + N \end{array} \right)  \sim

4002:            \mathcal{NC}\left(\left[\begin{array}{c} 0 \\ 0 \end{array} \right],\left[\begin{array}{cc} h_1^2\hP + \sigR & h_1 h \hP \\ h_1 h \hP & h^2 \hP + \sigD\end{array} \right] \right)$

4003:

4004:            The standard application of EAF for the Gaussian channel uses the assignment

4005:            \[

4006:                \hY_1 = Y_1 + W = h_1 X + N_1 + W, \qquad W \sim \mathcal{NC}(0,\sigW).

4007:            \]

4008:            The rate expression is given by:

4009:            \begin{eqnarray}

4010:                \label{eqn:R_gauss_exp1}

4011:                R   & = & I(X;Y,\hY_1|X_1)\nonumber\\

4012:                    & = & I(X;Y|X_1) + I(X;\hY_1|X_1,Y)\nonumber\\

4013:                    & = & I(X;Y|X_1) + h(\hY_1|X_1,Y) - h(\hY_1|X,X_1,Y)\nonumber\\

4014:                    & = & \log\left(1 + \frac{h^2\hP}{\sigD}\right)

4015:                            + \log \left( 1 + \frac{h_1^2 \hP\sigD}{(\sigR+\sigW)(h^2\hP + \sigD)}\right)\nonumber\\

4016:                    & = & \log\left(1 + \frac{h^2\hP}{\sigD} +  \frac{h_1^2 \hP}{(\sigR+\sigW)}\right)

4017:            \end{eqnarray}

4018:            subject to

4019:            \begin{eqnarray*}

4020:                I(X_1;Y) = \log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right) & \ge & I(\hY_1;Y_1|X_1,Y)\\

4021:                    & = & h(\hY_1|X_1,Y) - h(\hY_1|Y_1,X_1,Y)\\

4022:                    & = & \log \left( \sigR + \sigW + \frac{h_1^2 \hP\sigD}{h^2\hP + \sigD} \right) - \log(\sigW)\\

4023:                    & = & \log \left( 1 + \frac{\sigR}{\sigW} + \frac{h_1^2 \hP\sigD}{\sigW(h^2\hP + \sigD)} \right)

4024:            \end{eqnarray*}

4025:            \begin{eqnarray}

4026:                \label{eqn:sigW_cond}

4027:                1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \ge  1 + \frac{\sigR}{\sigW} + \frac{h_1^2 \hP\sigD}{\sigW(h^2\hP + \sigD)}\nonumber\\

4028:                \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \ge  \frac{1}{\sigW} \left(\sigR+ \frac{h_1^2 \hP\sigD}{h^2\hP + \sigD}\right)\nonumber\\

4029:                \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \ge  \frac{1}{\sigW} \left(\frac{\sigR(h^2\hP + \sigD) + h_1^2 \hP\sigD}{h^2\hP + \sigD}\right)\nonumber\\

4030:                \sigW \ge   \frac{\sigR(h^2\hP + \sigD) + h_1^2 \hP\sigD}{h_0^2 \hP_1}.

4031:            \end{eqnarray}

4032:            Maximizing the rate implies making $\sigW$ as small as possible, i.e., taking equality in \eqref{eqn:sigW_cond}. Plugging this back

4033:            into \eqref{eqn:R_gauss_exp1} yields

4034:            \[

4035:                R = \log\left(1 + \frac{h^2\hP}{\sigD} +  \frac{h_1^2 \hP}{\left(\sigR+\frac{\sigR(h^2\hP + \sigD) + h_1^2 \hP\sigD}{h_0^2 \hP_1}\right)}\right)

4036:            \]

4037:

4038:

4039:            \begin{eqnarray*}

4040:                h(\hY_1|X,X_1,Y)    & = & h(h_1 X + N_1 + W|X,X_1,h X + h_0 X_1 + N)\\

4041:                                    & = & h(N_1 + W|X,X_1,N)\\

4042:                                    & = & h(N_1 + W)\\

4043:                                    & = & \log(\sigR+\sigW)\\

4044:                h(\hY_1|X_1,Y)      & = & h(h_1 X + N_1 + W|X_1,h X + h_0 X_1 + N)\\

4045:                                    & = & h(h_1 X + N_1 + W|X_1,h X + N)\\

4046:                                    & = & h(h_1 X + N_1 + W|h X + N)\\

4047:                                    & = & h(h_1 X + N_1 + W,h X + N) - h(h X + N)\\

4048:                                    & = & \log \left( (h_1^2 \hP + \sigR + \sigW)(h^2\hP + \sigD)- \left(h h_1 \hP\right)^2\right)

4049:                                             - \log(h^2\hP + \sigD) \\

4050:                                    & = & \log \left( h_1^2 \hP\sigD + (\sigR + \sigW)(h^2\hP + \sigD) \right)

4051:                                             - \log(h^2\hP + \sigD) \\

4052:                                    & = & \log \left( \sigR + \sigW + \frac{h_1^2 \hP\sigD}{h^2\hP + \sigD} \right)\\

4053:                I(X;\hY_1|X_1,Y)    & = & h(\hY_1|X_1,Y) - h(\hY_1|X,X_1,Y)\\

4054:                                    & = & \log \left( \sigR + \sigW + \frac{h_1^2 \hP\sigD}{h^2\hP + \sigD} \right) - \log(\sigR+\sigW)\\

4055:                                    & = & \log \left( 1 + \frac{h_1^2 \hP\sigD}{(\sigR+\sigW)(h^2\hP + \sigD)} \right)\\

4056:                h(\hY_1|Y_1,X_1,Y)  & = & h(h_1 X + N_1 + W|h_1 X + N_1,X_1,h X + h_0 X_1 + N)\\

4057:                                    & = & h(h_1 X + N_1 + W|h_1 X + N_1,X_1,h X  + N)\\

4058:                                    & = & h(W|h_1 X + N_1,X_1,h X  + N)\\

4059:                                    & = & h(W)\\

4060:                                    & = & \log(\sigW)

4061:            \end{eqnarray*}

4062:            $\left(\begin{array}{c} h_1 X + N_1 + W \\ h X + N \end{array} \right) \sim

4063:            \mathcal{NC}\left(\left[\begin{array}{c} 0 \\ 0 \end{array} \right],

4064:                \left[\begin{array}{cc} h_1^2 \hP + \sigR + \sigW & h h_1 \hP \\ h h_1 \hP & h^2 \hP + \sigD \end{array} \right] \right)$

4065:

4066: \end{comment}

4067:

4068:

4069:

4070:

4071:

4072:

4073:

4074:

4075:

4076:

4077:

4078:

4079:

4080:

4081: \begin{comment}

4082:

4083:            \setcounter{equation}{0}

4084:

4085:            \section{Proof of Theorem \ref{thm:achieve-three-steps}}

4086:            \label{sec:three-steps-proof}

4087:                \subsection{Rate Bounds for the Three-Step Conference}

4088:                \label{sec:three-steps}

4089:                \subsubsection{Codebooks Construction and Conference Steps}

4090:                Fix $p_X(x)$ on $\mX$, $\eps > 0$, $\delta > 0$ and the block length $n$. Let $0 < \alpha \le 1$ and

4091:                \begin{equation}

4092:                \label{eqn:def_m_coop_BC}

4093:                    m = \min \left\{n,  \left\lfloor\frac{n C_{21}^a }{H(Y_2|Y_1) + \delta}\right\rfloor\right\}.

4094:                \end{equation}

4095:                Let $K = \frac{m}{n}$,

4096:                and $K_{\eps} \triangleq \!\left[\frac{C_{21}^a}{H(Y_2|Y_1)+\delta}\right]^*\! - \eps$.

4097:                Increase $n$  s.t. $K > K_{\eps}$. Note that $K \le 1$.

4098:                    \paragraph{Codebook Generation and Encoding at the Transmitter}

4099:                        The transmitter generates $2^{nR}$ i.i.d. codewords $\xvec$ of length $n$ such that

4100:                        $p(\xvec(w)) = \prod_{l=1}^n p_X(x_l(w))$,

4101:                        $w \in \mW = \left\{1,2,...,2^{nR}\right\}$.

4102:                        For transmitting the message $w_i$ at time $i$, the transmitter outputs the corresponding channel

4103:                        codeword $\xvec(w_i)$.

4104:                    \paragraph{Relay Codebook Generation at $\Rgood$}

4105:                        First define

4106:                        \[

4107:                            \btyp(Y_1,\yvec_2^m) = \bigcup_{\yvec_1^m \in A_{\delta}^{(m)}(Y_1|\yvec_2^m)} \ttyp(Y_1,\yvec_1^m).

4108:                        \]

4109:                        Now, for each $\yvec_2^m \in \typm(Y_2)$, $\Rgood$ partitions the set $\btyp(Y_1,\yvec_2^m)$

4110:                        into $2^{n C_{12}}$ subsets

4111:                        $S_k(\yvec_2^m)$, $k \in \big\{1,2,...,2^{nC_{12}}\big\}$,

4112:                        in a uniform and independent manner.

4113:                    \paragraph{Relay Codebook Generation at $\Rbad$}

4114:                        \begin{itemize}

4115:                            \item For the first conference step, $\Rbad$ simply enumerates all the sequences in the

4116:                                set $\typm(Y_2)$ with an index $l \in \left\{1,2,...,2^{n R_{21}^a}\right\}$. This can be done

4117:                                when

4118:                                \[

4119:                                 ||\typm(Y_2)|| \le 2^{nR_{21}^a}.

4120:                                \]

4121:

4122:                                Now $\Rbad$ partitions the set of $\typm(Y_2)$ into $2^{nC_{21}^a}$ subsets

4123:                                in a uniform and independent manner. Denote these subsets with $B_k$, $k \in

4124:                                \left\{1,2,...,2^{nC_{21}^a}\right\}$.

4125:                            \item For the third conference step, $\Rbad$ partitions the message set $\mW$ into $2^{n C_{21}^b}$

4126:                            subsets, $B_{k'}$, $k' \in \big\{1,2,...,2^{n C_{21}^b} \big\}$,

4127:                            in a uniform and independent manner.

4128:

4129:                        \end{itemize}

4130:                    \paragraph{Encoding at $\Rbad$ at the First Conference Step (time $i+1$)}

4131:                        Upon receiving the

4132:                        channel output $\yvec_2(i)$, $\Rbad$ considers its first $m$ symbols denoted $\yvec_2^m(i)$. $\Rbad$

4133:                        then finds the index $l_i$ of the partition $B_{l_i}$ that contains the received $\yvec_2^m(i)$

4134:                        in $\typm(Y_2)$ and sends it to $\Rgood$ through the conference link.

4135:                    \paragraph{Encoding at $\Rgood$ at the Second Conference Step (time $i+2$)}

4136:                      From $l_i$, $\Rgood$ knows the set $B_{l_i}$ to which $\yvec_2^m(i)$ belongs. $\Rgood$ now

4137:                      looks for a sequence $\yvec_2^m$ such that $\yvec_2^m \in \typm(Y_2|\yvec_1^m(i)) \bigcap B_{l_i}.$

4138:                        If there is none or there is more than one, an error is declared.

4139:                       Now $\Rgood$ knows $\yvec_2^m(i)$, therefore $\Rgood$

4140:                        can construct the set $A_{\delta}^{(m)}(Y_1|\yvec_2^m(i))$ and thus $\btyp(Y_1,\yvec_2^m(i))$.

4141:            %            knows $\btyp(Y_1,\yvec_2^m(i))$.

4142:                        $\Rgood$ then looks for the partition of $\btyp(Y_1,\yvec_2^m(i))$

4143:                         into which $\yvec_1(i)$ belongs, denoted $S_{k(i)}$, and

4144:                        transmits its index $k(i)$ to $\Rbad$.

4145:                    \paragraph{Decoding at $\Rbad$ (time $i+2$)}

4146:                    \label{sec:decoding-at-Rx2}

4147:                        Note that $\Rbad$ knows $\btyp(Y_1,\yvec_2^m(i))$, so both $\Rgood$ and $\Rbad$ can refer to the same set.

4148:                        Now, $\Rbad$ generates the set $\mL_2(i) = \left\{ w \in \mW : (\xvec(w), \yvec_2(i)) \in \typ(X,Y_2)\right\}$.

4149:                        Then, $\Rbad$ looks for a unique message $\hw \in \mL_2(i)$, such that

4150:                        $\left(\xvec(\hw),\yvec_1,\yvec_2(i)\right) \in \typ(X,Y_1,Y_2)$ for at least one $\yvec_1 \in S_{k(i)}$.

4151:                    \paragraph{Encoding at $\Rbad$ at the Third Conference Step (time $i+3$)}

4152:                        $\Rbad$ looks for the partition $B_{k'(i)}$ of $\mW$ into which the decoded message $\hw$ belongs.

4153:                        $\Rbad$ then transmits $k'(i)$ to $\Rgood$ through the conference link.

4154:                        % through the conference link.

4155:                    \paragraph{Decoding at $\Rgood$ (time $i+3$)}

4156:                    \label{sec:three-steps-decoder-1}

4157:                        $\Rgood$ generates the set\\

4158:                        $\tmL_1(i) = \Big\{ w \in \mW : (\xvec^m(w), \yvec_1^m(i)) \in \typm(X,Y_1), \;

4159:                            (\xvec(w), \yvec_1(i)) \in \typ(X,Y_1)\Big\}$. Then $\Rgood$ looks

4160:                        for a unique message $\hw \in \tmL_1(i)$ such that $\hw \in B_{k'(i)}$ and

4161:                        $\left(\xvec^m(\hw),\yvec_1^m(i),\yvec_2^m(i)\right) \in \typm(X,Y_1,Y_2)$.

4162:

4163:                \subsubsection{Error Events}

4164:                \label{sec:three-steps-error-events}

4165:                    For the scheme defined above we have the following error events, for decoding the message transmitted at

4166:                    time $i$:

4167:                    \paragraph{Joint Typicality Decoding Fails}

4168:                        $E_0 = E_0' \bigcup E_0''$,\\

4169:                        $E_0' = \left\{\left(\xvec(w_i),\yvec_1(i),\yvec_2(i) \right) \notin \typ(X,Y_1,Y_2) \right\}$,\\

4170:                        $E_0''= \left\{\left(\xvec^m(w_i), \yvec_1^m(i), \yvec_2^m(i)\right) \notin \typm(X,Y_1,Y_2) \right\}$.

4171:                    \paragraph{Encoding at $\Rbad$ at the First Step Fails} $\phantom{x}$\\

4172:                        $E_1' = \left\{\yvec_2^m(i) \notin \typm(Y_2) \right\}$.

4173:                    \paragraph{Encoding at $\Rgood$ at the Second Step Fails}$\phantom{xxxxx}$\\

4174:            %            $E_1'' = E_{1,a}'' \bigcup E_{1,b}''$,\\

4175:                        $E_{1,a}'' = \left\{ \yvec_2^m(i) \notin \typm(Y_2|\yvec_1^m(i)) \bigcap B_{l_i} \right\}$,\\

4176:                        $E_{1,b}'' = \left\{ \exists \yvec_2^m \ne \yvec_2^m(i), \yvec_2^m \in \typm(Y_2|\yvec_1^m(i)) \bigcap B_{l_i}\right\}$,\\

4177:                        $E_{1,c}'' = \left\{ \yvec_1^m(i) \notin \typm(Y_1|\yvec_2^m(i))\right\}$,\\

4178:                        $E_{1,d}'' = \left\{ \yvec_1(i)  \notin \btyp(Y_1,\yvec_2^m(i)) \right\}$.\\

4179:            %            $E_1 = E_1' \bigcup E_{1,a}'' \bigcup E_{1,b}''$.

4180:                        Let $E_1 = E_1' \bigcup E_{1,a}'' \bigcup E_{1,b}'' \bigcup E_{1,c}'' \bigcup E_{1,d}''$.

4181:                    \paragraph{Decoding at $\Rbad$ Fails} $E_2 = E_2' \bigcup E_2'' \bigcup E_2'''$, \\

4182:                        $E_2'  = \Big\{w_i \notin \mL_2(i) \Big\}$,\\

4183:                        $E_2'' =\left\{\nexists \yvec_1 \mbox{$\in$} S_{k(i)} \mbox{ s.t. } (\xvec(w_i),\yvec_1, \yvec_2(i))

4184:                            \mbox{$\in$} \typ(X,Y_1,Y_2)\right\}$,\\

4185:                        $E_2''' = \Big\{ \exists w \in \mL_2(i), w \ne w_i, \exists \yvec_1 \in S_{k(i)},

4186:                             (\xvec(w), \yvec_1, \yvec_2(i)) \in \typ(X,Y_1,Y_2)\Big\}$.

4187:                    \paragraph{Decoding at $\Rgood$ Fails} $E_3 = E_3' \bigcup E_3'' \bigcup E_3'''$,\\

4188:                        $E_3' = \Big\{w_i \notin B_{k'(i)} \bigcap \tmL_1(i) \Big\}$,\\

4189:                        $E_3'' = \left\{\left(\xvec^m(w_i), \yvec_1^m(i), \yvec_2^m(i)\right) \notin \typm(X,Y_1,Y_2) \right\}$,\\

4190:                        $E_3''' = \Big\{\exists w \ne w_i, w \in  B_{k'(i)}, w \in \tmL_1(i),

4191:                            (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i)) \in \typm(X,Y_1,Y_2)\Big\}$.

4192:

4193:                \subsubsection{Bounding the Probability of Error Events}

4194:                \label{sec:three-steps-error-bounds}

4195:                    The average probability of error can be bounded by

4196:                    \begin{eqnarray*}

4197:                        \Pe & \le & \Pr\left(\bigcup_{l=0}^3 E_l\right)\\

4198:                            &  =  & \sum_{l=0}^3 \Pr\left(E_l  \bigcap_{k=0}^{l-1} E_k^c\right).

4199:                    \end{eqnarray*}

4200:                    \paragraph{Probability of $E_0$}

4201:                        By the AEP (see \cite[ch. 3]{cover-thomas:it-book}), taking $n$ large enough we can make $\Pr(E_0') \le \eps$

4202:                        and $\Pr(E_0'') \le \delta$ for any $\eps$, $\delta$ positive, arbitrarily small, since from equation

4203:                        \eqref{eqn:def_m_coop_BC} $m$ is increasing with $n$: $m \ge K_{\eps}n$.

4204:                        Thus, by the union bound $\Pr(E_0) \le \Pr(E_0') + \Pr(E_0'') \le \eps + \delta$ for $n$ large enough.

4205:

4206:                    \paragraph{Probability of $E_1 \bigcap E_0^c$}

4207:                        Note that $E_0^c = E_0'^c \bigcap E_0''^c$. Now, from the definition

4208:                        of jointly typical sets (see \cite[ch. 14.2]{cover-thomas:it-book}) it follows that

4209:                        $\Pr\left(E_1' \bigcap E_0''^c\right) = \Pr\left(E_{1,a}'' \bigcap E_0''^c\right) =

4210:                                \Pr\left(E_{1,c}'' \bigcap E_0''^c\right) = 0$, and

4211:                        $\Pr\left(E_{1,d}''\bigcap E_{1,c}''^c \bigcap E_0'^c \bigcap E_0''^c\right) = 0$.

4212:                        The last equality holds since $E_0'^c \bigcap E_0''^c$ imply that

4213:                        $\yvec_1(i) \in \ttyp(Y_1,\yvec_1^m(i))$, and $E_{1,c}''^c$ implies correct decoding of

4214:                        $\yvec_2^m(i)$ at $\Rgood$.

4215:                        Following similar arguments to \cite[theorem 6]{cover-thomas:it-book} we conclude that

4216:                        taking $n$ large enough we can make

4217:                        $\Pr(E_{1,b}'' \bigcap E_0''^c) \le \eps$, as long as

4218:                        \begin{eqnarray}

4219:                             K_{\eps}  (H(Y_2|Y_1)   + 2\eps) & < & C_{21}^a\nonumber\\

4220:                            \label{eqn:K_eps_condition}

4221:                               \Rightarrow K_{\eps}   & < & \frac{C_{21}^a}{H(Y_2|Y_1)   + 2\eps}.

4222:                        \end{eqnarray}

4223:            %            Therefore by the union bound $\Pr\left(E_1 \bigcap E_0^c\right) = 0$.

4224:                        Note that \eqref{eqn:K_eps_condition} is satisfied when $\eps > \frac{\delta }{2}$.

4225:                        Hence, taking $n$ large enough we can make $\Pr\left(E_1 \bigcap E_0^c\right) \le \eps$.

4226:

4227:                    \paragraph{Probability of $E_2 \bigcap E_1^c \bigcap E_0^c$}

4228:                        $E_0^c$ implies that $(\xvec(w_i), \yvec_2(i)) \in \typ(X,Y_2)$, hence

4229:                        $\Pr\left(E_2' \bigcap E_0^c\right) = 0$.

4230:                        Next, $E_1^c \bigcap E_0^c$ implies that $\yvec_1(i) \in \btyp\left(Y_1,\yvec_2^m(i)\right)$, and hence $\yvec_1(i)$ is in some

4231:                        partition $S_{k(i)}$ of $\btyp\left(Y_1,\yvec_2^m(i)\right)$.

4232:                        %Since this is the partition transmitted to $\Rbad$ this implies that $\Pr\left(E_2'' \bigcap E_1^c \bigcap E_0^c\right) = 0$. We now calculate

4233:                        This implies that $\Pr\left(E_2'' \bigcap E_1^c \bigcap E_0^c\right) = 0$.

4234:            %            We now calculate $\Pr\left(E_2''' \bigcap E_1^c \bigcap E_0^c\right)$:

4235:                        Lastly, consider

4236:                        {\setlength\arraycolsep{0pt}

4237:                        \begin{eqnarray*}

4238:                        &  & \Pr\left(E_2''' \bigcap E_1^c \bigcap E_0^c\right)= \\

4239:            %                    = \Pr\left(E_{2,A}'''\right) + \Pr\left(E_{2,B}'''\right),\\

4240:                        &  & \qquad \Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i, \exists \yvec_1 \in S_{k(i)},

4241:                              (\xvec(w), \yvec_1, \yvec_2(i)) \in \typ(X,Y_1,Y_2)

4242:                                \Big\}\bigcap E_1^c \bigcap E_0^c\Big)\\

4243:                        &  &\qquad \le \Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i, (\xvec(w), \yvec_1(i), \yvec_2(i)) \in \typ(X,Y_1,Y_2)

4244:                                 \Big\}\bigcap E_1^c \bigcap E_0^c\Big)\\

4245:                        &  & \qquad \quad  +\Pr\Big(\Big\{ \exists \yvec_1 \in S_{k(i)}, \yvec_1 \ne \yvec_1(i),\exists w \in \mL_2(i),

4246:                              w \ne w_i, (\xvec(w), \yvec_1, \yvec_2(i)) \in \typ(X,Y_1,Y_2)\Big\}  \bigcap E_1^c \bigcap E_0^c\Big)\\

4247:                        &  & \triangleq \Pr\left(E_{2,A}'''\right) + \Pr\left(E_{2,B}'''\right).

4248:                        \end{eqnarray*}}

4249:                        First we note that from \cite[equation (36)]{CoverG:79} we have that

4250:                        \begin{equation}

4251:                        \label{equ:size_L2}

4252:                           E_{\yvec_2}\big\{||\mL_2(i)||\big\} \le 1 + 2^{n(R- I(X;Y_2) + 3\eps)}.

4253:                        \end{equation}

4254:            %             where $||\mA||$ denotes the cardinality of the set $\mA$.

4255:                         Now, consider $\Pr\left(E_{2,A}'''\right)$:

4256:            %            {\setlength\arraycolsep{0pt}

4257:            %            \begin{eqnarray*}

4258:            %            &  & \Pr\left(E_{2,A}'''\right)

4259:            %%            &  &\Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i,  \\

4260:            %%            &  &  \phantom{xx}  (\xvec(w), \yvec_1(i), \yvec_2(i)) \in \typ(X,Y_1,Y_2)

4261:            %%                    \Big\}\bigcap E_1^c \bigcap E_0^c)\Big)\\

4262:            %%            &  & = \sum_{\mL_2(i)}\Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i, (\xvec(w), \yvec_1(i), \yvec_2(i))  \\

4263:            %%            &  &  \phantom{xxxx}  \in \typ(X,Y_1,Y_2)\Big\}  \bigcap E_1^c \bigcap E_0^c\Big| \mL_2(i)\Big)\Pr\big(\mL_2(i)\big)\\

4264:            %%             &  &\stackrel{(a)}{=} \sum_{\yvec_2(i)}

4265:            %%                    \Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i, (\xvec(w), \yvec_1(i), \yvec_2(i))  \\

4266:            %%            &  &  \phantom{xxxx}  \in \typ(X,Y_1,Y_2)\Big\}  \bigcap E_1^c \bigcap E_0^c\Big|

4267:            %%                                  \yvec_2(i)\Big)\Pr\big(\yvec_2(i)\big)\\

4268:            %             \stackrel{(a)}{=}  E_{\yvec_2}\bigg\{\Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i,    \\

4269:            %            &  &    (\xvec(w), \yvec_1(i), \yvec_2(i)) \in \typ(X,Y_1,Y_2)\Big\}  \bigcap E_1^c \bigcap E_0^c\Big| \yvec_2(i)\Big)\bigg\}

4270:            %            \end{eqnarray*}

4271:            %%            \begin{eqnarray*}

4272:            %%%            &  & \stackrel{(b)}{\le} E_{\yvec_2}\Bigg\{ \sum_{\substack{w \in \mL_2(i),\\ w \ne w_i}}

4273:            %%%                                 \Pr\Big(\Big\{(\xvec(w), \yvec_1(i), \yvec_2(i))  \\

4274:            %%%            &  &  \phantom{xxxxxxxxx}  \in \typ(X,Y_1,Y_2)\Big\}  \bigcap E_1^c \bigcap E_0^c\Big| \yvec_2(i)\Big)\Bigg\}\\

4275:            %%%            &  & = E_{\yvec_2}\Bigg\{ \sum_{\substack{w \in \mL_2(i),\\ w \ne w_i}}

4276:            %%%                                 \sum_{ \substack{\yvec_1(i) \in \\ \typ(Y_1|\xvec(w), \yvec_2(i))}}

4277:            %%%                                     \Pr (\yvec_1(i)|\xvec(w), \yvec_2(i)) \Bigg\}\\

4278:            %%            &  & \stackrel{(b)}{\le} E_{\yvec_2}\Bigg\{ \sum_{\substack{w \in \mL_2(i),\\ w \ne w_i}}

4279:            %%                                 \sum_{ \substack{\yvec_1(i) \in \\ \typ(Y_1|\xvec(w), \yvec_2(i))}}

4280:            %%                                     \Pr (\yvec_1(i)|\xvec(w), \yvec_2(i)) \Bigg\}\\

4281:            %%            &  & \stackrel{(c)}{=} E_{\yvec_2}\Bigg\{ \sum_{\substack{w \in \mL_2(i),\\ w \ne w_i}}

4282:            %%                                 \sum_{ \substack{\yvec_1(i) \in \\ \typ(Y_1|\xvec(w), \yvec_2(i))}}

4283:            %%                                     \Pr (\yvec_1(i)| \yvec_2(i))  \Bigg\}\\

4284:            %%            &  & \stackrel{(d)}{\le} E_{\yvec_2}\Bigg\{ \sum_{\substack{w \in \mL_2(i),\\ w \ne w_i}} 2^{n (H(Y_1|X,Y_2) + 2\eps)}

4285:            %%                2^{-n(H(Y_1|Y_2) - 2\eps)} \Bigg\}\\

4286:            %%%            &  &  \le E_{\yvec_2}\Big\{ ||\mL_2(i)|| 2^{-n (I(X;Y_1|Y_2) - 4\eps)} \Big\}\\

4287:            %%%            &  &   = 2^{-n (I(X;Y_1|Y_2) - 4\eps)} E_{\yvec_2}\Big\{ ||\mL_2(i)|| \Big\}\\

4288:            %%            &  &   \le 2^{-n (I(X;Y_1|Y_2) - 4\eps)} E_{\yvec_2}\Big\{ ||\mL_2(i)|| \Big\}\\

4289:            %%            &  &  \stackrel{(e)}{\le} 2^{-n (I(X;Y_1|Y_2) - 4\eps)} \left( 1 + 2^{n(R - I(X;Y_2) + 3\eps)}\right)

4290:            %%%            &  &  = 2^{-n (I(X;Y_1|Y_2) - 4\eps)}  + 2^{n(R - I(X;Y_2)  -I(X;Y_1|Y_2) + 7\eps)}\\

4291:            %%%%            &  &  = 2^{-n (I(X;Y_1|Y_2) - 4\eps)}  + 2^{n(R - I(X;Y_1,Y_2)  + 7\eps)},

4292:            %%            \end{eqnarray*}

4293:            %            }

4294:            %            where (a) is because $\mL_2(i)$ is a deterministic function of $\yvec_2(i)$ (we denote

4295:            %            $\Pr(\yvec_2(i)) \triangleq \Pr(\yvec_2(i)|w_i \mbox{ transmitted})$).

4296:            %            %(b) follows from the

4297:            %            %union bound, (c) comes from the independence of $\yvec_1(i)$ and $\xvec(w)$, $w \ne w_i$, (d)

4298:            %            %follows from the properties of conditionally typical sets (see \cite[ch. 14.2]{cover-thomas:it-book}), and

4299:            %            %(e) follows the same lines as in \cite[equation (36)]{CoverG:79}.

4300:            %            %This can be made arbitrarily small as long as

4301:                        by the point-to-point channel capacity theorem (see \cite[theorem 8.7.1]{cover-thomas:it-book})

4302:                        $\Pr(E_{2,A}''')$ can be made arbitrarily small by taking $n$ large enough as long as

4303:                        \begin{equation}

4304:                        \label{equ:EA_rate_bound}

4305:            %                $R < I(X;Y_1,Y_2) - 7\eps$.

4306:                            R < I(X;Y_1,Y_2)-2\eps.

4307:                        \end{equation}

4308:                        Next, consider $\Pr\left(E_{2,B}'''\right)$:

4309:                        {\setlength\arraycolsep{0mm}

4310:                        \begin{eqnarray}

4311:            %            &  & \Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i, \exists \yvec_1 \in S_{k(i)},

4312:            %                    \yvec_1 \ne \yvec_1(i), \nonumber\\

4313:            %            &  &  \phantom{xx}  (\xvec(w), \yvec_1, \yvec_2(i)) \in \typ(X,Y_1,Y_2)

4314:            %                    \Big\}\bigcap E_1^c \bigcap E_0^c\Big) \nonumber\\

4315:                        &  & \Pr\left(E_{2,B}'''\right)

4316:                           = E_{\yvec_1,\yvec_2} \bigg\{\Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i,

4317:                            \exists \yvec_1 \in S_{k(i)}, \yvec_1 \ne \yvec_1(i),  \nonumber\\

4318:                        &  &       \phantom{xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx}  (\xvec(w), \yvec_1, \yvec_2(i))\in  \typ(X,Y_1,Y_2) \Big\}\bigcap E_1^c \bigcap E_0^c\Big| \yvec_1(i), \yvec_2(i) \Big)\bigg\}\nonumber\\

4319:            %%%%%%%%%%%%%%%%%%%%%

4320:             %           &  & \le E_{\yvec_1,\yvec_2} \Bigg\{\sum_{\substack{ w \in \mL_2(i),\\ w \ne w_i}}

4321:             %                       \Pr\Big(\Big\{ \exists \yvec_1 \in S_{k(i)}, \yvec_1 \ne \yvec_1(i),\nonumber\\

4322:             %           &  &       \phantom{xxxxx}   (\xvec(w), \yvec_1, \yvec_2(i)) \in \typ(X,Y_1,Y_2) \Big\}\nonumber\\

4323:             %           &  &       \phantom{xxxxxxxxx} \bigcap E_1^c \bigcap E_0^c\Big| \yvec_1(i), \yvec_2(i) \Big)\Bigg\}\nonumber\\

4324:                        &  & \stackrel{(a)}{\le} E_{\yvec_1,\yvec_2} \Bigg\{\sum_{\substack{ w \in \mL_2(i),\\ w \ne w_i}}

4325:                                    \sum_{\substack{\yvec_1 \in S_{k(i)},\\ \yvec_1 \ne \yvec_1(i)}}

4326:                                    \Pr\Big(\Big\{ (\xvec(w), \yvec_1, \yvec_2(i))   \in \typ(X,Y_1,Y_2) \Big\}

4327:                                \bigcap E_1^c \bigcap E_0^c\Big| \yvec_1(i), \yvec_2(i) \Big)\Bigg\}\nonumber\\

4328:            %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

4329:            %            &  & = E_{\yvec_1,\yvec_2} \Bigg\{\sum_{\substack{ w \in \mL_2(i),\\ w \ne w_i}}

4330:                        &  & = E_{\yvec_1,\yvec_2} \Bigg\{\sum_{\substack{ w \in \mL_2(i),\\ w \ne w_i}}

4331:                                    \sum_{\substack{\yvec_1 \in S_{k(i)},\\ \yvec_1 \ne \yvec_1(i)}}

4332:                 \sum_{\substack{\tilde{\yvec}_1 \in \typ(Y_1| \xvec(w),\yvec_2(i))}}

4333:                                            \Pr\big(\tilde{\yvec}_1|\xvec(w), \yvec_2(i)  \big)\Bigg\}\nonumber\\

4334:            %            &  & \stackrel{(a)}{=} E_{\yvec_1,\yvec_2} \Bigg\{\sum_{\substack{ w \in \mL_2(i),\\ w \ne w_i}}

4335:            %                        \sum_{\substack{\yvec_1 \in S_{k(i)},\\ \yvec_1 \ne \yvec_1(i)}}

4336:            %                    \sum_{\substack{\yvec_1 \in \\ \typ(Y_1|\xvec(w),\yvec_2(i))}} \Pr\big(\yvec_1 \big)\Bigg\}\nonumber\\

4337:                        &  & \stackrel{(b)}{\le}\!  E_{\yvec_1,\yvec_2} \Bigg\{\!\sum_{\substack{ w \in \mL_2(i),\\ w \ne w_i}}

4338:                                    \sum_{\substack{\yvec_1 \in S_{k(i)},\\ \yvec_1 \ne \yvec_1(i)}} 2^{n(H(Y_1|X,Y_2) + 2\eps)}

4339:            %            &  & \phantom{xxxxxxxxxxxxxxxxxxxxx}

4340:                        2^{-n(H(Y_1) - \eps)}\!\Bigg\}\nonumber\\

4341:            %            &  & \le  2^{-n(I(X,Y_2;Y_1) - 3\eps)} E_{\yvec_1,\yvec_2} \Big\{ ||\mL_2(i)|| \cdot ||S_{k(i)}||  \Big\}\nonumber\\

4342:            %            &  &  = 2^{-n(I(X,Y_2;Y_1) - 3\eps)} E_{\yvec_2}

4343:            %                    \bigg\{ E_{\yvec_1|\yvec_2} \Big\{ ||\mL_2(i)|| \cdot ||S_{k(i)}|| \Big\} \bigg\}\nonumber\\

4344:                        &  &  \le 2^{-n(I(X,Y_2;Y_1) - 3\eps)} E_{\yvec_2}

4345:                                \bigg\{ E_{\yvec_1|\yvec_2} \Big\{ ||\mL_2(i)|| \cdot ||S_{k(i)}|| \Big\} \bigg\}\nonumber\\

4346:                        \label{eqn:Rx2_rate_bound_2}

4347:                        &  &  = 2^{-n(I(X,Y_2;Y_1) - 3\eps)}

4348:                                E_{\yvec_2} \bigg\{ ||\mL_2(i)|| E_{\yvec_1|\yvec_2} \Big\{  ||S_{k(i)}|| \Big\} \bigg\},

4349:                        \end{eqnarray}}

4350:                        where (a) follows from the union bound

4351:                        and (b) is because $\yvec_1 \ne \yvec_1(i)$ is selected independently of $\xvec(w)$ and $\yvec_2(i)$

4352:                        and we also used the properties of typical sets, see \cite[ch. 14.2]{cover-thomas:it-book}. In the derivation

4353:                        above we used

4354:                        \[

4355:                            \Pr(\yvec_1(i),\yvec_2(i)) \triangleq \Pr((\yvec_1(i), \yvec_2(i)) \mbox{ received } | \xvec(w_i)

4356:                                \mbox{ transmitted}).

4357:                        \]

4358:

4359:                        Next, we bound $E_{\yvec_1|\yvec_2} \Big\{  ||S_{k(i)}|| \Big\}$:

4360:            %            From $E_1^c \bigcap E_0^c$ we have that the correct $\yvec_2^m(i)$ is available at $\Rgood$. Now, by

4361:                        By \cite[ch. 14.2]{cover-thomas:it-book}, we have that

4362:                        the size of the set $\typm(Y_1|\yvec_2^m(i))$ is at most

4363:                        $||\typm(Y_1|\yvec_2^m(i))|| \le 2^{m(H(Y_1|Y_2) + 2\delta)}$, for any

4364:                            $\yvec_2^m(i)$.

4365:                        For  each $\yvec_1^m \in \typm(Y_1|\yvec_2^m(i))$ we look for the vectors $\yvec_1 \in \typ(Y_1)$,

4366:                        such that their first $m$ elements are equal to $\yvec_1^m$. Let us examine one such $\yvec_1$ vector:

4367:                        since this vector is $\eps$-typical then, by definition

4368:                        \begin{equation}

4369:                        \label{eqn:epsilon-typical-set}

4370:                            H(Y_1) - \eps \le -\frac{1}{n}\sum_{i=1}^n \log p_{Y_1}(y_{1,i}) \le H(Y_1) + \eps,

4371:                        \end{equation}

4372:                        and its first $m$ elements, by the definition of conditionally typical sets and jointly typical sets,

4373:                        satisfy

4374:                        \begin{equation}

4375:                        \label{eqn:delta-typical-set}

4376:                            H(Y_1) - \delta \le -\frac{1}{m}\sum_{i=1}^m \log p_{Y_1}(y_{1,i}) \le H(Y_1) + \delta.

4377:                        \end{equation}

4378:            %%            Therefore, we can write

4379:            %%            {\setlength\arraycolsep{0pt}

4380:            %%            \begin{eqnarray*}

4381:            %%                &  & \left|\frac{1}{n}\sum_{i=1}^n \log p(y_{1,i}) - H(Y_1)\right|  =  \\

4382:            %%%                &  & \quad    \left|\frac{1}{n}\sum_{i=1}^m \log p(y_{1,i}) + \frac{1}{n}\sum_{i=m+1}^n \log p(y_{1,i})-H(Y_1)\right|\\

4383:            %%                &  & \phantom{x}   \left|\frac{m}{n} \frac{1}{m}\sum_{i=1}^m \log p(y_{1,i}) +

4384:            %%                        \frac{1}{n}\sum_{i=m+1}^n \log p(y_{1,i}) - H(Y_1)\right| \le \eps,

4385:            %%            \end{eqnarray*}

4386:            %%            }

4387:            %%            hence,

4388:                        From  (\ref{eqn:epsilon-typical-set}) and \eqref{eqn:delta-typical-set} it follows that

4389:                        \begin{eqnarray*}

4390:                            -\frac{1}{n}\!\sum_{i=m+1}^n \!\!\log p_{Y_1}(y_{1,i}) \! & \le & \!

4391:                            H(Y_1) + \!\frac{m}{n} \frac{1}{m}\sum_{i=1}^m \log p_{Y_1}(y_{1,i})\!+\eps\\

4392:                             & \le & H(Y_1) - \frac{m}{n} \left( H(Y_1) - \delta \right) + \eps\\

4393:                             &  =  & H(Y_1)\left( 1 - \frac{m}{n} \right) +\eps + \frac{m}{n}\delta.

4394:            %                 &  \stackrel{(a)}{\le}  & H(Y_1)\left( 1 - \frac{m}{n} \right) +\eps + \frac{m}{n}\delta.

4395:                        \end{eqnarray*}

4396:            %            where (a) follows from the fact that $-\frac{1}{m}\sum_{i=1}^m \log p_{Y_1}(y_{1,i}) \ge H(Y_1) - \delta$.

4397:                         Finally we get that

4398:                        \begin{eqnarray}

4399:                            -\frac{1}{n-m}\sum_{i=m+1}^n \log p_{Y_1}(y_{1,i})  & \le &

4400:                              H(Y_1) +\frac{n}{n-m}\left(\eps + \frac{m}{n}\delta\right) \nonumber\\

4401:                        %\label{eqn:Entropy_calc_tail}

4402:                              & = & H(Y_1) +\frac{1}{1-K}\left(\eps + K\delta\right). \nonumber

4403:            %                   H(Y_1) +\frac{1}{1-K}\left(\eps + K\delta\right). \nonumber

4404:                        \end{eqnarray}

4405:                        Letting $\eps' \triangleq \frac{1}{1-K}\left(\eps + K\delta\right)$ we get that the

4406:                        last $n-m$ symbols of $\yvec_1 \in \ttyp(Y_1,\yvec_1^m)$ belong to

4407:            %            the $\eps'$-typical set of $Y_1$ of length $n-m$,

4408:                        $A_{\eps'}^{(n-m)}(Y_1)$.

4409:            %            \[

4410:            %                H(Y_1) - \eps' \le \frac{1}{n-m}\sum_{i=m+1}^n \log p(y_{1,i}) \le H(Y_1) + \eps'.

4411:            %            \]

4412:                        Therefore,

4413:                        %the size of the set of typical sequences of length $n$ with the first $m$ elements

4414:                        %fixed,

4415:                        $||\ttyp(Y_1,\yvec_1^m)|| \le 2^{(n-m)(H(Y_1) + \eps')}$

4416:            %            is upper bounded by $2^{(n-m)(H(Y_1) + \eps')}$,

4417:                        for any $\yvec_1^m \in \typm(Y_1|\yvec_2^m(i))$.

4418:            %            In conclusion, the size of $\btyp(Y_1,\yvec_2^m(i))$ is upper bounded by:

4419:                        In conclusion, we get

4420:                        \begin{eqnarray}

4421:                        ||\btyp(Y_1,\yvec_2^m(i))|| & \le & ||\typm(Y_1|\yvec_2^m(i))|| \times

4422:                                \max_{\yvec_1^m \in \typm(Y_1|\yvec_2^m(i))}\left\{||\ttyp(Y_1,\yvec_1^m) ||\right\}\nonumber\\

4423:                               & \le & 2^{m(H(Y_1|Y_2) + 2\delta)} 2^{(n-m)(H(Y_1) + \eps')}\nonumber\\

4424:                               & = & 2^{nH(Y_1) + m(H(Y_1|Y_2) - H(Y_1)) +2m\delta + (n - m) \eps'}\nonumber\\

4425:                               & = & 2^{nH(Y_1)  - m I(Y_1;Y_2) + n \left( \frac{m}{n}2 \delta + \frac{n-m}{n} \eps' \right)}\nonumber\\

4426:            %                   & = & 2^{n(H(Y_1) - K I(Y_1;Y_2) + \left(2K \delta + \eps + K \delta\right))}\nonumber\\

4427:                        \label{eqn:typ_set_size_bound}

4428:                               & = & 2^{n(H(Y_1) - K I(Y_1;Y_2) + \eps'')},

4429:                        \end{eqnarray}

4430:                        where we set $\eps'' \triangleq \eps + 3K \delta$. Note that this result is derived for $K<1$.

4431:                        Repeating the derivation for $K=1$ we get that the bound in (\ref{eqn:typ_set_size_bound}) remains

4432:                        valid also when $K=1$. Since this is independent of the particular

4433:                        $\yvec_2^m(i)$ sequence, we have that

4434:                        \begin{equation}

4435:                        \label{eqn:Conditional-bound-set-size}

4436:                            E_{\yvec_1|\yvec_2} \Big\{  ||S_{k(i)}|| \Big\} \le 1 + 2^{n(H(Y_1) - K I(Y_1;Y_2)  - C_{12} + \eps'')}.

4437:                        \end{equation}

4438:            %            Note that $H(Y_1) - K I(Y_1;Y_2) < H(Y_1)$ when $K >0$, therefore

4439:            %            the partition size for this conference step is reduced compared to theorem \ref{thm:achive_common_one_step},

4440:            %            resulting in higher rates to $\Rbad$.

4441:                        Plugging (\ref{eqn:Conditional-bound-set-size})

4442:                            back into (\ref{eqn:Rx2_rate_bound_2}) and using the bound on

4443:                            $E_{\yvec_2}\big\{||\mL_2(i)||\big\}$ from equation \eqref{equ:size_L2}, we get

4444:                        \begin{eqnarray*}

4445:            %%            &  & \Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i, \exists \yvec_1 \in S_{k(i)},

4446:            %%                    \yvec_1 \ne \yvec_1(i), \nonumber\\

4447:            %%            &  &  \phantom{xx}  (\xvec(w), \yvec_1, \yvec_2(i)) \in \typ(X,Y_1,Y_2)

4448:            %%                    \Big\}\bigcap E_1^c \bigcap E_0^c\Big) \nonumber\\

4449:                        &  & \Pr(E_{2,B}''') \nonumber\\

4450:            %            &  &  \le 2^{-n(I(X,Y_2;Y_1) - 3\eps)}

4451:            %                     E_{\yvec_2} \bigg\{ ||\mL_2(i)|| E_{\yvec_1|\yvec_2} \Big\{  ||S_{k(i)}|| \Big\} \bigg\}\\

4452:            %            &  &  \le E_{\yvec_2} \Big\{ ||\mL_2(i)||\Big\}  2^{-n(I(X,Y_2;Y_1) - 3\eps)} \times \\

4453:            %            &  &  \phantom{xxxxxxxxxx}   \left( 1 + 2^{n(H(Y_1) - K I(Y_1;Y_2)  - C_{12} + \eps'')}\right)\\

4454:                        &  & \le \left( 1 + 2^{n(R- I(X;Y_2) + 3\eps)}\right) \times \Big(2^{-n(I(X,Y_2;Y_1) - 3\eps)}

4455:                                      + 2^{-n(C_{12} + K I(Y_1;Y_2) - H(Y_1 | X,Y_2) - \eps'' - 3\eps)} \Big)\\

4456:                        %%%%%%%%%%%%%%%%%%%%%%%%

4457:                        &  & = 2^{-n(C_{12} + K I(Y_1;Y_2) - H(Y_1 | X,Y_2) - \eps'' - 3\eps)} +

4458:                                 2^{n(R- I(X;Y_2)  -C_{12} - K I(Y_1;Y_2) + H(Y_1 | X,Y_2) + \eps'' + 6\eps)}+\\

4459:                        &  & \phantom{xxxxxx} 2^{-n(I(X,Y_2;Y_1) - 3\eps)} + 2^{n(R- I(X;Y_2) - I(X,Y_2;Y_1) + 6\eps)}.

4460:                        \end{eqnarray*}

4461:            %            Making this arbitrarily small for $n$ large enough requires

4462:                        Therefore,

4463:                        $\Pr(E_{2,B}''')$ can be made arbitrarily small by taking $n$ large enough, as

4464:                        long as

4465:                        \begin{eqnarray}

4466:                            \label{eqn:three-steps-capacity-conditions}

4467:                            C_{12} &  >  &\! H(Y_1 | X,Y_2) - K_{\eps} I(Y_1;Y_2) + \eps''' + 3\eps,\\

4468:            %                R      & < &  I(X;Y_2) - H(Y_1 | X,Y_2) + C_{12} +  K I(Y_1;Y_2)  - \eps'' - 6\eps.

4469:                            R      & < & \! I(X;Y_2) - H(Y_1 | X,Y_2) + C_{12} +  K_{\eps} I(Y_1;Y_2)  - \eps'''',\nonumber

4470:                        \end{eqnarray}

4471:                        where $\eps''' \! \triangleq \!\eps \!+ \! 3 \delta$ and $\eps''''\!\triangleq\!\eps'''\! + 6\eps$.

4472:                        Combining this with the rate bound from \eqref{equ:EA_rate_bound} we get that if

4473:            %            $C_{12}$ satisfies (\ref{eqn:three-steps-capacity-conditions})~and

4474:            %            \[

4475:            %                C_{12}   >   H(Y_1 | X,Y_2) - K I(Y_1;Y_2) + \eps'' +3\eps,

4476:            %            \]

4477:            %            and

4478:                        \begin{eqnarray}

4479:                            R  & < &   \max \Big\{I(X;Y_2), \;\; I(X;Y_2) - H(Y_1 | X,Y_2) +

4480:                        \label{eqn:three-steps-decode-first}

4481:                                \min\left(C_{12} + K_{\eps} I(Y_1;Y_2), H(Y_1|Y_2) \right) \Big\}- \eps'''',

4482:            %                   &     & \phantom{x} \min\left(C_{12} + K I(Y_1;Y_2), H(Y_1|Y_2) \right) - \eps'' - 6\eps,

4483:                        \end{eqnarray}

4484:                        the probability

4485:                        $\Pr\left(E_2 \bigcap E_1^c \bigcap E_0^c\right)$

4486:            %            \begin{eqnarray*}

4487:            %                &   & \Pr\left(E_2 \bigcap E_1^c \bigcap E_0^c\right)  \le

4488:            %                            \Pr\left(E_2' \bigcap E_0^c\right) \\

4489:            %                &   & \phantom{xxxxx}+ \Pr\left(E_2''\bigcap E_1^c \bigcap E_0^c\right) +

4490:            %                    \Pr\left(E_2''' \bigcap E_1^c \bigcap E_0^c\right) \\

4491:            %                &   & \le 2^{-n(C_{12} + K I(Y_1;Y_2) - H(Y_1 | X,Y_2) - \eps'' - 3\eps)} + \\

4492:            %            &  & \phantom{xxxx} 2^{n(R- I(X;Y_2)  -C_{12} - K I(Y_1;Y_2) + H(Y_1 | X,Y_2) + \eps'' + 6\eps)}+\\

4493:            %            &  & \phantom{xxxxxx} 2^{-n (I(X;Y_1|Y_2) - 4\eps)}  + 2^{n(R - I(X;Y_1,Y_2)  + 7\eps)},

4494:            %            \end{eqnarray*}

4495:                        can be made arbitrarily small by taking $n$ large enough.

4496:                        %\footnote{Note that $K$ is

4497:                        %also a function of $n$ but we can approach the rate arbitrarily close by taking

4498:                        %$n$ large enough.}.

4499:                        Compared with proposition \ref{prop:achive_common_one_step} we note that for the same $C_{12}$

4500:                        the rate to $\Rbad$ is increased and the region of $C_{12}$ where cooperation is useful is

4501:                        also increased.

4502:

4503:

4504:

4505:                    \subsubsection{Probability of $E_3 \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c$}

4506:                    \label{sec:prob-bound-E3}

4507:                        First note that $E_2^c \bigcap E_1^c \bigcap E_0^c$ implies correct decoding at $\Rbad$ and

4508:                        thus $w_i \in B_{k'(i)}$ for some index $k'(i)$. It also implies that

4509:                        $\left(\xvec^m(w_i) ,\yvec_1^m(i) \right) \in \typm(X,Y_1)$

4510:                        and $\left(\xvec(w_i) ,\yvec_1(i) \right) \in \typ(X,Y_1)$

4511:                        and therefore

4512:            %            $w_i \in \tmL_1(i)$. Thus

4513:                        $w_i \in B_{k'(i)} \bigcap \tmL_1(i)$ and

4514:                        $\Pr\left(E_3' \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c\right) =  \Pr\left(E_3'' \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c\right) = 0$.

4515:            %            Additionally, $E_0^c$ implies that

4516:                        %$\left(\xvec^m(w_i), \yvec_1^m(i), \yvec_2^m(i)\right) \in \typm(X,Y_1,Y_2)$ and thus

4517:            %            $\Pr\left(E_3'' \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c\right) = 0$.

4518:                        Consider now $\Pr\left(E_3''' \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c\right)$:

4519:                        {\setlength\arraycolsep{0mm}

4520:                        \begin{eqnarray}

4521:                            &  &  \Pr\left(E_3''' \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c\right)\nonumber\\

4522:            %                &  & \Pr\Big(\Big\{\exists w \ne w_i,  w \in \tmL_1(i),

4523:            %                        (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i))\nonumber\\

4524:            %                &  &   \phantom{xxxxx} \in \typm(X,Y_1,Y_2),w \in  B_{k'(i)}\Big\} \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c\Big)\nonumber\\

4525:            %%%                &  &  =  E_{\yvec_1}\bigg\{\Pr\Big(\Big\{\exists w \ne w_i,  w \in \tmL_1(i), w \in  B_{k'(i)},\nonumber\\

4526:            %%%                &  &   \phantom{xxxxx} (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i))\in \typm(X,Y_1,Y_2)\Big\} \nonumber\\

4527:            %%%                &  &   \phantom{xxxxxxxxxx}  \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c \Big| \yvec_1(i)\Big)\bigg\}\nonumber\\

4528:                            &  & \qquad \le E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i), \\ w \ne w_i}}

4529:                                    \Pr\Big(\Big\{   w \in  B_{k'(i)},

4530:                                (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i)) \in  \typm(X,Y_1,Y_2)\Big\}

4531:                            \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c \Big| \yvec_1(i)\Big)\Bigg\}\nonumber\\

4532:                            &  & \qquad \stackrel{(a)}{=} E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i), \\ w \ne w_i}}

4533:                                    \Pr\big(w \in  B_{k'(i)}\big)\times\nonumber\\

4534:                            &  &   \phantom{xxxxxxxxxxxxxxxxxxxx} \Pr\Big(\Big\{  (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i))\in \typm(X,Y_1,Y_2)\Big\}

4535:                                  \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c \Big| \yvec_1(i)\Big)\Bigg\}\nonumber\\

4536:            %%%                &  & = E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i), \\ w \ne w_i}} 2^{-nC_{21}^b}\times \nonumber\\

4537:            %%%                &  &   \phantom{xxxxx} \sum_{\yvec_2^m(i) \in \typm(Y_2|\yvec_1^m(i),\xvec^m(w))}

4538:            %%%                            \Pr(\yvec_2^m(i)|\yvec_1^m(i), \xvec^m(w))\Bigg\}\nonumber\\

4539:            %%%                &  & \stackrel{(b)}{=} 2^{-nC_{21}^b} E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i), \\ w \ne w_i}}

4540:            %%%               \sum_{\substack{\yvec_2^m(i) \in \\ \typm(Y_2|\yvec_1^m(i),\\\phantom{xxxxxx} \xvec^m(w))}}

4541:            %%%                            \Pr(\yvec_2^m(i)|\yvec_1^m(i))\Bigg\}\nonumber\\

4542:            %%                &  & \stackrel{(a)}{=}  E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i),  w \ne w_i}}

4543:            %%                \Pr\big(w \in  B_{k'(i)}\big)\nonumber\\

4544:            %%                &  & \phantom{xxxxxxxxx}\sum_{\substack{\yvec_2^m(i) \in  \typm(Y_2|\yvec_1^m(i), \xvec^m(w))}}

4545:            %%                            \Pr(\yvec_2^m(i)|\yvec_1^m(i))\Bigg\}\nonumber\\

4546:            %                &  &   \qquad   \le E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i), \\ w \ne w_i}}

4547:            %                          \Pr\Big(\Big\{   w \in  B_{k'(i)},

4548:            %                            (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i)) \in \typm(X,Y_1,Y_2)\Big\}

4549:            %                            \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c \Big| \yvec_1(i)\Big)\Bigg\}\nonumber\\

4550:                            &  & \qquad    \stackrel{(b)}{=}  E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i),  w \ne w_i}}

4551:                            \Pr\big(w \in  B_{k'(i)}\big) \times

4552:                                    \sum_{\substack{\yvec_2^m \in  \typm(Y_2|\yvec_1^m(i), \xvec^m(w))}}

4553:                                        \Pr(\yvec_2^m|\yvec_1^m(i))\Bigg\}\nonumber\\

4554:            %%%                &  & \le 2^{-nC_{21}^b}  E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i), \\ w \ne w_i}}

4555:            %%%                         2^{m(H(Y_2|Y_1,X) + 2\delta)} 2^{-m(H(Y_2|Y_1) - 2\delta)}\Bigg\}\nonumber\\

4556:            %%%                &  & = 2^{-nC_{21}^b} 2^{-m(I(X;Y_2|Y_1) - 4\delta)} E_{\yvec_1}\Big\{||\tmL_1(i)||\Big\}\nonumber\\

4557:                            &  & \qquad \le 2^{-nC_{21}^b} 2^{-m(I(X;Y_2|Y_1) - 4\delta)} E_{\yvec_1}\Big\{||\tmL_1(i)||\Big\}\nonumber\\

4558:                            &  & \qquad \stackrel{(c)}{\le} 2^{-n(C_{21}^b + K I(X;Y_2|Y_1) -  4 K \delta)}\left(1 + 2^{n\left(R - I(X;Y_1) + 3\eps + 6 K \delta\right)} \right) \nonumber

4559:            %%%                &  & = 2^{-n(C_{21}^b + K I(X;Y_2|Y_1) -  4 K \delta)}\nonumber\\

4560:            %%%            \label{eqn:Pron_error_E3'''}

4561:            %%%                &  & \phantom{xxxxxx}  + 2^{n\left(R - I(X;Y_1) -C_{21}^b - K I(X;Y_2|Y_1) + 3\eps  +  10 K \delta\right)},

4562:                        \end{eqnarray}}

4563:                        \noindent

4564:                        where (a) is due to the independent mapping of the messages into the sets $B_{k'}$, (b) is

4565:                         because $\yvec_2^m(i)$ is generated by $\xvec^m(w_i)$ and hence

4566:                        is independent of any $\xvec^m(w)$ with $w \ne w_i$.

4567:                        To obtain the bound in (c) we recall that $\avec^m \in \typm(A)$ and

4568:                        $\avec^n \in \typ(A)$ imply that the elements $\avec_{m+1}^n \in A_{\eps'}^{(n-m)}(A)$.

4569:                        Therefore, the probability of an i.i.d.\ sequence $\xvec$ independent of $\yvec_1(i)$ to be in

4570:                        $\tmL_1(i)$ is bounded~by

4571:                        \begin{eqnarray*}

4572:                            \Pr(\xvec \in \tmL_1(i)) & \le &\! \Pr\Big((\xvec^m,\yvec_1^m(i)) \in \typm(X,Y_1)\Big) \times

4573:                                \Pr\Big((\xvec_{m+1}^n, \yvec_{1,m+1}^n(i)) \in A_{\eps'}^{(n-m)}(X,Y_1)\Big)\\

4574:                              & \le & 2^{-m(I(X;Y_1) - 3 \delta)} 2^{-(n-m)(I(X;Y_1) - 3 \eps')}\\

4575:                              & = & 2^{-n (I(X;Y_1)  - 3 \eps - 6 K \delta)}.

4576:                        \end{eqnarray*}

4577:            %            The expression in (\ref{eqn:Pron_error_E3'''})

4578:                        Therefore $\Pr(E_3'''\bigcap E_2^c \bigcap E_1^c \bigcap E_0^c)$ can

4579:                        be made arbitrarily small by taking $n$ large enough, as long as

4580:                        \begin{equation}

4581:                        \label{eqn:three-steps-decode-second}

4582:                            R < I(X;Y_1) + C_{21}^b + K_{\eps} I(X;Y_2|Y_1) - 3\eps  -  10  \delta.

4583:                        \end{equation}

4584:                        Combining this with equation (\ref{eqn:three-steps-decode-first}) yields the rate expression

4585:                        $R_{212}$. Switching the roles of $\Rgood$ and $\Rbad$ we obtain $R_{121}$.

4586:                        The case where $\alpha = 0$ can be obtained from proposition \ref{prop:achive_common_one_step}.

4587:

4588:

4589:                \subsection{Rate Bounds for the Two-Step Conference}

4590:                \label{sec:two-step}

4591:                        Consider the following two-step conference:

4592:                        \begin{enumerate}

4593:                            \item $\Rgood$ sends information at rate $C_{12}$ to $\Rbad$.

4594:                            \item $\Rbad$ decodes and sends information at rate $C_{21}$ to $\Rgood$.

4595:                        \end{enumerate}

4596:                        Lastly $\Rgood$ decodes.

4597:                        For this setup we specialize the conference scheme of appendix

4598:                        \ref{sec:three-steps} (we state only the modifications).

4599:                        Let $m = \min \left\{ n , \left\lfloor \frac{n C_{12}}{H(Y_1) + \delta} \right\rfloor \right\}$,

4600:                        $K = \frac{m}{n}$ and $K_{\eps} = \left[\frac{C_{12}}{H(Y_1) + \delta}\right]^* - \eps$.

4601:                        Increase $n$ to obtain $K > K_{\eps}$. Note that  $K \le 1$.

4602:            %            \[

4603:            %                m = \min \left\{ n , \left\lfloor \frac{n C_{12}}{H(Y_1) + \delta} \right\rfloor \right\},

4604:            %                \qquad K = \frac{m}{n}.

4605:            %            \]

4606:                        \subsubsection{Codebooks Construction and Conference Steps}

4607:                        \paragraph{Relay Codebook Construction at $\Rgood$}

4608:                            $\Rgood$ enumerates all the sequences in $\typm(Y_1)$ with an index

4609:                            $l \in \left\{1,2,\ldots,2^{nC_{12}} \right\}$.

4610:                        \paragraph{Relay Codebook Construction at $\Rbad$}

4611:                            $\Rbad$

4612:                            %has only one codebook, generated by partitioning the message set

4613:                            partitions the set $\mW$ into $2^{nC_{21}}$

4614:                            sets in a uniform and independent manner. Denote these sets with $B_{k'}$,

4615:                            $k' \in \left\{1,2,\ldots,2^{nC_{21}}\right\}$.

4616:                        \paragraph{Encoding at $\Rgood$ at the First Conference Step (time $i+1$)}

4617:                            Upon reception of $\yvec_1(i)$, $\Rgood$ considers its first $m$ elements denoted $\yvec_1^m(i)$,

4618:                            and looks for the index $l$ of $\yvec_1^m(i)$ in

4619:                            $\typm(Y_1)$. $\Rgood$ then sends this index to $\Rbad$ through the conference link.

4620:                        \paragraph{Decoding at $\Rbad$ (time $i+1$) and Encoding for the Second Conference Step}

4621:                            $\Rbad$ generates the set $\tmL_2(i)$,

4622:                            defined in the same way as $\tmL_1(i)$ in appendix \ref{sec:three-steps-decoder-1},

4623:                            with $\yvec_2(i)$, $\yvec_2^m(i)$ and $Y_2$ replacing $\yvec_1(i)$, $\yvec_1^m(i)$ and

4624:                            $Y_1$ respectively. Then,~$\Rbad$ looks for a unique message $\hw \in \tmL_2(i)$

4625:                            such that $\left(\xvec^m(\hw),\yvec_1^m(i), \yvec_2^m(i)\right)\! \in\! \typm(X,Y_1,Y_2)$.

4626:                            After decoding $w_i$, $\Rbad$ finds the index $k'(i)$ of the partition $B_{k'(i)}$ to which

4627:                            the decoded $w_i$ belongs. At time $i+2$, $\Rbad$ sends $k'(i)$ to $\Rgood$ through the conference

4628:                            link.

4629:                        \paragraph{Decoding at $\Rgood$ (time $i+2$)}

4630:                        \label{sec:two-step-decode-rx2}

4631:                            $\Rgood$ uses the standard set-intersection relay decoding as in \cite[theorem 1]{CoverG:79}.

4632:                            Thus the achievable rate to $\Rgood$ is bounded by

4633:            %                \begin{equation}

4634:            %                \label{eqn:two-step-rgood-rate}

4635:                                $R \le I(X;Y_1) + C_{21}$.

4636:            %                \end{equation}

4637:

4638:                        \subsubsection{Error Events and Error Probability Bounds}

4639:                        We present here only the error events for decoding at $\Rbad$. The rest of the error events follow

4640:                        easily from the analysis in appendix \ref{sec:three-steps-error-events}. Define

4641:                        \begin{itemize}

4642:                         \item    $E_2' \!  = \!\Big\{ w_i \notin \tmL_2(i) \Big\}$,

4643:                         \item    $E_2''  = \left\{\! (\xvec^m(w_i), \yvec_1^m(i), \yvec_2^m(i)) \notin \typm(X,Y_1,Y_2)\!\right\}$,

4644:                         \item     $E_2'''\!\! =\!\! \Big\{\! \exists w \! \in \! \tmL_2(i), w \! \ne \! w_i,

4645:                                   (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i))\! \in \!\typm(X,Y_1,Y_2)\!\Big\}$.

4646:                        \end{itemize}

4647:            %        \subsection{Bounding the Probability of Error Events}

4648:                    By similar considerations to those in appendix \ref{sec:three-steps-error-bounds}, we have that

4649:                    $\Pr\left(E_2' \bigcap E_0^c\right) = \Pr\left(E_2'' \bigcap E_0^c\right) = 0$, and~we need to

4650:                    bound $\Pr\left(E_2'''\bigcap E_0^c\right)$:

4651:            %        {\setlength\arraycolsep{0mm}

4652:            %        \begin{eqnarray*}

4653:            %        & & \Pr\Big(\Big\{ \exists w \in \tmL_2(i), w \ne w_i, (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i))\\

4654:            %        & &  \qquad \qquad   \in \typm(X,Y_1,Y_2)\Big\}\bigcap E_1^c \bigcap E_0^c\Big)\\

4655:            %%        & & = E_{\yvec_2}\bigg\{ \Pr\Big(\Big\{ \exists w \in \tmL_2(i), w \ne w_i, (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i))\\

4656:            %%        & &  \qquad \qquad   \in \typm(X,Y_1,Y_2)\Big\}\bigcap E_1^c \bigcap E_0^c \Big|\yvec_2(i)\Big)\bigg\}\\

4657:            %        & & \le E_{\yvec_2}\Bigg\{ \sum_{\substack{w \in \tmL_2(i), \\ w \ne w_i}}

4658:            %            \Pr\Big(\Big\{  (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i))\\

4659:            %        & &  \qquad \qquad   \in \typm(X,Y_1,Y_2)\Big\}\bigcap E_1^c \bigcap E_0^c \Big|\yvec_2(i)\Big)\Bigg\}\\

4660:            %%        & & \le E_{\yvec_2}\bigg\{ \sum_{\substack{w \in \tmL_2(i), \\ w \ne w_i}} 2^{-m(I(X;Y_1|Y_2) - 4 \delta)}

4661:            %%            \bigg\}\\

4662:            %%        & & = 2^{-m(I(X;Y_1|Y_2) - 4 \delta)} E_{\yvec_2}\Big\{||\tmL_2(i)||\Big\}\\

4663:            %        & & \le 2^{-m(I(X;Y_1|Y_2) - 4 \delta)} E_{\yvec_2}\Big\{||\tmL_2(i)||\Big\}\\

4664:            %        & & \le 2^{-nK(I(X;Y_1|Y_2) - 4 \delta)}\left(1 + 2^{n(R - I(X;Y_2) + 3 \eps + 6 K \delta)}\right),

4665:            %%       & & = 2^{-nK(I(X;Y_1|Y_2) - 4 \delta)} + 2^{n(R - I(X;Y_2) - KI(X;Y_1|Y_2) +10K\delta + 3 \eps)}

4666:            %        \end{eqnarray*}

4667:            %        }

4668:            %        which can be made arbitrarily small as long as

4669:                    using a derivation similar to that in %section \ref{sec:prob-bound-E3}

4670:                    theorem \ref{thm:main_thm} we conclude that

4671:                    $\Pr(E_2''' \bigcap E_0^c)$ can be made arbitrarily small as long as

4672:                    \[

4673:                        R \le I(X;Y_2) + K_{\eps} I(X;Y_1|Y_2) - 3\eps -10  \delta.

4674:                    \]

4675:            %        Combining this with equation (\ref{eqn:two-step-rgood-rate}) we get the rate

4676:                    Combining this with the rate constraint in appendix \ref{sec:two-step-decode-rx2}

4677:                    we get the rate expression for $R_{12}$ of equation (\ref{eqn:thm_3step_2steps_rate}).

4678:            %        {\setlength\arraycolsep{0pt}

4679:            %        \begin{eqnarray*}

4680:            %          &  &  R_{12} = \min\bigg(I(X;Y_1) + C_{21}, \\

4681:            %          &  &  \phantom{xxxxxxxxxxxxx} I(X;Y_2) + \left[\frac{C_{12}}{H(Y_1)}\right]^*I(X;Y_1|Y_2) \bigg).

4682:            %        \end{eqnarray*}}

4683:                    Switching the order of the conference we obtain the expression for $R_{21}$.

4684:                    Finally, combining this with the three-step conference rate expressions

4685:                    obtained in appendix \ref{sec:three-steps} yields the rates of theorem \ref{thm:achieve-three-steps}.

4686:            %        Note that since the minimum includes the two-step conference result which always (i.e. when the

4687:            %        channels are not degraded) gives a rate increase over the non-cooperative rate,

4688:            %        we do not need to state explicitly

4689:            %        the condition on the capacity of equation (\ref{eqn:three-steps-capacity-conditions}).

4690:

4691: \end{comment}

4692:

4693:

4694: \begin{thebibliography}{10}

4695:

4696: \bibitem{Meulen:71}

4697: E. C. van der Meulen.

4698: \newblock {``Three-Terminal Communication Channels"}.

4699: \newblock {\em Adv. Appl. Probab.}, vol.~3, pp.~120--154, 1971.

4700:

4701: \bibitem{CoverG:79}

4702: T.~M. Cover and A.~A. {El Gamal}.

4703: \newblock {``Capacity Theorems for the Relay Channel"}.

4704: \newblock {\em IEEE Trans. Inform. Theory}, IT-25(5):572--584, 1979.

4705:

4706: \bibitem{GuptaKumar:2003}

4707: P.~Gupta and P.~R.~Kumar.

4708: \newblock{``Towards an Information Theory of Large Networks: An Achievable Rate Region"}.

4709: \newblock{\em IEEE Trans. Inform. Theory}, 49(8):1877--1894, 2003.

4710:

4711: \bibitem{XieKumar:2004}

4712: L.~-L.~Xie and P.~R.~Kumar.

4713: \newblock{``A Network Information Theory for Wireless Communication: Scaling Laws and Optimal Operation"}.

4714: \newblock{\em IEEE Trans. Inform. Theory}, 50(5):748--767, 2004.

4715:

4716: \bibitem{XieKumar:2005}

4717: L.~-L.~Xie and P.~R.~Kumar.

4718: \newblock{``An Achievable Rate for the Multiple-Level Relay Channel"}.

4719: \newblock{\em IEEE Trans. Inform. Theory}, 51(4):1348--1358, 2005.

4720:

4721: \bibitem{Kramer:2003}

4722: G.~Kramer, M.~Gastpar, and P.~Gupta.

4723: \newblock{``Capacity Theorems for Wireless Relay Channels"}.

4724: \newblock {\em Proc. 41st Allerton Conf.  Communications, Control, and Computing}, pp. 1074--1083, Monticello, IL, 2003.

4725:

4726: \bibitem{Madsen:2005}

4727: B.~Wang, J.~Zhang and A.~Host-Madsen.

4728: \newblock{``On the Capacity of MIMO Relay Channels"}.

4729: \newblock{\em IEEE Trans. Inform. Theory}, 51(1):29--43, 2005.

4730:

4731: \bibitem{Kramer:2005}

4732: G.~Kramer, M.~Gastpar, and P.~Gupta.

4733: \newblock{``Cooperative Strategies and Capacity Theorems for Relay Networks"}.

4734: \newblock{\em IEEE Trans. Inform. Theory}, 51(9):3037--3063, 2005.

4735:

4736: \bibitem{Gastpar:2002}

4737: M. Gastpar, G. Kramer and P. Gupta.

4738: \newblock{``The Multiple-Relay Channel: Coding and Antenna-Clustering Capacity"}.

4739: \newblock{\em Proc. IEEE Int. Symp. Inform. Theory (ISIT)}, Lausanne, Switzerland, 2002, pg. 136.

4740:

4741: %\bibitem{SchienGallager:2000}

4742: %B.~Schein and R.~Gallager.

4743: %\newblock {``The Gaussian Parallel Relay Network"}.

4744: %\newblock {\em Proc. IEEE Int. Symp. Inform. Theory (ISIT)}, Sorrento, Italy, 2000, pg. 22.

4745:

4746: \bibitem{ElGamalH:2006}

4747: L.~Lai, K.~Liu and H.~El-Gamal.

4748: \newblock{``The Three-Node Wireless Network: Achievable Rates and Cooperation Strategies"}.

4749: \newblock{\em IEEE Trans. Inform. Theory},  52(3):805--828,  2006.

4750:

4751: \bibitem{Goldsmith:2006}

4752: C. T. K. Ng, I. Maric, A. J. Goldsmith, S. Shamai and R. D. Yates.

4753: \newblock{``Iterative and One-Shot Conferencing in Relay Channels"}.

4754: \newblock {\em Proc. IEEE Inform. Theory Workshop (ITW)}, Punta del Este, Uruguay, 2006.

4755:

4756: %\bibitem{Motani:2005}

4757: %H.~F.~Chong, M.~Motani and  H.~K.~Garg.

4758: %\newblock{``New Coding Strategies for the Relay Channel"}.

4759: %\newblock{\em Proc. IEEE Int. Symp. Inform. Theory (ISIT)}, Adelaide,  Australia, 2005, pp. 1086--1090.

4760:

4761: \bibitem{DraperFK:03}

4762: S.~C. Draper, B.~J. Frey, and F.~R. Kschischang.

4763: \newblock {``Interactive Decoding of a Broadcast Message"}.

4764: \newblock {\em Proc. 41st Allerton Conf.}, % on Communication, Control and Computing},

4765:  Urbana, IL, 2003.

4766:

4767: \bibitem{RonSer:2005}

4768: R. Dabora and S.~D. Servetto,

4769: \newblock {``Broadcast Channels with Cooperating Decoders"}.

4770: \newblock {{\em  IEEE Trans. Inform. Theory}}, to appear.

4771:

4772: \bibitem{LiagV:2005}

4773: Y. Liang and V. V. Veeravalli.

4774: \newblock{``Cooperative Relay Broadcast Channels"}.

4775: \newblock{Submitted to the {\em IEEE Trans. Inform. Theory}}, July 2005.

4776:

4777: \bibitem{ElGamal:06}

4778: A. El-Gamal, M. Mohseni and S. Zahedi,

4779: \newblock{``Bounds on Capacity and Minimum Energy-per-Bit for AWGN Relay Channels"}.

4780: \newblock{\em IEEE Trans. Inform. Theory}, IT-52(4):1545--1561, 2006.

4781:

4782: \bibitem{HostMadsen:05}

4783: A. Host-Madsen, and J. Zhang.

4784: \newblock{``Capacity Bounds and Power Allocation for Wireless Relay Channels"}.

4785: \newblock{\em IEEE Trans. Inform. Theory}, IT-51(6):2020--2040, 2005.

4786:

4787: \bibitem{Laneman:2000}

4788: J. N. Laneman and G. W. Wornell.

4789: \newblock{``Energy-Efficient Antenna Sharing and Relaying for Wireless Networks"}.

4790: \newblock{\em Proc. IEEE Wireless Communications and Networking Conference (WCNC)} 2000, vol. 1,  pp. 7--12.

4791:

4792: \bibitem{Bao:2005}

4793: X. Bao and J. Li.

4794: \newblock{``Decode-Amplify-Forward (DAF): A New Class of Forwarding Strategy for Wireless Relay Channels"}.

4795: \newblock{\em Proc. 6th IEEE Workshop on Signal Proc. Adv. in Wireless Comm. (SPAWC)}, New York, 2005, pp.~816--820.

4796:

4797: \bibitem{Kramer:Asi05}

4798: G. Kramer.

4799: \newblock{``Distributed and Layered Codes for Relaying"}.

4800: \newblock{\em Proc. 39th Asilomar Conf. on Signals, Systems and Computers}, 2005, pp. 1752--1756.

4801:

4802: \bibitem{Stankovic:05}

4803: Z. Liu, V. Stankovic and Z. Xiong.

4804: \newblock{``Wyner-Ziv Coding for the Half-Duplex Relay Channel"}.

4805: \newblock{\em Proc. IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP)}, Philadelphia, 2005,

4806: vol. 5, pp. 1113--1116.

4807:

4808: \bibitem{Marton:79}

4809: K.~Marton.

4810: \newblock {``A Coding Theorem for the Discrete Memoryless Broadcast Channel"}.

4811: \newblock {\em IEEE Trans. Inform. Theory}, IT-25(3):306--311, 1979.

4812:

4813: \bibitem{Motani:06}

4814: M. Motani, H.-F. Chong and H. K. Garg.

4815: \newblock{``Backward Decoding Strategies for the Relay Channel"}.

4816: \newblock{\em MSRI Workshop: Mathematics of Relaying and Cooperation in Communication Networks}, Berkeley, 2006.

4817:

4818: \bibitem{YeungBook}

4819: R.~W. Yeung.

4820: \newblock{\em A First Course in Information Theory}.

4821: \newblock Springer, 2002.

4822:

4823: \bibitem{cover-thomas:it-book}

4824: T.~M. Cover and J.~A. Thomas.

4825: \newblock {\em {Elements of Information Theory}}.

4826: \newblock John Wiley and Sons Inc., 1991.

4827:

4828: \bibitem{WZ:1976}

4829: A.~Wyner and J.~Ziv.

4830: \newblock{``The Rate-Distortion Function for Source Coding with Side Information at the Decoder"}.

4831: \newblock {\em IEEE Trans. Inform. Theory}, 22(1):1--10, 1976.

4832:

4833: \bibitem{Willems:83}

4834: F.~M.~J. Willems.

4835: \newblock {``The Discrete Memoryless Multiple Access Channel with Partially

4836:   Cooperating Encoders"}.

4837: \newblock {\em IEEE Trans. Inform. Theory}, 29(3):441--445, 1983.

4838:

4839: \bibitem{RonISIT05:05}

4840: R.~Dabora and S.~D. Servetto.

4841: \newblock{``On the Rates for the General Broadcast Channel with Partially Cooperating Receivers"}.

4842: \newblock{\em Proc. IEEE Int. Symp. Inform. Theory (ISIT)}, Adelaide,  Australia, 2005, pp. 2174--2178.

4843:

4844: \bibitem{Cover:98}

4845: T.~M. Cover.

4846: \newblock {``Comments on Broadcast Channels"}.

4847: \newblock {\em IEEE Trans. Inform. Theory}, 44(6):2524--2530, 1998.

4848:

4849: \bibitem{ElGamalM:81}

4850: A.~A. {El Gamal} and E.~C. van~der Meulen.

4851: \newblock {``A Proof of Marton's Coding Theorem for the Discrete Memoryless

4852:   Broadcast Channel"}.

4853: \newblock {\em IEEE Trans. Inform. Theory}, IT-27(1):120--122, 1981.

4854:

4855: \bibitem{Kaspi:85}

4856: A.~H. Kaspi.

4857: \newblock{``Two-Way Source Coding with a Fidelity Criterion"}.

4858: \newblock {\em IEEE Trans. Inform. Theory}, IT-31(6):735--740, 1985.

4859:

4860: \bibitem{Shlomo_BZ}

4861: A. Steiner, A. Sanderovich and S. Shamai.

4862: \newblock{``Broadcast Cooperation Strategies for Two Colocated Users"}.

4863: \newblock{Submitted to the {\em IEEE Trans. Inform. Theory}}, August 2007.

4864:

4865: \bibitem{ron:ISIT06}

4866: R.~Dabora and S.~D. Servetto.

4867: \newblock{``A Multi-Step Conference for Cooperative Broadcast"}.

4868: \newblock{\em Proc. IEEE Int. Symp. Inform. Theory (ISIT)}, Seattle, WA, July 2006.

4869:

4870: \end{thebibliography}

4871: \end{document}

4872: