cs0605135/pp.tex
1: \documentclass[onecolumn,draftcls,dvips,letter]{IEEEtran}
2: 
3: 
4: \usepackage{amsmath,amssymb,epsfig,color}
5: \usepackage{graphicx,verbatim}
6: \usepackage[section]{placeins}
7: \usepackage{afterpage}
8: %\usepackage{isuthesis}
9: 
10: 
11: \newtheorem{definition}{Definition}
12: \newtheorem{theorem}{Theorem}
13: \newtheorem{lemma}{Lemma}
14: \newtheorem{proposition}{Proposition}
15: \newtheorem{corollary}{Corollary}
16: \newtheorem{remark}{Remark}
17: 
18: \setlength{\unitlength}{1mm} \setlength\arraycolsep{2pt}
19: 
20: \newcommand{\eps}{\epsilon}
21: \newcommand{\styp}{A^{*(n)}_{\eps}}
22: \newcommand{\stypd}{A^{*(n)}_{\delta}}
23: \newcommand{\stypdp}{A^{*(n)}_{\delta'}}
24: \newcommand{\stypm}{A^{*(m)}_{\delta}}
25: \newcommand{\stypp}{A^{*(n-m)}_{\eps'}}
26: \newcommand{\typ}{A_{\epsilon}^{(n)}}
27: \newcommand{\typm}{A_{\delta}^{(m)}}
28: \newcommand{\ttyp}{\tilde{A}_{\epsilon, \delta}^{(n)}}
29: \newcommand{\btyp}{\bar{A}_{\epsilon, \delta}^{(n)}}
30: \newcommand{\hP}{\hat{P}}
31: \newcommand{\hY}{\hat{Y}}
32: \newcommand{\hhY}{\hat{\hat{Y}}}
33: \newcommand{\hy}{\hat{y}}
34: \newcommand{\hhy}{\hat{\hat{y}}}
35: \newcommand{\hw}{\hat{w}}
36: \newcommand{\hm}{\hat{m}}
37: \newcommand{\hs}{\hat{s}}
38: \newcommand{\hq}{\hat{q}}
39: \newcommand{\mN}{\mathcal{N}}
40: \newcommand{\mC}{\mathcal{C}}
41: \newcommand{\mD}{\mathcal{D}}
42: \newcommand{\mS}{\mathcal{S}}
43: \newcommand{\mU}{\mathcal{U}}
44: \newcommand{\mV}{\mathcal{V}}
45: \newcommand{\mL}{\mathcal{L}}
46: \newcommand{\mA}{\mathcal{A}}
47: \newcommand{\mW}{\mathcal{W}}
48: \newcommand{\mX}{\mathcal{X}}
49: \newcommand{\mY}{\mathcal{Y}}
50: \newcommand{\mZ}{\mathcal{Z}}
51: \newcommand{\mM}{\mathcal{M}}
52: \newcommand{\mQ}{\mathcal{Q}}
53: \newcommand{\mR}{\mathcal{R}}
54: \newcommand{\mhY}{\hat{\mathcal{Y}}}
55: \newcommand{\mhhY}{\hat{\hat{\mathcal{Y}}}}
56: \newcommand{\tmL}{\tilde{\mathcal{L}}}
57: \newcommand{\xvec}{\mathbf{x}}
58: \newcommand{\yvec}{\mathbf{y}}
59: \newcommand{\Xvec}{\mathbf{X}}
60: \newcommand{\Yvec}{\mathbf{Y}}
61: \newcommand{\hYvec}{\hat{\mathbf{Y}}}
62: \newcommand{\tYvec}{\tilde{\mathbf{Y}}}
63: \newcommand{\tZvec}{\tilde{\mathbf{Z}}}
64: \newcommand{\tTvec}{\tilde{\mathbf{T}}}
65: \newcommand{\tXvec}{\tilde{\mathbf{X}}}
66: \newcommand{\uvec}{{\bf u}}
67: \newcommand{\vvec}{{\bf v}}
68: \newcommand{\svec}{{\bf s}}
69: \newcommand{\wvec}{{\bf w}}
70: \newcommand{\avec}{{\bf a}}
71: \newcommand{\hyvec}{\hat{\mathbf{y}}}
72: \newcommand{\hhyvec}{\hat{\hat{\mathbf{y}}}}
73: \newcommand{\Pe}{P_{e}^{(n)}}
74: \newcommand{\rend}{\hfill$\square$}
75: \newcommand{\tend}{\hfill$\blacksquare$}
76: \newcommand{\muvec}{\boldsymbol{\mu}}
77: \newcommand{\Rgood}{R_{x1}}
78: \newcommand{\Rbad}{R_{x2}}
79: \newcommand{\dvec}{\boldsymbol{\delta}}
80: \newcommand{\lvec}{\boldsymbol{\lambda}}
81: \newcommand{\cardY}{||\mY_1||}
82: \newcommand{\cardYY}{||\mY_2||}
83: \newcommand{\cardX}{||\mX||}
84: \newcommand{\cardS}{||\mS||}
85: \newcommand{\reals}{\mathbb{R}}
86: \newcommand{\Bt}{\mbox{Bin}_N(\theta)}
87: \newcommand{\Bl}{\mbox{Bin}(l)}
88: \newcommand{\Bj}{\mbox{Bin}_{L_i'}(j)}
89: \newcommand{\Blp}{\mbox{Bin}(l')}
90: \newcommand{\sigR}{\sigma_1^2}
91: \newcommand{\sigW}{\sigma_W^2}
92: \newcommand{\sigD}{\sigma^2}
93: \newcommand{\sigQ}{\sigma_Q^2}
94: \newcommand{\sigQs}{\sigma_Q^{2*}}
95: \newcommand{\nQ}{N_Q}
96: \newcommand{\negdista}{\!\!\!\!\!\!\!\!\!\!}
97: \newcommand{\ners}{\mbox{\scriptsize no erase}}
98: \newcommand{\ers}{\mbox{\scriptsize erase}}
99: 
100: \title{On the Role of Estimate-and-Forward with Time-Sharing in Cooperative Communication
101: \thanks{The authors are with the School of Electrical and Computer
102: Engineering, Cornell University, Ithaca, NY. URL: {\tt
103: http://cn.ece.cornell.edu/}.
104: Work supported by the National Science Foundation, under awards
105: CCR-0238271 (CAREER), CCR-0330059, and ANR-0325556.}}
106: \author{Ron Dabora \hspace{2cm} Sergio D.\ Servetto}
107: 
108: \begin{document}
109: \maketitle
110: \begin{picture}(0,0)
111: \put(0,70){\tt\small Submitted to the IEEE Transactions on
112: Information Theory, October 2006.}
113: \end{picture}
114: \begin{abstract}
115:     \it\noindent
116:     In this work we focus on the general relay channel.
117:     We investigate the application of estimate-and-forward (EAF) to different scenarios. Specifically,
118:     we consider assignments of the auxiliary random variables that always satisfy the feasibility constraints.
119:     We first consider the multiple relay channel and obtain an achievable rate without decoding at the relays. We demonstrate
120:     the benefits of this result via an explicit discrete memoryless multiple relay scenario
121:     where multi-relay EAF is superior to multi-relay decode-and-forward (DAF).
122:     We then consider the Gaussian relay channel with coded modulation, where we show that a three-level quantization outperforms the
123:     Gaussian quantization commonly used to evaluate the achievable rates in this scenario. Finally we consider
124:     the cooperative general broadcast scenario with a multi-step conference. We apply
125:     estimate-and-forward to obtain a general multi-step achievable rate region. We then give an
126:     explicit assignment of the auxiliary random variables, and use this result to
127:     obtain an explicit expression for the single common message broadcast scenario with a two-step conference.
128: \end{abstract}
129: 
130: 
131: \section{Introduction}
132: The relay channel was introduced by van der Meulen in 1971
133: \cite{Meulen:71}. In this setup, a single transmitter with channel input $X^n$ communicates with a single receiver with channel
134: output $Y^n$, where the superscript $n$ denotes the length of a vector. In addition, an external transceiver, called a relay,
135: listens to the channel and is able to output signals to the channel. We denote the channel output observed at the relay with $Y_1^n$ and the channel input transmitted by the relay with $X_1^n$.
136: This setup is depicted in figure \ref{fig:relay_setup}.
137: \begin{figure}[h]
138:     \centering
139:     \scalebox{0.6}{\includegraphics{Relay_Channel.eps}}
140:     \caption{The relay channel. The encoder sends a message $W$ to the decoder.}
141:     \label{fig:relay_setup}
142: \end{figure}
143: 
144: \subsection{Relaying Strategies}
145: \label{sec:relay_strategies}
146: In \cite{CoverG:79} Cover \& El-Gamal introduced two relaying
147: strategies commonly referred to as decode-and-forward (DAF) and
148: estimate-and-forward (EAF). In DAF the relay decodes the message
149: sent from the transmitter and then, at the next time interval,
150: transmits a codeword based on the decoded message. The rate
151: achievable with DAF is given in \cite[theorem 1]{CoverG:79}:
152: \begin{theorem}
153:     \label{thm:CEG_DAF}
154:     \it (achievability of \cite[theorem 1]{CoverG:79}) For the general relay channel any rate $R$ satisfying
155:     \begin{equation}
156:     \label{eqn:CEG_DAF}
157:         R \le \min \left\{I(X,X_1;Y), I(X;Y_1|X_1)\right\}
158:     \end{equation}
159:     for some joint distribution $p(x,x_1,y,y_1) = p(x,x_1)p(y,y_1|x,x_1)$, is achievable.
160: \end{theorem}
161: We note that for DAF to be effective, the rate to the relay has to be greater than the point-to-point rate,
162: i.e.,
163: \begin{equation}
164:     \label{eqn:DAF_condition}
165:     I(X;Y_1|X_1) > I(X;Y|X_1),
166: \end{equation}
167: otherwise higher rates could be obtained without using the relay at all.
168: For relay channels where DAF is not useful or not optimal, \cite{CoverG:79} proposed the EAF strategy. In this strategy,
169: the relay sends an estimate of its channel output to the destination, without decoding the source message at all.
170: The achievable rate with EAF is given in
171: \cite[theorem~6]{CoverG:79}:
172: \begin{theorem}
173:     \label{thm:CEG_EAF}
174:     \it (\cite[theorem 6]{CoverG:79}) For the general relay channel any rate $R$ satisfying
175:     \begin{eqnarray}
176:     \label{eqn:EAF_rate}
177:         R  &\le & I(X;Y,\hY_1|X_1),\\
178:     \label{eqn:EAF_feasible}
179:      \mbox{subject to }   I(X_1;Y) & \ge & I(Y_1;\hY_1|X_1,Y),
180:     \end{eqnarray}
181:     for some joint distribution $p(x,x_1,y,y_1,\hy_1) = p(x)p(x_1)p(y,y_1|x,x_1)p(\hy_1|y_1,x_1)$, where
182:     $||\mhY_1|| < \infty$, is achievable.
183: \end{theorem}
184: 
185: Of course, one can combine the DAF and EAF schemes by performing partial decoding at the relay, thus obtaining
186: higher rates as in \cite[theorem 7]{CoverG:79}.
187: 
188: \subsection{Related Work}
189: In recent years, the research in relaying has mainly focused on multiple-level
190: relaying and the MIMO relay channel. In the context of multiple-level relaying based on DAF, several DAF variations
191: were considered.
192:  In \cite{GuptaKumar:2003} Cover \& El-Gamal's block Markov encoding/successive decoding DAF method was applied to the
193: multiple-relay case. Later work \cite{XieKumar:2004}, \cite{XieKumar:2005} and
194: \cite{Kramer:2003} applied the so-called regular encoding/sliding-window decoding and
195: the regular encoding/backward decoding techniques to the multiple-relay scenario.
196: In \cite{Madsen:2005} the DAF strategy was applied to the MIMO relay channel.
197: The EAF strategy was also applied to the multiple-relay scenario.
198: The work in \cite{Kramer:2005}, for example, considered the EAF strategy for multiple relay scenarios and the Gaussian relay
199: channel, in addition to considering the DAF strategy.
200: Also \cite{Gastpar:2002} considered the EAF strategy in the multiple-relay setup.
201: % and in \cite{SchienGallager:2000}
202: %communication over two parallel relay channels to a destination, without a direct link
203: %between the source and the destination, was considered .
204: Another approach applied recently to the relay channel is that
205: of iterative decoding. In \cite{ElGamalH:2006} the three-node network in the half-duplex regime was considered.
206: In the relay case, \cite{ElGamalH:2006} uses a feedback scheme where the receiver first uses EAF to
207: send information to the relay and then the relay decodes and uses DAF at the next time interval to help
208: the receiver decode its message. Combinations of EAF and DAF were also considered in \cite{Goldsmith:2006}, where
209: conferencing schemes over orthogonal relay-receiver channels were analyzed and compared.
210: Both \cite{ElGamalH:2006} and \cite{Goldsmith:2006} focus on the Gaussian case.
211: % In \cite{Mine:06} we applied simultaneous decoding to the EAF method which resulted in an
212: % increased feasible region for this strategy compared to \cite[theorem 6]{CoverG:79}.
213: % Another work that should be noted in that context is \cite{Motani:2005} where simultaneous decoding
214: % is used to improve upon Cover and El-Gamal's combined DAF/EAF result of \cite[theorem 7]{CoverG:79}.
215: % However, when specialized to the EAF setup, the result of \cite{Motani:2005} converges to
216: % \cite[theorem 6]{CoverG:79}.
217: 
218: An extension of the relay scenario to a hybrid broadcast/relay system was
219: introduced in \cite{DraperFK:03}
220: in which the authors applied a combination of EAF and DAF strategies to the independent broadcast
221: channel with a single common message, and then extended this strategy to the multi-step conference.
222: In \cite{RonSer:2005} we used both a single-step and a two-step conference
223: with orthogonal conferencing channels in the discrete memoryless framework.
224: A thorough investigation of the broadcast-relay channel was done in \cite{LiagV:2005}, where the authors
225: applied the DAF strategy to the case where only one user is helping the other user, and also presented an upper bound for
226: this case. Then, the fully cooperative scenario was analyzed. The authors applied both the
227: DAF and the EAF methods to that case.
228: 
229: 
230: \subsection{The Gaussian Relay Channel with Coded Modulation}
231: One important instance of the relay channel we consider in this work is the Gaussian relay channel with
232: coded modulation. This scenario is important in evaluating the rates achievable with practical communication
233: systems, where components in the receive chain, such as equalization for example, require
234: a uniformly distributed finite constellation for optimal operation.
235: In Gaussian relay channel scenarios, most often three types of relaying techniques are encountered:
236: \begin{itemize}
237:     \item The first technique is decode-and-forward. This technique achieves capacity for the physically degraded
238:         Gaussian relay channel (see \cite[section IV]{CoverG:79}), and also for more general relay
239:         channels under certain conditions (see \cite{Goldsmith:2006}).
240: 
241:     \item The second technique is estimate-and-forward, where the auxiliary variable $\hY_1$ is assigned a Gaussian
242:     distribution. For example, in \cite[section IV]{ElGamal:06} a Gaussian auxiliary random variable (RV) is used in conjunction with
243:     time-sharing at the transmitter, and in \cite{HostMadsen:05} the ergodic capacity for full duplex
244:     transmission with Gaussian EAF is obtained.
245: 
246:     \item The third technique is linear relaying, where the relay transmits a weighted sum of
247:     all its previously received inputs \cite[section V]{ElGamal:06}. An important subclass of this
248:     family of relaying functions is when the relay transmits a scaled version of its input. This method is called
249:     amplify-and-forward \cite{Laneman:2000}, and was later combined with DAF to produce the
250:     decode-amplify-and-forward method of \cite{Bao:2005}.
251: \end{itemize}
252: % In this paper we also consider the relay channel with coded modulation.
253: % The coded modulation relay scenario is important when evaluating the rates that can be obtained by practical
254: %systems,
255: Several recent papers consider the Gaussian relay channel with coded modulation.
256: In \cite{Kramer:Asi05} the author considered variations of DAF for different practical systems. In
257: \cite{Laneman:2000} DAF and amplify-and-forward were considered for coherent orthogonal BPSK signalling, and in
258: \cite{Stankovic:05} a practical construction that implements a half-duplex EAF coding scheme was proposed.
259: 
260: As indicated by several authors (see \cite{ElGamal:06}) it is not obvious if a Gaussian relay function is
261: indeed optimal. In this paper we show that for the case of coded modulation, there are scenarios where
262: non-Gaussian assignments of the auxiliary RV result in a higher rate than the commonly applied Gaussian assignment.
263: 
264: 
265: \subsection{Main Contributions}
266: 
267: In the following we summarize the main contributions of this work:
268: \begin{itemize}
269:     \item We give an intuitive insight into the relay channel in terms of information flow on a graph,
270:     and show how to obtain \cite[theorem 6]{CoverG:79} from flow considerations. Using flow considerations
271:     we also obtain the rate of the EAF strategy when the receiver uses joint-decoding.
272:     A similar expression can be obtained by specializing the result of
273:     \cite{Motani:06} to the case where the relay does not perform partial decoding.
274:     We then show that joint-decoding does not increase the maximum rate of the EAF strategy, and
275:     find the time-sharing assignment that obtains the joint-decoding rate from the general EAF expression. We also
276:     present another time-sharing assignment that always exceeds the joint-decoding rate.
277: 
278:     \item We introduce an achievable rate expression for the multiple relay scenario based on EAF, that is also practically computable.
279:     As discussed in section \ref{sec:relay_strategies}, in
280:     the ``noisy relay'' case EAF outperforms DAF. However, for the multiple relay scenario there is no explicit, computationally practical  expression
281:     based on EAF that can be compared with the DAF-based result presented in \cite{XieKumar:2005}, so that the
282:     best strategy can be selected. As indicated in \cite[remark 22, remark 23]{Kramer:2005}, applying general EAF to
283:     a network with an arbitrary number of relays
284:     is computationally impractical due to the large number of constraints that characterize the feasible region.
285:     Therefore, it is interesting to explore a computationally simple assignment that allows us to derive a
286:     result that extends to an arbitrary number of relays. We also provide an explicit numerical example
287:     to demonstrate that indeed there are cases where multi-relay EAF outperforms the multi-relay DAF.
288: 
289:     \item We consider the optimization of the EAF auxiliary random variable for the
290:     Gaussian relay channel with an orthogonal relay. We consider the coded modulation scenario, and
291:     show that there are three regions: high SNR on the source-relay link, where DAF is the best strategy,
292:     low SNR on the source-relay link in which the common
293:     EAF with Gaussian assignment is best, and an intermediate region where EAF with hard-decision
294:     per symbol is optimal. For this intermediate SNR region we consider two kinds of hard-decisions: deterministic and
295:     probabilistic, and show that each one of them can be superior, depending on the channel conditions.
296: 
297:     \item Lastly, we consider the cooperative broadcast scenario with a multi-step conference. We present a
298:     general rate region, extending the Marton rate region of \cite{Marton:79} to the case where the
299:     receivers hold a $K$-cycle conference prior to decoding the messages. We then specialize this result
300:     to the single common message case and obtain explicit expressions (without auxiliary RVs)
301:     for the two-step conference.
302:     %  that demonstrate that indeed it exceeds the two-step conference.
303:  %   Contrary to the two-step conference, the three-step scheme achieves
304:  %   the full cooperation bound when the conference capacities are less than those given by the Slepian-Wolf
305:  %   theorem \cite[theorem 14.4.1]{cover-thomas:it-book}.
306: 
307: \end{itemize}
308: 
309: 
310: %In the third part of this paper we demonstrate our new strategy in the cooperative broadcast channel with a
311: %single common message scenario. For this setup
312: %we present an explicit three-step cooperation scheme that does not require
313: %auxiliary random variables. This new cooperation scheme yields a rate
314: %increase over the non-cooperative rate for any given cooperation capacity. In addition, this scheme achieves
315: %the full cooperation bound when the conference capacities are less than those given by the Slepian-Wolf
316: %theorem \cite[theorem 14.4.1]{cover-thomas:it-book}.
317: 
318: The rest of this paper is organized as follows:
319: %in section \ref{sec:defs} we define the mathematical framework and also
320: %present an intuitive formulation of the relay channel using information flow on a graph.
321: in section \ref{sec:timeshare_single} we discuss the single relay case. We consider the EAF strategy with
322: time-sharing (TS) and relate it to the EAF rate expression for joint-decoding at the destination receiver.
323: In section \ref{sec:MultipleRelays} we present an achievable region for the multiple-relay channel, and
324: in section \ref{sec:Gauss_relay} we examine the Gaussian relay channel with coded modulation.
325: In section \ref{sec:application_multi_step} we investigate the general cooperative broadcast scenario, and
326: obtain an explicit rate expression by applying TS-EAF to the general multi-step conference.
327: Finally, section \ref{sec:conclu} presents concluding remarks.
328: 
329: 
330: 
331: 
332: 
333: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
334: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
335: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
336: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
337: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
338: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
339: 
340: 
341: 
342: 
343: 
344: 
345: \section{Time-Sharing for the Single-Relay Case}
346: \label{sec:timeshare_single}
347: 
348: 
349: \subsection{Definitions}
350: \label{sec:defs}
351: First, a word about notation:
352: we denote discrete random variables with capital letters e.g. $X$, $Y$, and their realizations with lower case letters
353: $x$, $y$. A random variable $X$ takes values in a set $\mX$. We use $||\mX||$ to denote the cardinality
354: of a finite discrete set $\mX$, and $p_X(x)$ denotes the probability distribution function (p.d.f.) of $X$ on $\mX$. For brevity we may omit the subscript $X$ when it is obvious from
355: the context. We denote vectors with boldface letters, e.g. $\xvec$, $\yvec$; the $i$'th element of a vector $\xvec$ is
356: denoted by $x_i$ and we use $\xvec_i^j$ where $i<j$ to denote $(x_i, x_{i+1},...,x_{j-1},x_j)$.
357: We use $\styp(X)$ to denote the set of $\eps$-strongly typical sequences w.r.t. distribution
358: $p_X(x)$ on $\mX$, as defined in \cite[ch. 5.1]{YeungBook} and $\typ(X)$ to denote the $\eps$-weakly typical set
359: as defined in \cite[ch. 3]{cover-thomas:it-book}.
360: 
361: We also have the following definitions:
362: \begin{definition}
363:     \label{def:relay_channel}
364:     The {\em discrete relay channel} is defined by two discrete input alphabets $\mX$ and $\mX_1$, two
365:     discrete output alphabets $\mY$ and $\mY_1$ and a conditional probability distribution function $p(y,y_1|x,x_1)$ giving the
366:     probability distribution on $\mY \times \mY_1$ for each $(x,x_1) \in \mX \times \mX_1$.
367:     The relay channel is called {\em memoryless} if the probability of a block of $n$ transmissions is given by
368:     $p(\yvec,\yvec_1|\xvec,\xvec_1) = \prod_{i=1}^n p\left(y_i, y_{1,i}|x_i,x_{1,i}\right)$.
369: \end{definition}
370: In this paper we consider only the memoryless relay channel.
371: \begin{definition}
372:     \label{def:code}
373:     A {\em $(2^{nR},n)$ code} for the relay channel consists of a source message set
374:     $\mW = \left\{1,2,...,2^{nR}\right\}$, a mapping function $f$ at the encoder,
375:     \[
376:         f: \mW \mapsto \mX^n,
377:     \]
378:     a set of $n$ relay functions
379:     \[
380:         x_{1,i} = t_i\left(y_{1,1},y_{1,2},...,y_{1,i-1} \right),
381:     \]
382:     where the $i$'th relay function $t_i$ maps the first $i-1$ channel outputs at the relay into a transmitted
383:     relay symbol at time $i$. Lastly we have a decoder
384:     \[
385:         g: \mY^n \mapsto \mW.
386:     \]
387: \end{definition}
388: \begin{definition}
389:     \label{def:Perr}
390:     The {\em average probability of error} for a code of length $n$ for the relay channel is defined as
391:     \[
392:         \Pe = \Pr(g(Y^n) \ne W),
393:     \]
394:     where $W$ is selected uniformly over $\mW$.
395: \end{definition}
396: \begin{definition}
397:     A rate $R$ is called {\em achievable} if there exists a sequence of $(2^{nR},n)$ codes with
398:     $\Pe \rightarrow 0$ as $n \rightarrow \infty$.
399: \end{definition}
400: 
401: 
402: 
403: 
404: 
405: 
406: 
407: 
408: 
409: 
410: 
411: 
412: 
413: \subsection{The Single Relay EAF with Time-Sharing}
414: \label{sec:ts-single-subsec}
415: Consider the following assignment of the auxiliary random variable
416: of theorem \ref{thm:CEG_EAF}:
417: \begin{equation}
418:     \label{eqn:time-sharing-mapping}
419:     p(\hy_1|y_1,x_1) = \left\{
420:             \begin{array}{cl}
421:                 q &, \hy_1 = y_1\\
422:                 1-q & ,\hy_1 = \Omega \notin \mY_1.
423:             \end{array}
424:         \right.
425: \end{equation}
426: Under this assignment, the feasibility condition of
427: \eqref{eqn:EAF_feasible} becomes
428: \begin{eqnarray*}
429:     I(X_1;Y) & \ge & I(Y_1;\hY_1|X_1,Y) \\
430:              & = &   H(Y_1|X_1,Y) - H(Y_1|X_1,Y,\hY_1) \\
431:              & = &   H(Y_1|X_1,Y) - (1-q)H(Y_1|X_1,Y) - q H(Y_1|X_1,Y,Y_1)\\
432:              & = &   q H(Y_1|X_1,Y),
433: \end{eqnarray*}
434: and the rate expression \eqref{eqn:EAF_rate} becomes
435: \begin{eqnarray*}
436:     R & \le & I(X;Y,\hY_1|X_1)\\
437:         & = & I(X;Y|X_1) + I(X; \hY_1|X_1,Y)\\
438:         & = & I(X;Y|X_1) + H(X| X_1,Y) - H(X|X_1,Y,\hY_1)\\
439:         & = & I(X;Y|X_1) + H(X| X_1,Y) - (1-q) H(X|X_1,Y) - q H(X|X_1,Y,Y_1)\\
440:         & = & I(X;Y|X_1) + q I(X;Y_1|X_1,Y).
441: \end{eqnarray*}
442: Clearly, maximizing the rate implies maximizing $q$ subject to the feasibility
443: constraint $q H(Y_1|X_1,Y) \le I(X_1;Y)$ and to $q\in [0,1]$. This gives the following corollary to theorem \ref{thm:CEG_EAF}:
444: \begin{corollary}
445:     \label{corr:single_relay_TAF}
446:     \it For the general relay channel any rate $R$ satisfying
447:     \begin{equation}
448:     \label{eqn:main_corr}
449:         R \le I(X;Y|X_1) + \left[ \frac{I(X_1;Y)}{H(Y_1|X_1,Y)} \right]^* I(X;Y_1|X_1,Y),
450:     \end{equation}
451:     for the joint distribution $p(x,x_1,y,y_1) = p(x) p(x_1) p(y,y_1|x,x_1)$, with $[x]^* \triangleq \min(x,1)$,
452:     is achievable.
453: \end{corollary}
454: 
455: Now, consider the following distribution chain:
456: \begin{equation}
457: \label{eqn:extended_prob_chain}
458:     p(x,x_1,y,y_1,\hy_1,\hhy_1) = p(x)p(x_1)p(y,y_1|x,x_1) p(\hy_1|x_1,y_1) p(\hhy_1 | \hy_1).
459: \end{equation}
460: We note that this extended chain can be put into the standard form by letting $p(\hhy_1|x_1,y_1) = \sum_{\mhY_1}p(\hy_1,\hhy_1|x_1,y_1) =
461: \sum_{\mhY_1}p(\hy_1|x_1,y_1)p(\hhy_1|\hy_1) $.
462: After compression of $Y_1$ into $\hY_1$, there is a second compression operation, compressing $\hY_1$ into $\hhY_1$. The output
463: of the second compression is used to facilitate cooperation between the relay and the destination. Therefore, the
464: receiver decodes the message based on $\hhyvec_1$ and $\yvec$, repeating exactly the same steps as in the standard relay decoding, with
465: $\hhyvec_1$ replacing $\hyvec_1$. Then, the expressions of theorem \ref{thm:CEG_EAF} become
466: \begin{eqnarray}
467:     \label{eqn:EAF_rate_extended}
468:         R  &\le & I(X;Y,\hhY_1|X_1),\\
469:     \label{eqn:EAF_feasible_extended}
470:      \mbox{subject to }   I(X_1;Y) & \ge & I(Y_1;\hhY_1|X_1,Y).
471: \end{eqnarray}
472: Now, applying TS to $\hhY_1$ with
473: \begin{equation}
474:     \label{eqn:assignment_hhy}
475:         p(\hhy_1|\hy_1) = \left\{
476:             \begin{array}{cl}
477:                 q &,\hhy_1 = \hy_1\\
478:                 1-q & ,\hhy_1 = \Delta \notin \mhY_1
479:             \end{array}
480:         \right.,
481: \end{equation}
482: the expressions in \eqref{eqn:EAF_rate_extended} and \eqref{eqn:EAF_feasible_extended} become
483: \begin{eqnarray}
484:     R & \le & I(X;Y|X_1) + I(X;\hhY_1|X_1,Y)\nonumber\\
485:      & = & I(X;Y|X_1) + H(X|X_1,Y) - H(X|\hhY_1,X_1,Y)\nonumber\\
486:      & = & I(X;Y|X_1) + q(H(X|X_1,Y)  -  H(X|\hY_1,X_1,Y)) \nonumber\\
487:      \label{eqn:rate_CEG_extended_chain}
488:      & = & I(X;Y|X_1) + q I(X;\hY_1|X_1,Y), \\
489:     I(X_1;Y) & \ge & I(Y_1;\hhY_1|X_1,Y)\nonumber\\
490:         & = & H(Y_1|X_1,Y) - H(Y_1|\hhY_1,X_1,Y)\nonumber\\
491:         & = & H(Y_1|X_1,Y) - (1-q)H(Y_1|X_1,Y) - q H(Y_1|\hY_1,X_1,Y)\nonumber\\
492:     \label{eqn:feasibility_CEG_extended_chain}
493:         & = & q I(Y_1;\hY_1|X_1,Y).
494: \end{eqnarray}
495: Combining this with the constraint $q \in [0,1]$ we obtain the following proposition, which follows from theorem \ref{thm:CEG_EAF}:
496:     \begin{proposition}
497:     \label{prop:TAF}
498:     \it
499:         For the general relay channel, any rate $R$ satisfying
500:         \[
501:             R \le I(X;Y|X_1) + \left[ \frac{I(X_1;Y)}{I(Y_1;\hY_1|X_1,Y)}\right]^* I(X;\hY_1|X_1,Y),
502:         \]
503:         for some joint distribution $p(x,x_1,y,y_1,\hy_1) = p(x)p(x_1)p(y,y_1|x,x_1)p(\hy_1|x_1,y_1)$,
504:         is achievable.
505:     \end{proposition}
506: \smallskip
507: This proposition generalizes corollary \ref{corr:single_relay_TAF} by performing a general Wyner-Ziv (WZ) compression combined with
508: TS (which is a specific type of WZ compression), intended to guarantee feasibility of the first compression step.
509: In section \ref{sec:Gauss_relay} we apply a similar idea to the EAF relaying in the Gaussian relay channel scenario with coded modulation.
510: Before we discuss the relationship between joint-decoding and time-sharing we present an intuitive way to view the EAF strategy.
511: 
512: 
513: 
514: 
515: 
516: 
517: 
518: \subsection{An Intuitive View of Estimate-and-Forward}
519: \label{sec:intuitive_explanation}
520: Consider the rate bound and the feasible region of theorem \ref{thm:CEG_EAF}
521: given in equations \eqref{eqn:EAF_rate} and \eqref{eqn:EAF_feasible}.
522: We note that the following intuitive explanation does not constitute a proof but it does provide an insight into the
523: relay achievability results. We emphasize that the achievable rates stated in this section can also be proved rigorously.
524: In the following we provide an intuitive insight into these expressions in terms of a flow on a graph.
525: 
526: In constructing the intuitive information flow representation for the relay channel, we first need to specify
527: the underlying assumptions and the operations performed at the source, the relay and the destination receiver:
528: \begin{itemize}
529:     \item The source and the relay generate their codebooks independently.
530: 
531:     \item The relay compresses its channel output $\yvec_1$ into $\hyvec_1$, which represents the information
532:     conveyed to the destination receiver to assist in decoding the source message.
533: 
534:     \item Based on the above two restrictions we have the following Markov chain:
535:     $p(x)p(x_1)p(y,y_1|x,x_1)p(\hy_1|x_1,y_1)$.
536: 
537:     \item The relay input signal $\xvec_1$ is based only on the compressed $\hyvec_1$.
538: 
539:     \item The destination uses $\xvec_1$, $\hyvec_1$ and $\yvec$ to decode the source message $\xvec$.
540: \end{itemize}
541: We also use the following representation for transmission, reception and compression:
542: \begin{itemize}
543:     \item  We represent an information source
544:         as a source whose output flow is equal to its information rate.
545: 
546:     \item We represent the compression
547:         operation as a flow sink whose flow consumption is equal to the mutual information between the
548:         original and the compressed sequences.
549: 
550:     \item The destination is represented as a flow sink.
551: 
552:     \item  As in a standard flow on a graph, the flows are additive, following the
553:         chain rule of mutual information.
554: \end{itemize}
555: 
556: Now consider the flow diagram of figure \ref{fig:Relay_flow}.
557: \begin{figure}[ht]
558:     \centering
559:     \scalebox{0.6}{\includegraphics{Intuitive_Current_Flow.eps}}
560:     \caption{The information flow budget for the general relay channel with compression at the relay.}
561:     \label{fig:Relay_flow}
562: \end{figure}
563: As can be observed from the figure, the source has an output flow of
564: \[
565:     i_T = I(X;Y,\hY_1,X_1) = I(X;Y,\hY_1|X_1).
566: \]
567: This follows from the fact that the destination uses $\xvec_1,\hyvec_1$ and $\yvec$ to decode $\xvec$ and the fact that
568: $X$ and $X_1$ are independent. This total
569: flow reaches the receiver through two branches, the direct branch (D) which carries a flow of $i_D = I(X;Y|X_1)$ and
570: the relay branch (ABCE). Now, the quantities in the relay branch are calculated given $X_1$ and $Y$ to represent only the
571: rate increase over the direct path.
572: The relay branch has four parts: an edge
573: (A) which carries a flow of $I(X;\hY_1|X_1,Y)$, a sink (B) with consumption $I(Y_1;\hY_1|X_1,Y)$,
574: a relay source (C) with an output flow of $I(X_1;Y)$ and an edge (E) from the relay to the destination.
575: Here, the relay transmission to the destination (C) is done at a fixed rate $I(X_1;Y)$, independent of the type
576: of compression $p(\hy_1|x_1,y_1)$ used at the relay, since we always transmit from the relay to the destination
577: at the maximum possible rate in order to obtain the best performance.
578: The rate loss due to compression is represented by $I(\hY_1;Y_1|X_1,Y)$, since we consider only the excess
579: rates over the direct one.
580: 
581: Now, from the laws of flow addition and conservation, the overall flow from the source to the destination through
582: the relay branch is $i_E = i_A + i_B + i_C$. To assist the direct link (D) we need
583: the flow on (ABCE) to be positive. In theorem \ref{thm:CEG_EAF} the scheme considers only the last two elements,
584: $i_B + i_C$, and verifies that their net flow is positive, namely
585: \begin{equation}
586: \label{eqn:intuive_CEG}
587:     -I(Y_1;\hY_1|X_1,Y) + I(X_1;Y) > 0.
588: \end{equation}
589: This condition guarantees a net positive flow on (ABCE) since always $i_A \ge 0$.
590: Now, the flow to the destination can be obtained as the minimum
591: \begin{equation}
592: \label{eqn:intuit_CEG_Rate}
593:     R \le \min \left\{ i_D + i_E, i_T\right\},
594: \end{equation}
595: where the second term in the minimum is obtained from the transmitter, since
596: trivially the information rate at the receiver cannot exceed $i_T$. We note that because $i_B + i_C \ge 0$, the minimum in \eqref{eqn:intuit_CEG_Rate}
597: is $i_T$. Therefore, the resulting achievable rate is
598: \[
599:     R \le I(X;Y,\hY_1|X_1),
600: \]
601: which combined with \eqref{eqn:intuive_CEG} gives the result of \cite[theorem 6]{CoverG:79}.
602: 
603: 
604: However, the condition in \eqref{eqn:intuive_CEG} is not tight since
605: even when $i_B + i_C < 0$ the  flow on (ABCE) is still non-negative if the entire sum $i_A + i_B + i_C$ is
606: non-negative, i.e.
607: \begin{equation}
608: \label{eqn:tighter_condition}
609:     I(X;\hY_1|X_1,Y) - I(\hY_1;Y_1|X_1,Y) + I(X_1;Y) \ge 0.
610: \end{equation}
611: Then, the achievable rate to the destination is bounded by
612: \begin{equation}
613: \label{eqn:intuit_jt_rate}
614:     R \le i_D + i_E = I(X;Y|X_1) + I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y).
615: \end{equation}
616: Indeed, when the flow through the relay branch (ABCE) is zero we obtain the
617: non-cooperative rate $I(X;Y|X_1)$.
618:  Plugging the expression \eqref{eqn:intuit_jt_rate} into \eqref{eqn:intuit_CEG_Rate} yields the following achievable rate:
619: \begin{eqnarray*}
620:     R  & \le & \min\left\{i_D + i_E , i_T\right\}\\
621:        &  =  & \min\left\{ I(X;Y|X_1) + I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y), I(X;Y,\hY_1|X_1)\right\}\\
622:        &  =  & I(X;Y|X_1) + \min\left\{  I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y), I(X;\hY_1|X_1,Y)\right\}.
623: %       &  =  & \min\left\{ I(X,X_1;Y)  - I(\hY_1;Y_1|X,X_1,Y), I(X;Y,\hY_1|X_1)\right\}.
624: \end{eqnarray*}
625: Combining this with \eqref{eqn:tighter_condition}, (informally) proves the following proposition:
626: \begin{proposition}
627:     \label{prop:jt-rate}
628:     \it
629:     For the general relay channel, any rate $R$ satisfying
630:     \begin{eqnarray*}
631:         R  & \le & I(X;Y|X_1) + \min\left\{  I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y), I(X;\hY_1|X_1,Y)\right\},\\
632:         \mbox{subject to } I(X_1;Y) & \ge & I(\hY_1;Y_1|X,X_1,Y) =  I(\hY_1;Y_1|X_1,Y) - I(X;\hY_1|X_1,Y),
633:     \end{eqnarray*}
634:     for some joint distribution $p(x,x_1,y,y_1,\hy_1) = p(x)p(x_1)p(y,y_1|x,x_1)p(\hy_1|x_1,y_1)$, is achievable.
635: \end{proposition}
636: \bigskip
637: The proof of proposition \ref{prop:jt-rate} can be made formal using joint-decoding at the destination receiver,
638: but as in the next subsection we show that this expression is a special case of \cite[theorem 6]{CoverG:79} obtained by time-sharing, we omit the
639: details of the proof here.
640: 
641: 
642: 
643: 
644: 
645: 
646: 
647: 
648: 
649: 
650: \subsection{Joint-Decoding and Time-Sharing}
651: In the original work of \cite[theorem 6]{CoverG:79}, the decoding
652: procedure at the destination receiver for decoding the message
653: $w_{i-1}$ at time $i$ is composed of three steps (the notations
654: below are identical to \cite[theorem 6]{CoverG:79}. The reader is referred to the proof of \cite[theorem 6]{CoverG:79}
655: to recall the definitions of the sets and variables used in the following description):
656: \begin{enumerate}
657:     \item Decode the relay index $s_i$ using $\yvec(i)$, the received signal at time $i$.
658:     \item Decode the relay message $z_{i-1}$, using $s_i$, the received
659:         signal $\yvec(i-1)$ and the previously decoded $s_{i-1}$.
660:     \item Decode the source message $w_{i-1}$ using $\yvec(i-1)$,
661:     $z_{i-1}$ and $s_{i-1}$.
662: \end{enumerate}
663: 
664: Evidently, when decoding the relay message $z_{i-1}$ at the second step, the receiver does not make use of
665: the statistical dependence
666: between $\hyvec_{1}(i-1)$, the relay sequence at time $i-1$, and $\xvec(w_{i-1})$, the transmitted source codeword at time $i-1$.
667: The way to use this dependence is to jointly decode $z_{i-1}$ and $w_{i-1}$ after decoding $s_i$ and $s_{i-1}$. The joint-decoding procedure
668: then has the following steps:
669:     \begin{enumerate}
670:         \item From $\yvec(i)$, the received signal at time $i$, the receiver decodes $s_i$ by looking for a unique
671:         $s \in \mS$, the set of indices used to select $\xvec_1$, such that $\big(\xvec_1(s), \yvec(i)\big) \in \styp$.
672:         As in \cite[theorem 6]{CoverG:79},
673:         the correct $s_i$ can be decoded with an arbitrarily small probability of error by taking $n$ large
674:         enough as long as
675:         \begin{equation}
676:         \label{eqn:R0_conds}
677:             R_0 \le I(X_1;Y),
678:         \end{equation}
679:         where $||\mS|| = 2^{nR_0}$.
680: 
681:         \item The receiver now knows the set $S_{s_i}$ into which $z_{i-1}$ (the relay message at time $i-1$) belongs.
682:         Additionally, from decoding at time $i-1$
683:         the receiver knows $s_{i-1}$, used to generate $z_{i-1}$.
684: 
685:         \item The receiver generates the set
686:         $\mL(i-1) = \left\{ w \in \mW: \big(\xvec(w), \yvec(i-1), \xvec_1(s_{i-1})\big) \in \styp\right\}$.
687: 
688:         \item The receiver now looks for a unique $w \in \mL(i-1)$ such that
689:             $\big( \xvec(w), \yvec(i-1), \hyvec_1(z|s_{i-1}), \xvec_1(s_{i-1})\big) \in \styp$ for some
690:             $z \in S_{s_i}$.
691:             If such a unique $w$ exists then it is the decoded $\hw_{i-1}$,
692:             otherwise the receiver declares an error.
693:     \end{enumerate}
694: We do not give here a formal proof for the resulting rate expression, but as indicated in section
695: \ref{sec:intuitive_explanation}, the rate expression resulting from this decoding procedure is given by
696: proposition \ref{prop:jt-rate}.
697: 
698: Let us now compare the rates obtained with joint-decoding (proposition \ref{prop:jt-rate}) with the rates obtained with the sequential
699: decoding of \cite[theorem 6]{CoverG:79}:
700: to that end we consider the joint-decoding result of proposition \ref{prop:jt-rate} with the extended probability chain of
701: \eqref{eqn:extended_prob_chain}:
702: \[
703: %    \label{eqn:extended_chain}
704:     p(x,x_1,y,y_1,\hy_1,\hhy_1) = p(x)p(x_1)p(y,y_1|x,x_1) p(\hy_1|x_1,y_1) p(\hhy_1 | \hy_1),
705: \]
706: where $\hhY_1$ represents the information relayed to the destination.
707: %Applying exactly the same steps as in the proof of proposition \ref{prop:jt-rate} we obtain the expression:
708: %\begin{eqnarray}
709: %    \label{eqn:rate_2_hats}
710: %        R & \le & I(X;Y|X_1)  + \min\left\{I(X_1;Y) - I(\hhY_1;Y_1|X,X_1,Y),  I(X;\hhY_1|X_1,Y)\right\}\\
711: %    \label{eqn:constr_2_hats}
712: %        \mbox{subject to }I(X_1;Y) & \ge & I(\hhY_1;Y_1|X,X_1,Y) = I(\hhY_1;Y_1|X_1,Y) - I(X;\hhY_1|X_1,Y).
713: %\end{eqnarray}
714: %Now consider the expressions in \eqref{eqn:rate_2_hats} and \eqref{eqn:constr_2_hats}. Setting $p(\hhy_1 | \hy_1)$ the same as in
715: %\eqref{eqn:assignment_hhy}
716: %subject to $q \in [0,1]$, we obtain that \eqref{eqn:rate_2_hats} and \eqref{eqn:constr_2_hats} become
717: Expanding the expressions of proposition \ref{prop:jt-rate} using the assignment \eqref{eqn:assignment_hhy}, similarly to proposition
718: \ref{prop:TAF}, we obtain the expressions:
719: \begin{eqnarray}
720:     \label{eqn:rate_2_hats_q}
721:         R & \le & I(X;Y|X_1)  + \min\left\{I(X_1;Y) - q I(\hY_1;Y_1|X,X_1,Y),  q I(X;\hY_1|X_1,Y)\right\}\\
722:     \label{eqn:constr_2_hats_q}
723:         \mbox{subject to }I(X_1;Y) & \ge & q I(\hY_1;Y_1|X,X_1,Y) = q\left(I(\hY_1;Y_1|X_1,Y) - I(X;\hY_1|X_1,Y)\right).
724: \end{eqnarray}
725: 
726: We can now make the following observations:
727: \begin{enumerate}
728:     \item Setting $q = 1$ we obtain proposition \ref{prop:jt-rate}. Additionally, if
729:         $I(X_1;Y) > I(\hY_1;Y_1|X_1,Y)$ then both proposition \ref{prop:jt-rate} and \cite[theorem 6]{CoverG:79} give
730:         identical expressions.
731:     \item When $q=1$ and
732:         \begin{equation}
733:         \label{eqn:cond_joint}
734:             I(\hY_1;Y_1|X_1,Y) - I(X;\hY_1|X_1,Y) < I(X_1;Y) < I(\hY_1;Y_1|X_1,Y),
735:         \end{equation}
736:         then {\em for the same} mapping $p(\hy_1|x_1, y_1)$ we obtain that proposition \ref{prop:jt-rate} provides rate but
737:         \cite[theorem 6]{CoverG:79} does not. The rate expression under these conditions is
738:         \begin{equation}
739:         \label{eqn:rate_q_is_one}
740:             R  \le  I(X;Y|X_1)  + I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y).
741:         \end{equation}
742: 
743:     \item
744:         Now, fix the probability chain $p(x)p(x_1)p(y,y_1|x,x_1)p(\hy_1|x_1,y_1)$ and  examine the expressions
745:         \eqref{eqn:rate_2_hats_q} and \eqref{eqn:constr_2_hats_q} when \eqref{eqn:cond_joint} holds:
746:         when $q < 1$, then \eqref{eqn:cond_joint}
747:         guarantees that condition \eqref{eqn:constr_2_hats_q} is still satisfied.
748:         If $q$ is close enough to $1$ such that we also have
749:         $I(X_1;Y) \le q I(\hY_1;Y_1|X_1,Y)$, the rate from \eqref{eqn:rate_2_hats_q}, i.e.,
750:         \[
751:             R \le  I(X;Y|X_1)  + I(X_1;Y) - qI(\hY_1;Y_1|X,X_1,Y),
752:         \]
753:         is now greater than \eqref{eqn:rate_q_is_one}. In this case we can keep decreasing $q$ until
754:         \begin{equation}
755:         \label{eqn:optim_q}
756:             I(X_1;Y) - qI(\hY_1;Y_1|X,X_1,Y) = qI(X;\hY_1|X_1,Y)
757:         \end{equation}
758:         at which point the rate becomes
759:         \begin{equation}
760:         \label{eqn:rate-jt-optim}
761:             R \le I(X;Y|X_1)  + q I(X;\hY_1|X_1,Y).
762:         \end{equation}
763:         This rate can be obtained from \cite[theorem 6]{CoverG:79} by applying the extended probability chain of \eqref{eqn:extended_prob_chain},
764:         as long as $I(X_1;Y) \ge q I(\hY_1;Y_1|X_1,Y)$.
765: \end{enumerate}
766: We therefore conclude that all the rates that joint decoding allows can also be obtained  or exceeded by the original EAF with an
767: appropriate time sharing\footnote{This argument is due to Shlomo Shamai and Gerhard Kramer.}.
768: 
769:     Note that equality in \eqref{eqn:optim_q} implies
770:     \[
771:         q_{opt} = \min\left\{1,\frac{I(X_1;Y)}{I(\hY_1;Y_1|X,X_1,Y)+ I(X;\hY_1|X_1,Y)}\right\}
772:             =\min\left\{1, \frac{I(X_1;Y)}{I(\hY_1;Y_1|X_1,Y)}\right\}
773:             ,
774:     \]
775:     hence $q_{opt}$ is the maximum $q$ that makes the mapping $p(\hy_1|x_1,y_1)$ feasible for \cite[theorem 6]{CoverG:79}.
776:     Plugging $q_{opt}$ into \eqref{eqn:rate-jt-optim}, we obtain the rate expression of proposition \ref{prop:TAF}.
777: 
778: 
779:     Finally, consider again the region where joint decoding is useful \eqref{eqn:cond_joint}:
780:     \begin{eqnarray*}
781:         I(\hY_1;Y_1|X,X_1,Y) & \le I(X_1;Y) \le &   I(\hY_1;Y_1|X_1,Y)\\
782:     \Rightarrow   0 & \le I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y) \le &   I(\hY_1;Y_1|X_1,Y) - I(\hY_1;Y_1|X,X_1,Y)\\
783:    \Rightarrow   0 & \le I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y) \le &    I(X;\hY_1|X_1,Y)\\
784:         \Rightarrow   0 & \le \frac{I(X_1;Y) - I(\hY_1;Y_1|X,X_1,Y)}{I(X;\hY_1|X_1,Y)} \le &   1.
785:     \end{eqnarray*}
786:     If $I(X;\hY_1|X_1,Y) > 0$, then using time-sharing on $\hY_1$ with
787:     \begin{equation}
788:     \label{eqn:assign_q_joint}
789:         q = \frac{I(X_1;Y)-I(\hY_1;Y_1|X,X_1,Y)}{I(X;\hY_1|X_1,Y)}
790:     \end{equation}
791:     into equations \eqref{eqn:rate_CEG_extended_chain} and \eqref{eqn:feasibility_CEG_extended_chain} yields:
792:     \[
793:         I(X;Y|X_1) + q I(X;\hY_1|X_1,Y) = I(X;Y|X_1) + I(X_1;Y)-I(\hY_1;Y_1|X,X_1,Y),
794:     \]
795:     as long as $I(X_1;Y) \ge qI(\hY_1;Y_1|X_1,Y)$, or equivalently
796:     \begin{equation}
797:     \label{eqn:cond_TS_CEG6}
798:         q \le \frac{I(X_1;Y)}{I(\hY_1;Y_1|X_1,Y)}.
799:     \end{equation}
800:     Plugging assignment \eqref{eqn:assign_q_joint} into \eqref{eqn:cond_TS_CEG6} we obtain:
801:     \begin{eqnarray*}
802:         \frac{I(X_1;Y)-I(\hY_1;Y_1|X,X_1,Y)}{I(X;\hY_1|X_1,Y)} & \le & \frac{I(X_1;Y)}{I(\hY_1;Y_1|X_1,Y)}\\
803:         \Rightarrow \quad \left(I(X_1;Y)-I(\hY_1;Y_1|X,X_1,Y)\right)I(\hY_1;Y_1|X_1,Y)
804:             & \le & I(X_1;Y)I(X;\hY_1|X_1,Y)\\
805:         \Rightarrow \quad I(X_1;Y)I(\hY_1;Y_1|X_1,Y)- I(X_1;Y)I(X ;\hY_1|X_1,Y)
806:             & \le & I(\hY_1;Y_1|X ,X_1,Y)I(\hY_1;Y_1|X_1,Y)\\
807:         \Rightarrow \quad I(X_1;Y)I(\hY_1;Y_1|X ,X_1,Y) & \le & I(\hY_1;Y_1|X ,X_1,Y)I(\hY_1;Y_1|X_1,Y)\\
808:         \Rightarrow \quad I(X_1;Y) & \le & I(\hY_1;Y_1|X_1,Y),
809:     \end{eqnarray*}
810:     as long as $I(\hY_1;Y_1|X ,X_1,Y) > 0$,
811:     which is the region where joint-decoding is supposed to be useful.
812:     Hence the joint-decoding rate of proposition \ref{prop:jt-rate} can be obtained by time sharing
813:     on the \cite[theorem 6]{CoverG:79} expression. Therefore, joint-decoding does not improve on the
814:     rate of \cite[theorem 6]{CoverG:79}. In fact the rate of proposition \ref{prop:TAF} is always at least as large as
815:     that of proposition \ref{prop:jt-rate}.
816: 
817: 
818: 
819: 
820: 
821: 
822: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
823: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
824: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
825: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
826: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
827: 
828: 
829: 
830: 
831: 
832: 
833: 
834: 
835: 
836: 
837: 
838: 
839: \section{An Achievable Rate for the Relay Channel with Multiple Relays}
840: \label{sec:MultipleRelays}
841: When the source-relay
842: % The multiple-relay channel was studied by in \cite{GuptaKumar:2003}, \cite{XieKumar:2004}, \cite{XieKumar:2005}
843: % and \cite{Kramer:2003}. These results are based on decoding at the relays (according to an hierarchy)
844: % prior to generating the relay codeword. However, when the
845: channel is very noisy then,
846: as discussed in the introduction, it may be better not to use the relay at all than to
847: employ the decode-and-forward strategy. Alternatively, when decode-and-forward is not useful, one
848: could employ estimate-and-forward. One result for multiple relays based on EAF can be found in
849: \cite{Gastpar:2002} which considered the two-relay case. In \cite[theorem 3]{Kramer:2005} the EAF
850: strategy, with partial decoding was applied to the multiple-relay case, and in \cite[theorem 4]{Kramer:2005} a mixed
851: EAF and DAF strategy was applied.
852: However, as stated in \cite[remark 22, remark 23]{Kramer:2005} applying the general estimate-and-forward to
853: a network with an arbitrary number of relays
854: is computationally impractical due to the large number of constraints that
855: characterize the feasible region (for two relays
856: we need to satisfy $9$ constraints). Moreover, the rate computation is prohibitive since
857: it would imply solving a non-convex optimization problem. In conclusion, an alternative achievable rate
858: to that based on decode-and-forward, which can also be evaluated with a reasonable effort, has not been presented to date.
859: In this section we derive an explicit achievable rate based on estimate-and-forward.
860: The strategy we use is to pick the auxiliary random variable
861: such that the feasibility constraints are satisfied. This is not a trivial choice since setting the
862: auxiliary random variable in theorem \ref{thm:CEG_EAF} to be the relay channel output (i.e. $\hY_1 = Y_1$) does
863: not remove this constraint, and we therefore need to incorporate time-sharing as discussed in the following.
864: 
865: 
866: 
867: 
868: 
869: 
870: 
871: 
872: \subsection{A General Achievable Rate}
873: \label{sec:achieve_general}
874: %The rate derived in the previous section is based on separate decoding of the pair $(\xvec_i, \hyvec_i)$ for
875: %the relay $i$. However, we can improve on this rate if, when decoding the information for relay $i$, we
876: %use all the information obtained from decoding for previous relays. This results in the following theorem:
877: We extend the idea of section \ref{sec:ts-single-subsec} to the relay channel with $N$ relays. This channel consists of
878: a source with channel input $X$, $N$ relays where for relay $i$, $X_i$ denotes the channel input and $Y_i$ denotes the channel output,
879: and a destination with channel output $Y$. This channel is denoted by
880: $\left( \mX \times_{i=1}^N \mX_i,p(y,y_1,...,y_N|x,x_1,...,x_N), \mY \times_{i=1}^N \mY_i\right)$.
881: Let $\Xvec = \left(X_1,X_2,...,X_N\right)$ and $\Yvec = \left(Y_1,Y_2,...,Y_N\right)$. We now have the
882: following theorem:
883: \begin{theorem}
884:     \label{thm:achieve_N_result_2}
885:     \it
886:     For the general multiple-relay channel with $N$ relays,
887:         $\Big( \mX \times_{i=1}^N \mX_i,p(y,y_1,...,y_N|x,x_1,...,x_N),$ ${\mY \times_{i=1}^N \mY_i}\Big)$, any rate $R$ satisfying
888:         \[
889:             R \le I(X;Y|\Xvec) + \sum_{\theta = 1}^{2^N-1} P(\Bt)I(X;\Yvec_{\Bt}|\Xvec,Y),
890:         \]
891:         where $\Bt$ is an $N$-element vector that contains $'1'$ in the locations where the $N$-bit binary representation
892:         of the integer $\theta$ contains $'1'$,
893:         $P(\Bt) = \prod_{i:\Bt_i = 0} (1-q_i) \prod_{i:\Bt_i = 1} q_i$, $\Bt_i$ is the $i$'th bit in the
894:         $N$-bit binary representation of $\theta$,
895:         $\Yvec_{\Bt} = \left(Y_{i_1}, Y_{i_2},...,Y_{i_M} \right)$, where $i_1$, $i_2$, ..., $i_M$ are
896:         the locations of the $'1'$ in $\Bt$, and
897:         \begin{equation}
898:             \label{eqn:q_i_assgn_full_thm}
899:   %     q_i = \left[\frac{\sum_{j = 1}^{L_i} P_l(\Bj)I(X_i;Y|\tXvec_i(\tZvec_i),\tYvec_{l,{\Bj}}(\tZvec_i)) }
900:            q_i = \left[\frac{I(X_i;Y|\tZvec_i) }
901:                         {H(Y_i|\Xvec,Y) -\sum_{j = 1}^{2^{L'_i}-1} P_{l'}(\Bj)I(Y_i;\tYvec_{l',\Bj}(\tTvec_i)|\Xvec,Y)}
902:             \right]^*,
903:         \end{equation}
904:                 for the joint distribution
905:         $p(x,x_1,x_2,...,x_N,y,y_1,y_2,...,y_N)=p(x)p(x_1)...p(x_N)p(y,y_1,...,y_N|x,x_1,...,x_N)$ is achievable.
906:         In \eqref{eqn:q_i_assgn_full_thm} $\tZvec_i$ is the vector containing all the variables
907:         $X_j$ decoded prior to decoding $X_i$, $\tTvec_i$ is a vector that contains all the variables $\hY_p$ decoded
908:         prior to decoding $\hY_i$, and $\tYvec_{l',\Bj}(\tTvec_i)$ contains all the $Y_{l_r'}$, such that
909:          $\hY_{l_r'} \in \tTvec_i$, and $r$ is a location of  $\; '1'$ in the $L_i'$-bit binary representation of $j$.
910:          $L_i'$ is the number of elements in $\tTvec_i$. Note that if $\hY_p \in \tTvec_i$ then we must have
911:          $X_p \in \tZvec_i$.
912: 
913: \end{theorem}
914: 
915:         To facilitate the understanding of the expressions in theorem \ref{thm:achieve_N_result_2}, we first look at a simplified case
916:         where the destination decodes each relay message independently of the messages of the other relays.
917: %Therefore, each $s_{i,k}$ is decoded using only $\yvec(k)$
918: %and each $\mL_i(k-1)$ is generated based only on $\xvec_1(s_{1,k-1}),\xvec_2(s_{2,k-1}),...,\xvec_N(s_{N,k-1})$ and $\yvec(k-1)$.
919: %This is the simplest implementation of the multi-relay EAF strategy.
920: This can be obtained from theorem
921: \ref{thm:achieve_N_result_2} by setting $\tZvec_i = \varnothing$ and $\tTvec_i = \varnothing$, $i = 1,2,...,N$. The result is summarized in the
922: following corollary:
923: \begin{corollary}
924:     \label{corr:achieve_N_result_1}
925:     \it
926:     For the general multiple-relay channel
927:         $\left( \mX \times_{i=1}^N \mX_i,p(y,y_1,...,y_N|x,x_1,...,x_N), \mY \times_{i=1}^N \mY_i\right)$, any rate $R$ satisfying
928:         \begin{equation}
929:         \label{eqn:rate_expression_multi_relay}
930:             R \le I(X;Y|\Xvec) + \sum_{\theta = 1}^{2^N-1} P(\Bt)I(X;\Yvec_{\Bt}|\Xvec,Y),
931:         \end{equation}
932:         is achievable,
933: %        where $\Bt$ is an $N$-element vector that contains $'1'$ in the location where the $N$-bit binary representation
934: %        of the integer $\theta$ contains $'1'$,
935: %        $P(\Bt) = \prod_{i:\Bt_i = 0} (1-q_i) \prod_{i:\Bt_i = 1} q_i$,
936: %        $\Yvec_{\Bt} = \left(Y_{i_1}, Y_{i_2},...,Y_{i_M} \right)$, where $i_1$, $i_2$, ..., $i_M$ are
937: %        the locations of the $'1'$ in $\Bt$, and
938:         where
939:         \begin{equation}
940:             \label{eqn:q_i_assgn_simple}
941:             q_i = \left[ \frac{I(X_i;Y)}{H(Y_i|\Xvec,Y)} \right]^*,
942:         \end{equation}
943:         for the joint distribution
944:         $p(x,x_1,x_2,...,x_N,y,y_1,y_2,...,y_N)=p(x)p(x_1)...p(x_N)p(y,y_1,...,y_N|x,x_1,...,x_N)$.
945: \end{corollary}
946: \bigskip
947: 
948:     In the multi-relay strategy we employ in this section
949:     each relay transmits its channel output $Y_i$ with probability $q_i$, independent of the other relays.
950:     Therefore, when considering a group of $N$ relays,
951:     the probability that any subgroup of relays will transmit their channel outputs simultaneously is simply the product of all transmission
952:     probabilities $q_i$ at each relay in the group, multiplied by the product of erasure probabilities $(1-q_i)$ for each relay in the complement
953:     group. Now, considering the rate expression of \eqref{eqn:rate_expression_multi_relay} we observe
954:     that the rate is obtained by taking all possible groupings of relays. For each grouping the resulting rate is the
955:     rate obtained when using all the channel outputs of all the relays in that group to assist in decoding. This is indicated by the
956:     term $\Yvec_{\Bt}$. This rate has to be weighted by the probability of such an overlap occurring, which is given by $P(\Bt)$.
957:     We then sum over all such groupings to obtain the achievable rate. The parameter $q_i$ for each relay, which is determined by
958:     \eqref{eqn:q_i_assgn_simple}, can be interpreted by considering the terms in the denominator and numerator: the denominator
959:     $H(Y_i|\Xvec,Y)$ is the (exponent of the) size of uncertainty at the destination receiver about relay $i$'s output $Y_i$. The numerator is
960:     the (exponent of the) size of the information set that can be transmitted from relay $i$ to the destination receiver. Therefore, the fraction
961:     $\frac{I(X_i;Y)}{H(Y_i|\Xvec,Y)}$
962:      can be interpreted as the maximal fraction of the uncertainty at the destination about relay $i$'s channel output $Y_i$,
963:      that can be compensated by the relay transmission. Of course, this fraction has to be upper bounded by one. In the more general setup
964:      of theorem \ref{thm:achieve_N_result_2}, the decoding of the relay information from relay $i$ is done by using the information
965:      from the relays which were decoded before relay $i$ to assist in decoding. This results in the conditioning at the numerator and
966:      the negative terms in the denominator, both of which contribute to increasing the value of $q_i$.
967: 
968: 
969: \subsection{Proof of Theorem \ref{thm:achieve_N_result_2}}
970:     \subsubsection{Overview of Coding Strategy}
971:     The transmitter generates its codebook independent of the relays. Next, each relay generates
972:     its own codebook independent of the other relays following the construction of \cite[theorem 6]{CoverG:79}, with the mapping
973:     $p(\hy_i|x_i,y_i)$ at each relay set to the time-sharing mapping of \eqref{eqn:time-sharing-mapping} with parameter
974:     $q_i$. The destination receiver first needs to decode all the relay codewords $\left\{X_i^n\right\}_{i=1}^N$ and use this information to decode the relay messages
975:     $\left\{\hY_i^n\right\}_{i=1}^N$. To this end, the destination decides on a decoding order for the $X_i^n$ sequences and
976:     a decoding order for the $\hY_i^n$ sequences. These decoding orders determine the maximum value of $q_i$ that can be selected for each relay,
977:     thereby allowing us to determine the auxiliary variables' mappings and obtain an explicit rate expression. Finally, the receiver uses all the
978:     decoded $\left\{X_i^n\right\}_{i=1}^N$ and $\left\{\hY_i^n\right\}_{i=1}^N$ sequences, together with its channel output, to decode the
979:     source message.
980: 
981:     \bigskip
982:     We now give the details of the construction:
983:     fix the distributions $p(x)$, $p(x_1)$, $p(x_2)$,...,$p(x_N)$, and
984:     \begin{equation}
985:     \label{eqn:aux_assign_simple}
986:         p(\hy_i|x_i,y_i) = \left\{
987:             \begin{array}{cl}
988:                 q_i &, \hy_i = y_i\\
989:                 1-q_i & ,\hy_i = \Omega \notin \mY_i
990:             \end{array}
991:         \right.,
992:     \end{equation}
993:     $i = 1,2,...,N$. Let $\mW = \left\{1,2,...,2^{nR}\right\}$ be the source message set.
994:     \subsubsection{Code Construction at the Transmitter and the Relays}
995:     \begin{itemize}
996:         \item  Code construction and transmission at the transmitter are the same as in \cite[theorem 6]{CoverG:79}.
997: 
998:         \item Code construction at the relays is done by repeating the relay code construction
999:             of \cite[theorem 6]{CoverG:79}
1000:             for each relay, where relay $i$ uses the distributions $p(\hy_i|x_i,y_i)$ and
1001:             $p(x_i)$. We denote the relay message, the transmitted message and the partition
1002:             set at relay $i$ at time $k$ with $z_{i,k}$, $s_{i,k}$ and $S^{(i)}_{s_{i,k}}$ respectively. The message set
1003:             for $s_i$ is denoted $\mW_i$, where $||\mW_i|| = 2^{n R_i}$. The message set for $z_i$ is denoted
1004:             $\mW_i'$, $||\mW_i'|| = 2^{n R_i'}$. The relay codewords at relay $i$ are denoted $\hyvec_i(z_i|s_i)$, and
1005:             the transmitted codewords at relay $i$ are denoted $\xvec_i(s_i)$, $s_i \in \mW_i$, $z_i \in \mW_i'$.
1006:     \end{itemize}
1007: 
1008:     \subsubsection{Decoding and Encoding at the Relays}$ $
1009: 
1010:     Consider relay $i$ at time $k-1$:
1011:     \begin{itemize}
1012:         \item From the relay transmission at time $k-1$, the relay knows $s_{i,k-1}$. Now the relay looks for a message
1013:             $z_i \in \mW_i'$, such that
1014:             \[
1015:                 \big(\hyvec_i(z_i|s_{i,k-1}), \yvec_i(k-1), \xvec_i(s_{i,k-1}) \big) \in \styp(\hY_i, Y_i,X_i).
1016:             \]
1017:             Following the argument in \cite[theorem 6]{CoverG:79}, for $n$ large enough there is such a message $z_i$ with
1018:             a probability that is arbitrarily close to $1$, as long as
1019:             \begin{equation}
1020:             \label{eqn:relay_rate_constr_1}
1021:                 R_i' > I(\hY_i;Y_i|X_i) + \eps = q_iH(Y_i|X_i) + \eps.
1022:             \end{equation}
1023:         Denote this message with $z_{i,k-1}$.
1024:         \item Let $s_{i,k}$ be the index of the partition of $\mW_i'$ into which $z_{i,k-1}$ belongs, i.e.,
1025:             $z_{i,k-1} \in S^{(i)}_{s_{i,k}}$.
1026: 
1027:         \item At time $k$ relay $i$ transmits $\xvec_i(s_{i,k})$.
1028:     \end{itemize}
1029: 
1030:     \subsubsection{Decoding at the Destination}
1031: %    Therefore, at each relay we have the rate constraint \ref{eqn:relay_rate_constr_1}, i.e.
1032: %    \[
1033: %        R'_i > q_i H(Y_i|X_i) + \eps
1034: %    \]
1035:     \begin{itemize}
1036:     \item Consider the decoding of $w_{k-1}$ at time $k$, for a
1037:     fixed decoding order: let $\tZvec_i$ contain all the $X_j$'s whose $s_{j,k}$'s are decoded prior
1038:     to decoding $s_{i,k}$. Therefore, decoding $s_{i,k}$ is done by looking for a unique message $s_i \in \mW_i$
1039:     such that
1040:     \begin{eqnarray*}
1041:        &  &  \big(\xvec_i(s_{i}),\xvec_{m_1}(s_{m_1,k}), \xvec_{m_2}(s_{m_2,k}),...,\xvec_{m_{M_i}}(s_{m_{M_i},k}),\yvec(k)
1042:        \big) \in \styp(X_i,\tZvec_i,Y),
1043: %       &  &  \qquad\qquad \hyvec_{l_1}(z_{l_1,k-1}|s_{l_1,k-1}),\hyvec_{l_2}(z_{l_2,k-1}|s_{l_2,k-1}),...,
1044: %            \hyvec_{l_{L_i}}(z_{l_{L_i},k-1}|s_{l_{L_i},k-1})\big) \in \styp(X_i,Y,\tZvec_i),
1045:     \end{eqnarray*}
1046:     where $m_1$, $m_2$,...,$m_{M_i}$ enumerate all the $X_j$'s in
1047:     $\tZvec_i = \left(X_{m_1},X_{m_2},...,X_{m_{M_i}} \right)$.
1048: %    and $l_1$, $l_2$,...,$l_{L_i}$ enumerate all the $\hY_l$'s in $\tZvec_i$. Of course, if $\hY_l$ is in
1049: %    $\tZvec_i$ then also $X_l$ must be there (i.e. we use only legal orderings).
1050:     Assuming correct decoding at the previous steps, then by the point-to-point channel achievability proof
1051:     we obtain that the probability of error for decoding
1052:     $s_{i,k}$ can be made arbitrarily small by taking $n$ large enough as long as
1053:     \begin{equation}
1054:         R_i < I(X_i;Y,\tZvec_i) - \eps = I(X_i;Y|\tZvec_i) - \eps.
1055:     \end{equation}
1056: \begin{comment}
1057:                 Evaluating $I(X_i;Y|\tZvec_i)$ we can write
1058:                 \begin{eqnarray*}
1059:                     I(X_i;Y|\tZvec_i) & = &  I(X_i;Y|\tYvec_i(\tZvec_i),\tXvec_i(\tZvec_i))\\
1060:                                       & = &  H(X_i|\tYvec_i(\tZvec_i),\tXvec_i(\tZvec_i)) - H(X_i|\tYvec_i(\tZvec_i),\tXvec_i(\tZvec_i),Y)\\
1061:                                       & = &  (1-q_{l_1})(H(X_i|\tYvec_{i,2}^{L_i}(\tZvec_i),\tXvec_i(\tZvec_i)) - H(X_i|\tYvec_{i,2}^{L_i}(\tZvec_i),\tXvec_i(\tZvec_i),Y)) +\\
1062:                     &  & \qquad\qquad             q_{l_1}(H(X_i|Y_1,\tYvec_{i,2}^{L_i},\tXvec_i(\tZvec_i)) - H(X_i|Y_1,\tYvec_{i,2}^{L_i},\tXvec_i(\tZvec_i),Y))\\
1063:                                       & = &  (1-q_{l_1}) I(X_i;Y|\tYvec_{i,2}^{L_i}(\tZvec_i),\tXvec_i(\tZvec_i))+
1064:                                             q_{l_1}I(X_i;Y|Y_1,\tYvec_{i,2}^{L_i}(\tZvec_i),\tXvec_i(\tZvec_i))\\
1065:                                       & ... & \\
1066:                                       & = & \sum_{j = 1}^{L_i} P_l(\Bj)I(X_i;Y|\tXvec_i(\tZvec_i),\tYvec_{l,{\Bj}}(\tZvec_i)),
1067:                 \end{eqnarray*}
1068:                 \[
1069:                     P_l(\Bj) = \left\{ \begin{array}{cl}
1070:                                             q_{l_i}, & \Bj_i = 1\\
1071:                                         1 - q_{l_i}, & \Bj_i = 0
1072:                                         \end{array}
1073:                                         \right.,
1074:                 \]
1075:                 $\tYvec_{l,{\Bj}}(\tZvec_i) = (Y_{l_{m_1}}, Y_{l_{m_2}},...,Y_{l_{m_M}})$, $m_1, m_2,...,m_M$ are the locations
1076:                 of $'1'$ in the binary representation of $j$ and $l_i$'s are the indices of the elements of $\Yvec$ in $\tZvec_i$.
1077:                 For example, if $j = 3$ then $m_1 = m_3 = 1$ and $m_2 = 0$. Then,
1078:                 \begin{eqnarray*}
1079:                     P_l(\mbox{Bin}(3)) & = & q_{l_1}(1-q_{l_2})q_{l_3},\\
1080:                     \tYvec_{l,{\mbox{Bin}(3)}}(\tZvec_i)) & = & (Y_{l_1}, Y_{l_3}).
1081:                 \end{eqnarray*}
1082:                 Now, decoding $z_{i,k-1}$ is done only after decoding $s_{i,k}$ therefore the vector
1083:                 of priors should contain $X_i$. Also, if $\hY_j \in \tTvec_i$ then also $X_j \in \tTvec_i$. Now,
1084:                 the destination decodes $z_{i,k-1}$ in the following way:
1085: 
1086: \end{comment}
1087:     Let $\tTvec_i$ contain all the $\hY_{l'}$'s whose $z_{l',k-1}$'s are decoded prior to decoding $z_{i,k-1}$. Note that all
1088:     the $\left\{s_{i,k-1}\right\}_{i=1}^N$ were already decoded at the previous time interval when $w_{k-2}$ was decoded.
1089: 
1090:         \item The destination generates the set
1091:             \begin{eqnarray}
1092:             &  & \mL_i(k-1) = \bigg\{ z_i \in \mW'_i : \big(\yvec(k-1), \hyvec_i(z_i|s_{i,k-1}),
1093:                 \hyvec_{l'_1}(z_{l'_1,k-1}|s_{l'_1,k-1}), ...,\hyvec_{l'_{L'_i}}(z_{l'_{L'_i},k-1}|s_{l'_{L'_i},k-1}),\nonumber\\
1094:             \label{eqn:set_for_multi_relay_EAF}
1095:             &  & \qquad \qquad \qquad \qquad\qquad\qquad
1096: %                    \xvec_{m'_1}(s_{m'_1,k-1}),...,\xvec_{m'_{M'_i}}(s_{m'_{M'_i},k-1}) \big) \in \styp(\hY_i,Y,\tTvec_i)\bigg\},
1097:                     \xvec_{1}(s_{1,k-1}),\xvec_{2}(s_{2,k-1}),...,\xvec_{N}(s_{N,k-1}) \big) \in \styp(Y,\hY_i,\tTvec_i,\Xvec)\bigg\},
1098:             \end{eqnarray}
1099:             where
1100: %            $m'_1$, $m'_2$,...,$m'_{M'_i}$ enumerate all the $X_j$'s in $\tTvec_i$ and
1101:             $l'_1$, $l'_2$,...,$l'_{L'_i}$ enumerate all the $\hY_{l'}$'s in $\tTvec_i$.
1102:             The average size of $\mL_i(k-1)$ can be bounded using the standard technique of
1103:             \cite[equation (36)]{CoverG:79} and the fact that when $z_i \ne z_{i,k-1}$, then the corresponding
1104:             $\hyvec_i(z_i|s_{i,k-1})$ is independent of all the variables in \eqref{eqn:set_for_multi_relay_EAF}
1105:             except $\xvec_i(s_{i,k-1})$. The resulting bound is
1106:             \[
1107:                  E\left\{||\mL_i(k-1)||\right\} \le 1 + 2^{n(R_i' - I(\hY_i;Y,\Xvec_{-i},\tTvec_i|X_i) + 3\eps)},
1108:             \]
1109:             where $\Xvec_{-i}$ is an $N-1$ element vector that contains all the elements of $\Xvec$ except $X_i$.
1110: %    \begin{figure}[ht]
1111: %         \epsfxsize=0.26\textwidth \leavevmode\centering\epsffile{Markov_chain.eps}
1112: %        \caption{The Markov relationship between the random variables used for decoding $z_i$, for the case
1113: %        of $L_i' = 2$. Edges in the figure represent Markov relationship.}
1114: %        \label{fig:Markov-relation}
1115: %    \end{figure}
1116: 
1117:         \item Now, the destination looks for a unique $z_{i} \in \mL_i(k-1) \bigcap S^{(i)}_{s_{i,k}}$.
1118:             Therefore, making the probability of error arbitrarily small by taking $n$ large enough
1119:             can be done as long as
1120:             \begin{equation}
1121:             \label{eqn:Ri'_upper_bound}
1122:                 R'_i < I(\hY_i;Y,\Xvec_{-i},\tTvec_i|X_i) + I(X_i;Y|\tZvec_i) -  4\eps.
1123:             \end{equation}
1124:     \end{itemize}
1125:      We note that using the assignment \eqref{eqn:aux_assign_simple} we can write
1126:          \begin{eqnarray*}
1127:              I(\hY_i;Y,\Xvec_{-i},\tTvec_i|X_i) & =  & H(Y,\Xvec_{-i},\tTvec_i|X_i) - H(Y,\Xvec_{-i},\tTvec_i|X_i,\hY_i)\\
1128:                     & = & H(Y,\Xvec_{-i},\tTvec_i|X_i)  - (1-q_i)H(Y,\Xvec_{-i},\tTvec_i|X_i) - q_i H(Y,\Xvec_{-i},\tTvec_i|X_i,Y_i)\\
1129:                     & = & q_iH(Y,\Xvec_{-i},\tTvec_i|X_i)  - q_i H(Y,\Xvec_{-i},\tTvec_i|X_i,Y_i)\\
1130:                     & = & q_i I(Y_i;Y,\Xvec_{-i},\tTvec_i|X_i)\\
1131:                     & = & q_i\left(H(Y_i|X_i) - H(Y_i|Y,\Xvec_{-i},X_i, \hY_{l_1'},\tTvec_{i,2}^{L_i'})  \right)\\
1132:                     & = & q_i\Big(q_{l_1'}H(Y_i|X_i) + (1-q_{l_1'}) H(Y_i|X_i)\\
1133:                     &   & \qquad \qquad  - q_{l_1'}H(Y_i|Y,\Xvec_{-i},X_i, Y_{l_1'},\tTvec_{i,2}^{L_i'})   - (1-q_{l_1'})H(Y_i|Y,\Xvec_{-i},X_i,\tTvec_{i,2}^{L_i'})\Big)\\
1134:                     & = & q_i\Big(q_{l_1'}I(Y_i;Y,\Xvec_{-i}, Y_{l_1'},\tTvec_{i,2}^{L_i'}|X_i)
1135:                              + (1-q_{l_1'}) I(Y_i;Y,\Xvec_{-i}, \tTvec_{i,2}^{L_i'}|X_i)\Big)\\
1136:                     & ... &\\
1137:                     & = & q_i \sum_{j = 0}^{2^{L'_i}-1} P_{l'}(\Bj)I(Y_i;Y,\Xvec_{-i} ,\tYvec_{l',\Bj}(\tTvec_i)|X_i),
1138:          \end{eqnarray*}
1139:     where $P_{l'}(\Bj) = \prod_{r:\Bj_{r} = 1} q_{l'_r} \times \prod_{r:\Bj_{r} = 0}(1- q_{l'_r})$,
1140:     $\Bj_{r}$ is the $r$-th bit of the $L_i'$-bit binary representation of $j$, and
1141:     $\tYvec_{l',\Bj}(\tTvec_i) = \left(Y_{l_{n_1}'}, Y_{l_{n_2}'},...,Y_{l_{n_M}'}\right)$,
1142:     $n_1, n_2,...,n_M$ are the locations of '1' in the $L_i'$-bit binary representation of $j$, and
1143:     $l_{n_1}', l_{n_2}',...,l_{n_M}'$ are the indices of the $\hY_i$'s in locations $n_1, n_2,...,n_M$ in $\tTvec_i$.
1144:     For example, if $L_i' = 3$ and $j = 5$ then $\mbox{Bin}_3(5) = (1,0,1)$ and $M = 2$,
1145:     $n_1 = 1, n_2 = 3$. Letting $\tTvec_i = \left(\hY_3,\hY_1,\hY_2\right)$
1146:     then $l_1' = 3, l_2' = 1$ and $l_3' = 2$, and
1147:                 \begin{eqnarray*}
1148:                     P_{l'}(\mbox{Bin}_3(5)) & = & q_{l_1'}(1-q_{l_2'})q_{l_3'},\\
1149:                     \tYvec_{l',{\mbox{Bin}_3(5)}}(\tTvec_i) & = & (Y_{l_1'}, Y_{l_3'}) = (Y_3,Y_2).
1150:                 \end{eqnarray*}
1151: 
1152:     \subsubsection{Combining the Bounds on $R'_i$}
1153:     Applying the above scheme requires that $R'_i$ satisfies \eqref{eqn:relay_rate_constr_1} and
1154:     \eqref{eqn:Ri'_upper_bound}:
1155:     \begin{eqnarray*}
1156:         q_i H(Y_i|X_i) + \eps < R'_i & < & q_i \sum_{j = 0}^{2^{L'_i}-1} P_{l'}(\Bj)I(Y_i;Y,\Xvec_{-i} ,\tYvec_{l',\Bj}(\tTvec_i)|X_i)
1157:             + I(X_i;Y|\tZvec_i) - 4\eps,
1158: %            &  & \qquad + \sum_{j = 0}^{L_i} P_l(\Bj)I(X_i;Y|\tXvec_i(\tZvec_i),\tYvec_{l,{\Bj}}(\tZvec_i)) - 4\eps
1159:     \end{eqnarray*}
1160:     which is satisfied if
1161:     \begin{eqnarray*}
1162: %        q_i \le \frac{\sum_{j = 1}^{L_i} P_l(\Bj)I(X_i;Y|\tXvec_i(\tZvec_i),\tYvec_{l,{\Bj}}(\tZvec_i)) - 5\eps}
1163: %                        {H(Y_i|X_i) -\sum_{j = 1}^{L'_i} P_{l'}(\Bj)I(Y_i;Y|\tXvec_i(\tTvec_i),\tYvec_{l',\Bj}(\tTvec_i))}
1164:         q_i & < & \frac{I(X_i;Y|\tZvec_i) - 5\eps}
1165:                         {H(Y_i|X_i) -\sum_{j = 0}^{2^{L'_i}-1} P_{l'}(\Bj)I(Y_i;Y,\Xvec_{-i} ,\tYvec_{l',\Bj}(\tTvec_i)|X_i)}\\
1166:         & = & \frac{I(X_i;Y|\tZvec_i) - 5\eps}
1167:                         {H(Y_i|X_i) - I(Y_i;Y,\Xvec_{-i}|X_i) -\sum_{j = 1}^{2^{L'_i}-1} P_{l'}(\Bj)I(Y_i;\tYvec_{l',\Bj}(\tTvec_i)|\Xvec,Y)}\\
1168:         & = & \frac{I(X_i;Y|\tZvec_i) - 5\eps}
1169:                         {H(Y_i|\Xvec,Y) -\sum_{j = 1}^{2^{L'_i}-1} P_{l'}(\Bj)I(Y_i;\tYvec_{l',\Bj}(\tTvec_i)|\Xvec,Y)}.
1170:     \end{eqnarray*}
1171:     Combining with the constraint $0 \le q_i \le 1$ gives the condition in \eqref{eqn:q_i_assgn_full_thm}.
1172: 
1173:     Finally, %having set all the $q_i$'s, $i=1,2,...,N$
1174:     the achievable rate is obtained as
1175:     follows: using the decoded $\left\{\hyvec_i(z_{i,k-1}|s_{i,k-1})\right\}_{i=1}^N$ (assuming
1176:     correct decoding of all $\left\{z_{i,k-1} \right\}_{i=1}^N$) the receiver decodes the source
1177:     message $w_{k-1}$ by looking for a message $w \in \mW$ such that
1178:     \begin{eqnarray*}
1179:        &  &\Big(\xvec(w), \hyvec_1(z_{1,k-1}|s_{1,k-1}), \hyvec_2(z_{2,k-1}|s_{2,k-1}),...,
1180:             \hyvec_N(z_{N,k-1}|s_{N,k-1}),\\
1181:        &  &\qquad \qquad       \xvec_1(s_{1,k-1}),\xvec_2(s_{2,k-1}),...,\xvec_N(s_{N,k-1}),\yvec(k-1)\Big)
1182:         \in \styp(X,\hYvec,\Xvec,Y),
1183:     \end{eqnarray*}
1184:     where $\hYvec = \left(\hY_1, \hY_2,...,\hY_N\right)$.
1185:     This results in an achievable rate of
1186:     \[
1187:         R \le I(X;Y,\hYvec,\Xvec) = I(X;Y,\hYvec|\Xvec).
1188:     \]
1189:     Plugging in the assignments of all the $\hY_i$'s, we get the following explicit rate expression:
1190:     \begin{eqnarray*}
1191:             I(X;Y,\hYvec|\Xvec) & = & I(X;Y|\Xvec) + I(X;\hYvec|\Xvec,Y)\\
1192:             & = & I(X;Y|\Xvec) + H(X|\Xvec,Y)  -  H(X|\Xvec,Y,\hYvec)\\
1193:             & = & I(X;Y|\Xvec) + H(X|\Xvec,Y)  -  (1-q_1) H(X|\Xvec,Y,\hYvec_2^N) - q_1 H(X|\Xvec,Y,\hYvec_2^N,Y_1)\\
1194:             & = & I(X;Y|\Xvec) +  (1-q_1) I(X;\hYvec_2^N|\Xvec,Y) + q_1 I(X;\hYvec_2^N,Y_1|\Xvec,Y)\\
1195:             & ... &\\
1196:             & = & I(X;Y|\Xvec) + \sum_{\theta = 1}^{2^N-1} P(\Bt)I(X;\Yvec_{\Bt}|\Xvec,Y).
1197:         \end{eqnarray*}
1198: \tend
1199: 
1200: 
1201: 
1202: 
1203: 
1204: 
1205: \begin{comment}
1206: \subsection{An Achievable Rate Using Separate Decoding of the Auxiliary Variables}
1207: \label{sec:achieve_with_individual_assignments}
1208: A simple rate expression is obtained when the receiver decodes each $\xvec_i$ sequence and each $\hyvec_i$ sequence
1209:  without using the previously decoded sequences to improve decoding. Therefore, each $s_{i,k}$ is decoded using only $\yvec(k)$
1210: and each $\mL_i(k-1)$ is generated based only on $\xvec_1(s_{1,k-1}),\xvec_2(s_{2,k-1}),...,\xvec_N(s_{N,k-1})$ and $\yvec(k-1)$.
1211: This is the simplest implementation of the multi-relay EAF strategy. This can be obtained from theorem
1212: \ref{thm:achieve_N_result_2} by setting $\tZvec_i = \varnothing$ and $\tTvec_i = \varnothing$, $i = 1,2,...,N$. The result is summarized in the
1213: following corollary:
1214: \begin{corollary}
1215:     \label{corr:achieve_N_result_1}
1216:     \it
1217:     For the general multiple-relay channel
1218:         $\left( \mX \times_{i=1}^N \mX_i,p(y,y_1,...,y_N|x,x_1,...,x_N), \mY \times_{i=1}^N \mY_i\right)$, any rate $R$ satisfying
1219:         \[
1220:             R \le I(X;Y|\Xvec) + \sum_{\theta = 1}^{2^N-1} P(\Bt)I(X;\Yvec_{\Bt}|\Xvec,Y),
1221:         \]
1222:         is achievable,
1223: %        where $\Bt$ is an $N$-element vector that contains $'1'$ in the location where the $N$-bit binary representation
1224: %        of the integer $\theta$ contains $'1'$,
1225: %        $P(\Bt) = \prod_{i:\Bt_i = 0} (1-q_i) \prod_{i:\Bt_i = 1} q_i$,
1226: %        $\Yvec_{\Bt} = \left(Y_{i_1}, Y_{i_2},...,Y_{i_M} \right)$, where $i_1$, $i_2$, ..., $i_M$ are
1227: %        the locations of the $'1'$ in $\Bt$, and
1228:         where
1229:         \begin{equation}
1230:             \label{eqn:q_i_assgn_simple}
1231:             q_i = \left[ \frac{I(X_i;Y)}{H(Y_i|\Xvec,Y)} \right]^*,
1232:         \end{equation}
1233:         for the joint distribution
1234:         $p(x,x_1,x_2,...,x_N,y,y_1,y_2,...,y_N)=p(x)p(x_1)...p(x_N)p(y,y_1,...,y_N|x,x_1,...,x_N)$.
1235: \end{corollary}
1236: 
1237:        \bigskip
1238:        \begin{proof}
1239: 
1240: 
1241:            \subsubsection{Decoding at the Destination}
1242:            We first decode the relay messages and then decode the source message.
1243:            We decode the relay messages independently: decoding first $s_{i,k}$ and then
1244:            $z_{i,k}$, using only $s_{i,k}$ and $s_{i,k-1}$. Therefore, for each relay we get a constraint
1245:            \begin{equation}
1246:                I(X_i;Y) \ge I(\hY_i;Y_i|X_i).
1247:            \end{equation}
1248: 
1249: 
1250: 
1251:            \subsubsection{Assigning the Auxiliary Variables According to \eqref{eqn:aux_assign_simple}}
1252:                Using the assignment in \eqref{eqn:aux_assign_simple} for each auxiliary random
1253:                variable, the set of constraints becomes:
1254:                \begin{equation}
1255:                    \label{eqn:cond_q_i_simple}
1256:                    I(X_i;Y) \ge q_i H(Y_i|X_i,Y).
1257:                \end{equation}
1258: 
1259:                Finally, we address the question of the assignment of $q_i$: as can be seen from the derivation
1260:                step above, at each substitution we would like to maximize the $q_i$. Therefore, maximizing
1261:                \eqref{eqn:cond_q_i_simple} while keeping $q_i \le 1$ yields \eqref{eqn:q_i_assgn_simple}.
1262:        \end{proof}
1263: 
1264: \end{comment}
1265: 
1266: 
1267: \subsection{Discussion}
1268: To demonstrate the usefulness of the explicit EAF-based achievable rate of theorem \ref{thm:achieve_N_result_2} we
1269: compare it with the DAF-based method of
1270: \cite[theorem 3.1]{XieKumar:2005} for the two-relay case.
1271: For this scenario there are five possible DAF setups, and the maximum of the five resulting rates is taken as the
1272: DAF-based rate:
1273: \begin{eqnarray*}
1274:     R^{DAF} & = & \sup_{p(x,x_1,x_2)} \max \left\{R_1, R_2, R_{12}, R_{21}, R_G \right\}\\
1275:     R_1 & = & \max_{x_2\in \mX_2} \min\left\{I(X; Y_1|X_1,x_2), I(X; Y|X_1, x_2) + I(X_1; Y|x_2) \right\}\\
1276:     R_2 & = & \max_{x_1\in \mX_1} \min\left\{I(X; Y_2|X_2,x_1), I(X; Y|X_2, x_1) + I(X_2; Y|x_1) \right\}\\
1277:     R_{12} & = & \min\left\{I(X; Y_1|X_1, X_2), I(X; Y_2|X_1, X_2) + I(X_1; Y_2|X_2), I(X; Y|X_1, X_2)+ I(X_1; Y|X_2) + I(X_2; Y)  \right\}\\
1278:     R_{21} & = & \min\left\{I(X; Y_2|X_1, X_2), I(X; Y_1|X_1, X_2) + I(X_2; Y_1|X_1), I(X; Y|X_1, X_2)+ I(X_2; Y|X_1) + I(X_1; Y)  \right\}\\
1279:     R_G & = & \min \left\{I(X; Y_1|X_1, X_2), I(X; Y_2|X_1, X_2), I(X,X_1,X_2; Y) \right\},
1280: \end{eqnarray*}
1281: where $R_1$ is the rate obtained when only relay 1 is active, $R_2$ is the rate obtained when only relay 2 is active,
1282: $R_{12}$ is the rate obtained when relay 1 decodes first and relay 2 decodes second and $R_{21}$ is
1283: the rate obtained when this order is reversed.
1284:  $R_G$ is the rate obtained when both relays form one group\footnote{In fact, since we take the supremum over all p.d.f.'s
1285:  $p(x,x_1,x_2)$ we do not need to explicitly include $R_1$ and $R_2$ in the maximization, but
1286:  they are included here to provide a complete presentation.}.
1287: Now, as in the single-relay case, DAF is limited by the worst source-relay link. Therefore, if
1288: \begin{equation}
1289:     \label{eqn:DAF_inequality}
1290:      R^{PTP} >
1291:         \max_{p(x|x_1,x_2), (x_1,x_2) \in \mX_1 \times \mX_2} \big\{I(X;Y_1|x_1,x_2), I(X;Y_2|x_1,x_2)\big\},
1292: \end{equation}
1293: where $  R^{PTP} = \max_{p(x|x_1,x_2), (x_1,x_2) \in \mX_1 \times \mX_2} I(X; Y|x_1,x_2)$
1294: is the point-to-point rate,
1295: then it is better not to use \cite[theorem 3.1]{XieKumar:2005} at all, but rather set the relays to transmit
1296: the symbol pair $(x_1,x_2) \in \mX_1 \times \mX_2$ such that the point-to-point rate is maximized.
1297: However, the rate obtained using theorem \ref{thm:achieve_N_result_2} with $\tZvec_i = \tTvec_i = \varnothing$, $i = 1,2$, for the two-relay case is given by
1298: \begin{eqnarray*}
1299:     R^{TS-EAF} & \le &  \sup_{p(x)p(x_1)p(x_2)} I(X; Y|X_1,X_2) + q_1(1-q_2) I(X;Y_1|X_1,X_2,Y)  \\
1300:     &  & \phantom{xxxxxxxxxxxxxxx}  +(1-q_1)q_2I(X;Y_2|X_1,X_2,Y) + q_1q_2I(X;Y_1,Y_2|X_1,X_2,Y),
1301: \end{eqnarray*}
1302: where $q_1$ and $q_2$ are positive and determined according to \eqref{eqn:q_i_assgn_full_thm} with $\tZvec_i = \tTvec_i = \varnothing$.
1303: This expression can, in general, be greater than
1304: %$\max_{p(x|x_1,x_2), (x_1,x_2) \in \mX_1 \times \mX_2} I(X; Y|x_1,x_2)$
1305: $R^{PTP}$
1306: even when
1307: \eqref{eqn:DAF_inequality} holds, for channels where the relay to destination links are very good.
1308:  Hence, this explicit achievable expression provides an easy way to improve upon the
1309: DAF-based achievable rates when the source-to-relay links are very noisy.
1310: 
1311: To demonstrate this, consider the channel given in table \ref{table:channel_for_example} over binary RVs
1312: $X$, $X_1$, $X_2$, $Y$, $Y_1$ and $Y_2$. The channel
1313: \begin{table}
1314:        \caption{$p(y,y_1,y_2|x,x_1,x_2)$ for the EAF example.}
1315:         \label{table:channel_for_example}
1316:        \begin{tabular}[h!]{|c||c|c|c|c|c|c|c|c|}
1317:                \hline
1318:                $(x,x_1,x_2)$& \multicolumn{7}{c}{$p(y,y_1,y_2|x,x_1,x_2)$} &\\
1319:                \cline{2-9}
1320:                & 000 & 001 & 010 & 011 & 100  & 101 & 110 & 111\\
1321:               \hline \hline
1322:              000 &  8.047314e-2  &  1.948360e-1 &   2.041506e-1  &  4.523933e-2 &
1323:                  2.423322e-1  &  7.057734e-3 &   1.310053e-1  &  9.490483e-2\\
1324: 
1325:              001 &  8.601616e-1  &  6.643713e-2  &  1.662897e-2  &  1.937227e-2 &
1326:                  1.859104e-2  &  1.741020e-2 &   8.833169e-4  &  5.154431e-4 \\
1327: 
1328:              010 &  3.131504e-1  &  1.821840e-1  &  5.618147e-2  &  1.522841e-1 &
1329:                  5.290856e-2  &  1.555570e-1 &   3.214581e-2  &  5.558854e-2  \\
1330: 
1331:              011 &  5.183921e-3  &  3.704625e-1  &  1.641795e-2  &  2.208356e-1 &
1332:                  1.660775e-3  &  2.355928e-1  &  9.590170e-4  &  1.488874e-1 \\
1333: 
1334:              100 &  8.116746e-3  &  8.139504e-3  &  9.387860e-2  &  1.736515e-2 &
1335:                  1.039350e-1  &  7.308714e-3  &  7.612555e-1  &  7.612563e-7\\
1336: 
1337:              101 &  4.824126e-2  &  1.196128e-1  &  1.705739e-1  &  7.127199e-2 &
1338:                  4.631349e-2  &  1.955324e-1  &  1.928693e-1  &  1.555848e-1\\
1339: 
1340:              110 &  9.367321e-2  &  1.248830e-1  &  1.873302e-1  &  6.161358e-2 &
1341:                  5.827773e-2  &  1.906660e-1  &  1.589616e-1  &  1.245946e-1\\
1342: 
1343:              111 &  9.141272e-7  &  9.141263e-1  &  7.618061e-3  &  3.435473e-2 &
1344:                  7.974830e-4  &  4.117531e-2  &  9.302643e-4  &  9.969457e-4\\
1345:            \hline
1346:        \end{tabular}
1347: \end{table}
1348: distribution was constructed under the independence constraint
1349: \[
1350:     p(y,y_1,y_2|x,x_1,x_2) = p(y_1|x,x_1,x_2) p(y_2|x,x_1,x_2) p(y|x,x_1,x_2,y_1,y_2),
1351: \]
1352: i.e. given the channel inputs, the two relay outputs are independent.
1353: This channel is characterized by noisy source-relay links, while
1354: the link from relay $1$ to the destination has low noise. Therefore, DAF is inferior to the point-to-point
1355: transmission but EAF is able to exceed this rate, by giving up a small amount of rate on the direct link (compared
1356: to the point-to-point rate) and gaining more rate through the relays. The numerical evaluation of the
1357: rates for this channel produces\footnote{The resulting rates were obtained by optimizing for the rates with
1358: random initial input distributions. The optimization was repeated $50$ times for each rate and the maximum resulting rate
1359: was recorded. The m-files used for this evaluation are available at {\tt http://cn.ece.cornell.edu}.}
1360: \begin{eqnarray*}
1361: R^{PTP}  & = & 0.2860323,\\
1362: R^{DAF} & = & 0.2408629,\\
1363: R^{TS-EAF} & = & 0.2924798,
1364: \end{eqnarray*}
1365: where the optimal distributions that achieve these rates are summarized in tables \ref{table:opt_DAF_dist} and
1366: \ref{table:opt_EAF_dist}.
1367: \begin{table}
1368: \centering
1369: \begin{minipage}{5cm}
1370: \centering
1371:     \caption{Optimal distribution for DAF}
1372:     \label{table:opt_DAF_dist}
1373:     \vspace{-0.2cm}
1374:        \begin{tabular}[!h]{|c||c|}
1375:                \hline
1376:                $(x,x_1,x_2)$ & $p(x,x_1,x_2)$ \\
1377:                 \hline \hline
1378:                 000 & 5.698189907239905e-009\\
1379:                 001 & 5.259061814752764e-017\\
1380:                 010 & 4.301809992760095e-009\\
1381:                 011 & 4.424193267301109e-001\\
1382:                 100 & 6.792096128437060e-009\\
1383:                 101 & 4.740938235494830e-017\\
1384:                 110 & 3.207903771562940e-009\\
1385:                 111 & 5.575806532698892e-001\\
1386:                \hline
1387:        \end{tabular}
1388: \end{minipage}
1389: \phantom{xxxxxxxxx}
1390: \begin{minipage}{5cm}
1391:     \centering
1392:     \caption{Optimal distribution for EAF}
1393:     \label{table:opt_EAF_dist}
1394:         \vspace{-0.2cm}
1395:        \begin{tabular}[!h]{|c|}
1396:                \hline
1397:             $\Pr(X = 0)  = 4.3752093552645e-001$\\
1398: %            \hline
1399:             $\Pr(X_1 = 0) =1.9388669163312e-001 $\\
1400: %            \hline
1401:             $\Pr(X_2 = 0) = 1.000000000000000e-009$\\
1402:             \hline
1403:        \end{tabular}
1404: \end{minipage}
1405: 
1406: \end{table}
1407: The optimal DAF distribution fixes both $X_1$ and $X_2$ to $'1'$ and sets the probability of $X$ to be
1408: $\Pr(X = 0) = 0.442419$, as expected for the case where the relays limit the achievable rate. For the EAF, the
1409: useless relay $2$ is fixed to $1$, to facilitate transmission with the useful relay $1$. In accordance, we
1410: obtain time sharing proportions of $q_1 = 0.156947$ and $q_2 \approx 0$ for relay $1$ and relay $2$ respectively.
1411: We note that in this scenario, we actually have that even the single-relay TS-EAF outperforms the two-relay DAF.
1412: 
1413: 
1414: 
1415: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1416: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1417: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1418: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1419: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1420: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1421: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1422: 
1423: 
1424: 
1425: 
1426: 
1427: 
1428: 
1429: 
1430: 
1431: 
1432: 
1433: 
1434: \section{The Gaussian Relay Channel}
1435: \label{sec:Gauss_relay}
1436: 
1437: In this section we investigate the application of estimate-and-forward with time-sharing
1438: to the Gaussian relay channel. For this channel, the common practice is to use Gaussian codebooks and
1439: Gaussian quantization at the relay. The rate in Gaussian scenarios where coded modulation is applied, is usually
1440: analyzed by applying DAF at the relay. In this section we show that when considering coded modulation, one should select the
1441: relay strategy according to the channel condition: Gaussian selection seems a good choice when the SNR at the relay
1442: is low and DAF appears to be superior when the relay enjoys high SNR conditions. However, for
1443: intermediate SNR there is much room
1444: for optimizing the estimation mapping at the relay.
1445: 
1446: 
1447: In the following we first recall the Gaussian relay channel with a Gaussian codebook, and
1448: then we consider the Gaussian relay channel under BPSK modulation constraint. Since we focus on the mapping
1449: at the relay we consider here the Gaussian relay channel with an orthogonal relay of finite
1450: capacity $C$, also considered in
1451: \cite{Goldsmith:2006}. This scenario is depicted in figure \ref{fig:Gauss_relay}.
1452: 
1453: \begin{figure}[ht]
1454:      \epsfxsize=0.6\textwidth \leavevmode\centering\epsffile{Gaussian_Relay_Channel.eps}
1455:     \caption{The Gaussian relay channel with a finite capacity noiseless relay link between the relay and the
1456:     destination.}
1457:     \label{fig:Gauss_relay}
1458: \end{figure}
1459: 
1460: Here $Y_1 = g \cdot X + N_1$ is the channel output at the relay, $Y = X + N$ is the channel output at the receiver, which decodes
1461: the message based on $(Y^n, \hY_1^n)$. Let $\mW = \left\{1,2,...,2^{nR}\right\}$ denote the source message set, and let
1462: the source have an average power constraint $P$:
1463: \[
1464:     \frac{1}{n}\sum_{i=1}^n x_i^2(w) \le P, \qquad \forall w \in \mW.
1465: \]
1466: The relay signal $\hY_1^n$ is transmitted to the destination through a finite-capacity noiseless link of
1467: capacity $C$. For this scenario the expressions of \cite[theorem 6]{CoverG:79} specialize to
1468: \begin{subequations}
1469: \begin{eqnarray}
1470:     \label{eqn:rate_Gauss}
1471:     R & \le & I(X;Y,\hY_1)\\
1472:     \label{eqn:constraint_Gauss}
1473:     \mbox{subject to } C & \ge & I(\hY_1;Y_1|Y),
1474: \end{eqnarray}
1475: \end{subequations}
1476: with the Markov chain $X,Y - Y_1 - \hY_1$.
1477: 
1478: We also consider in this section the DAF method whose information rate is given by (see \cite[theorem 1]{CoverG:79})
1479: \[
1480:     R_{DAF} = \min \left\{I(X;Y_1), I(X;Y)+C\right\},
1481: \]
1482: and the upper bound of \cite[theorem 3]{CoverG:79}:
1483: \[
1484:     R_{upper} = \min\left\{I(X;Y)+C, I(X;Y,Y_1)\right\}.
1485: \]
1486: We note that although these expressions were derived for the finite, discrete alphabets case, following the argument
1487: in \cite[remark 30]{Kramer:2005}, they also hold for the Gaussian case.
1488: 
1489: \subsection{The Gaussian Relay Channel with Gaussian Codebooks}
1490: When $X \sim \mN(0,P)$, i.i.d., then the channel outputs at the relay and the receiver are jointly Normal RVs:
1491: \[
1492:     \left(
1493:         \begin{array}{c}
1494:         y\\
1495:         y_1
1496:         \end{array} \right) \sim \mN\left( \left( \begin{array}{c}
1497:                                                         0\\ 0 \end{array}
1498:                                                         \right) , \left( \begin{array}{cc}
1499:                                                                             P + \sigD & gP \\
1500:                                                                                 gP    & g^2P + \sigR \end{array} \right) \right).
1501: \]
1502: The compression is achieved by adding to $Y_1$ a zero mean independent Gaussian RV, $N_Q$:
1503: \begin{equation}
1504:     \label{eqn:def_qaussian_quant}
1505:     \hY_1 = Y_1 + N_Q, \qquad N_Q \sim \mN(0, \sigQ).
1506: \end{equation}
1507: We refer to the assignment \eqref{eqn:def_qaussian_quant} as Gaussian-quantization estimate-and-forward (GQ-EAF).
1508: Evaluating the expressions \eqref{eqn:rate_Gauss} and \eqref{eqn:constraint_Gauss}
1509: with assignment \eqref{eqn:def_qaussian_quant} results in  (see also \cite{Goldsmith:2006}):
1510: %\begin{eqnarray*}
1511: %    I(X;Y)  & = & \log(1+P) +
1512: %    I(Y_1;\hY_1|Y)  & = & \log\left(1 + \frac{1}{\sigQ} + \frac{gP}{\sigQ(P+1)}  \right)\\
1513: %    I(X;\hY_1|Y)    & = & \log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right)\\
1514: %\end{eqnarray*}
1515: \begin{subequations}
1516: \begin{eqnarray}
1517:     \label{eqn:rate_Gauss_evaluated}
1518:     I(X;Y, \hY_1)   & = & \frac{1}{2}\log_2\left(1+P + \frac{g^2P}{1 + \sigQ}  \right)\\
1519:         \label{eqn:constraint_Gauss_evaluated}
1520:     I(Y_1;\hY_1|Y)  & = & \frac{1}{2}\log_2\left(1  + \frac{1+P+g^2P}{\sigQ(P+1)}  \right).
1521: \end{eqnarray}
1522: \end{subequations}
1523: The feasibility condition \eqref{eqn:constraint_Gauss} yields
1524: \[
1525:     \sigQ\ge  \frac{1+P+g^2P}{(2^{2C}  - 1)(P+1)},
1526: \]
1527: and because maximizing the rate \eqref{eqn:rate_Gauss_evaluated} requires minimizing $\sigQ$, the resulting
1528: GQ-EAF rate expression is
1529: \[
1530:     R  \le \frac{1}{2} \log_2\left(1+P + \frac{g^2P}{1 + \frac{1+P+g^2P}{(2^{2C}  - 1)(P+1)}}  \right).
1531: \]
1532: Now, when using Gaussian quantization at the relay
1533: %(as it is the most efficient way to compress $Y_1$, the Gaussian channel output at the relay {\Huge reference} ),
1534: it is
1535: obvious that time sharing does not help: we need the minimum $\sigQ$ in order to maximize
1536: the rate. This minimum is
1537: obtained only when the entire capacity of the relay link is dedicated to the transmission of the (minimally)
1538: quantized $Y_1$.
1539: However, when we consider the Gaussian relay channel with coded modulation, the situation is quite different, as
1540: we show in the remainder of this section.
1541: 
1542: 
1543: 
1544: 
1545: 
1546: \subsection{The Gaussian Relay Channel with Coded Modulation}
1547: Consider the Gaussian relay channel where $X$ is an equiprobable BPSK signal of amplitude $\sqrt{P}$:
1548: \begin{equation}
1549:     \label{eqn:def_PX}
1550:     \Pr(X = \sqrt{P}) = \Pr(X = -\sqrt{P}) = \frac{1}{2}.
1551: \end{equation}
1552: Under these conditions, the received symbols $(Y,Y_1)$ are no longer jointly Gaussian, but follow a Gaussian-mixture
1553: distribution:
1554: \begin{eqnarray*}
1555:     f(y,y_1) & = & \Pr(X = \sqrt{P})f(y,y_1|x = \sqrt{P}) + \Pr(X = -\sqrt{P})f(y,y_1|x = -\sqrt{P}) \\
1556:              & = & \frac{1}{2}\left(G_y(\sqrt{P},\sigD)G_{y_1}(g\sqrt{P} , \sigR) + G_y(-\sqrt{P},\sigD)G_{y_1}(-g\sqrt{P} , \sigR)\right),
1557: \end{eqnarray*}
1558: where
1559: \begin{equation}
1560:     \label{eqn:def_G}
1561:     G_x(a,b) \triangleq \frac{1}{\sqrt{2 \pi b}} e ^{-\frac{(x-a)^2}{2 b} }.
1562: \end{equation}
1563: Contrary to the Gaussian codebook case, where it is hard to identify a mapping $p(\hy_1|y_1)$ that will be superior to
1564:  Gaussian quantization (if indeed such a mapping exists), in this case it is natural to compare the
1565: Gaussian mapping of \eqref{eqn:def_qaussian_quant}, which induces a Gaussian-mixture distribution for $\hY_1$
1566: with other possible mappings. In the case of binary inputs it is natural to consider binary mappings
1567: for $\hY_1$. We can predict that such mappings will do well at high SNR on the source-relay link,
1568: when the probability of error for symbol-by-symbol detection at the relay is small, with a much smaller
1569: complexity than Gaussian quantization. We start by considering
1570:  two types of  hard-decision (HD) mappings:
1571: \begin{enumerate}
1572:     \item The first mapping is HD-EAF: The relay first makes a hard decision about every received $Y_1$ symbol,
1573:     determining whether it
1574:     is positive or negative, and then randomly decides if it is going to transmit this decision or transmit
1575:     an erasure symbol $E$ instead. The probability of transmitting an erasure, $1 - P_{\ners}$, is used to adjust the conference
1576:     rate such that the feasibility constraint is satisfied. Therefore, the conditional distribution $p(\hY_1|Y_1)$ is
1577:     given by:
1578:     \begin{subequations}
1579:         \begin{eqnarray}
1580:             \label{eqn:def_p_hy1_given_y1_HD_eq1}
1581:             p(\hY_1|Y_1 > 0) & = & \left\{
1582:                         \begin{array}{cl}
1583:                             P_{\ners} &, 1\\
1584:                             1 - P_{\ners} &, E
1585:                         \end{array}
1586:                     \right.\\
1587:             \label{eqn:def_p_hy1_given_y1_HD_eq2}
1588:             p(\hY_1|Y_1 \le 0) & = & \left\{
1589:                         \begin{array}{cl}
1590:                             P_{\ners} &, -1\\
1591:                             1 - P_{\ners} &, E
1592:                         \end{array}
1593:                     \right..
1594:         \end{eqnarray}
1595:     \end{subequations}
1596:     This choice is motivated by the time-sharing method considered
1597:     in section \ref{sec:timeshare_single}: after making a hard decision on the received symbol's sign --- positive
1598:     or negative, the relay applies TS to that decision so that the rate required to transmit the resulting random variable
1599:     is less than $C$. This facilitates transmission to the destination through the conference link.
1600:     Since the entropy of the sign decision is $1$, then when $C \ge 1$ we can transmit the sign decisions directly without using
1601:     an erasure. Therefore,
1602:     we expect that for values of $C$ in the range $C > 1$, this mapping
1603:     will not exceed the rate obtained for $C=1$. The focus is, therefore, on values of $C$ that are less than $1$.
1604:     The expressions for this assignment are given in appendix \ref{append:Gauss-deriv-HD-EAF}.
1605: 
1606:     \item The second method is deterministic hard-decision. In this approach, we select a threshold $T$ such that the
1607:     range of $Y_1$ is partitioned into three regions: $Y_1 < -T, -T \le Y_1 \le T, Y_1 > T$. Then, according to the
1608:     value of each received $Y_1$ symbol, the corresponding $\hY_1$ is deterministically determined:
1609:     \begin{eqnarray}
1610:         \hY_1 = \left\{
1611:             \begin{array}{cl}
1612:                 1, & Y_1 > T\\
1613:                 E, & -T \le Y_1 \le T\\
1614:                 -1, & Y_1 < -T
1615:             \end{array}
1616:         \right..
1617:     \end{eqnarray}
1618:     The threshold $T$ is selected such that the achievable rate is maximized subject to satisfying
1619:     the feasibility constraint. We refer to this method as deterministic HD (DHD). Therefore, this is
1620:     another type of TS in which  the erasure probability is determined by the fraction of the time
1621:     the relay input is between $-T$ and $T$.
1622:     This method should be better than HD-EAF at high relay SNR since for HD-EAF, erasure is selected without
1623:     any regard to the quality of the decision --- both good sign decisions and bad sign decisions are
1624:     erased with the same probability. In DHD, in contrast, the erased area is the area where the decisions have
1625:     low quality in the first place and all high quality decisions are sent. However, at low relay SNR and
1626:     small capacity for the relay-destination link, HD-EAF may perform better than DHD since the
1627:     erased area  (i.e. the region between $-T$ and $+T$) for the DHD mapping has to be very large
1628:     to allow ``squeezing'' the estimate through the relay link,
1629:     while HD-EAF may require less compression of the HD output.
1630:     The expressions for evaluating the rate of the DHD assignment are given in appendix \ref{sec:expressions_DHD}.
1631: \end{enumerate}
1632: 
1633: We now examine the performance of each technique using numerical evaluation:
1634: first, we examine the achievable rates with HD-EAF. The expressions are evaluated for $\sigR = \sigD = 1$ and
1635: $P = 1$. For every pair of values $(g,C)$ considered, the maximum $P_{\ners}$ was selected. Figure \ref{fig:hard-decision-vs-g}
1636: depicts the achievable rate vs. $g$ for $ 0.4 \le C \le 2$, together with the upper bound and the decode-and-forward rate.
1637: \begin{figure}[h]
1638:     \centering
1639:     \scalebox{0.7}{\includegraphics{Hard_decision_vs_g.eps}}
1640:     \caption{Information rate with BPSK and hard decision EAF mapping at the relay vs. relay channel gain $g$,
1641:         for different values of $C$.}
1642:     \label{fig:hard-decision-vs-g}
1643:     \vspace{-0.2cm}
1644: \end{figure}
1645: As can be observed from figure \ref{fig:hard-decision-vs-g}, the information rate of HD-EAF increases with $C$
1646: until $C = 1$ and then remains constant.
1647: It is also seen that for small values of $g$, HD-EAF is better than DAF. This region of $g$  increases with $C$,
1648: and for $C \ge 1$ the crossover value of $g$
1649: is approximately $1.71$. However, even for $g = 2$, DAF is only $2.5\%$ better than HD-EAF.
1650: 
1651: Next, we examine DHD: as can be seen from figure \ref{fig:DHD-vs-g}, for small values of $C$, DAF exceeds
1652: the information rate of DHD for values of $g$ greater than $1$, but for $C \ge 0.8$, DHD is superior to
1653: DAF, and in fact DAF approaches DHD from below. Another phenomenon obvious from the
1654: figure (esp. for $C = 0.8$), is the existence of a threshold: for low values of $C$ there is some $g$ at which the DHD rate
1655: exhibits a jump.
1656: \begin{figure}[!h]
1657:     \centering
1658:     \scalebox{0.69}{\includegraphics{DHD_vs_g.eps}}
1659:     \caption{Information rate with BPSK, for deterministic hard decision at the relay vs. relay channel gain $g$, for
1660:     different values of $C$.}
1661:     \label{fig:DHD-vs-g}
1662: \end{figure}
1663: \begin{figure}
1664:     \centering
1665:     \scalebox{0.69}{\includegraphics{DHD_Explanation.eps}}
1666:     \caption{$I(\hY_1;Y_1|Y)$ and $I(X;\hY_1,Y)$ vs. Threshold $T$ for $(g,C) = (0.4,0.8)$ (left) and
1667:         $(g,C) = (1.4,0.8)$ (right). The bold solid line represents $I(\hY_1;Y_1|Y)$, the bold dashed line represents $C = 0.8$,
1668:         $I(X;Y,\hY_1)$ is represented by the dash-dot line and the resulting information rate is depicted with the solid line. }
1669:     \label{fig:DHD-Explanation}
1670: \end{figure}
1671: This can be explained by looking at figure \ref{fig:DHD-Explanation}, which depicts
1672: the values of $I(X;\hY_1,Y)$ and $I(\hY_1;Y_1|Y)$ vs. the threshold $T$: the bold-solid graph of
1673: $I(\hY_1;Y_1|Y)$ can intersect the bold-dashed horizontal line representing $C$ at two values of $T$. We also note that
1674: for small $T$ the value of $I(X;\hY_1,Y)$ is generally greater than for large $T$. Now, the jump can be explained as follows: as
1675: shown in appendix \ref{sec:HDH-Explanation}, for small $T$ and $g$, $I(\hY_1;Y_1|Y)$ is bounded from below.
1676: Now, if this bound value is greater than $C$ then the intersection will occur only at a large value of $T$, hence
1677: the small rate. When $g$ increases, the value of $I(\hY_1;Y_1|Y)$ for small $T$ decreases accordingly, until
1678: at some $g$ it intersects $C$ for a small $T$ as well as for a large $T$, as indicated by the arrow in the
1679: right-hand part
1680: of figure \ref{fig:DHD-Explanation}. This allows us to obtain the
1681: rates in the region of small $T$ which are in general higher than the rates for large $T$ and this
1682: is the source of the jump in the achievable rate.
1683: 
1684: 
1685: 
1686: 
1687: 
1688: \FloatBarrier
1689: 
1690: 
1691: 
1692: 
1693: 
1694: 
1695: 
1696: 
1697: 
1698: \subsection{Time-Sharing Deterministic Hard-Decision (TS-DHD)}
1699: It is clearly evident from the above numerical evaluation that neither of the two mappings, HD-EAF and DHD, is universally better than
1700: the other: when $g$ is small and $C$ is less than $1$, then HD-EAF performs better than DHD, since the erased region is too large,
1701: and when $g$ increases, DHD performs better than HD-EAF since it erases only the low quality information. It is therefore natural to consider
1702: a third mapping which combines both aspects of binary mapping at the relay, namely deterministically erasing low quality information and
1703: then randomly gating the resulting discrete variable in order to allow its transmission over the conference link.
1704: This hybrid mapping is given in the following equation:
1705:     \begin{subequations}
1706:     \label{eqn:def_TS-DHD}
1707:         \begin{eqnarray}
1708:             \label{eqn:def_TS-DHD_eq1}
1709:             p(\hY_1|Y_1 > T) & = & \left\{
1710:                         \begin{array}{cl}
1711:                             P_{\ners} &, 1\\
1712:                             1 - P_{\ners} &, E
1713:                         \end{array}
1714:                     \right.\\
1715:             \label{eqn:def_TS-DHD_eq2}
1716:             p(\hY_1 = E \;|\;|Y_1| \le T) & = & 1\\
1717:             \label{eqn:def_TS-DHD_eq3}
1718:             p(\hY_1|Y_1 < -T) & = & \left\{
1719:                         \begin{array}{cl}
1720:                             P_{\ners} &, -1\\
1721:                             1 - P_{\ners} &, E
1722:                         \end{array}
1723:                     \right..
1724:         \end{eqnarray}
1725:     \end{subequations}
1726: In this mapping, the region $|Y_1| \le T$ is always erased, and the complement region is erased with probability $P_{\ers} = 1- P_{\ners}$.
1727: Of course, now both $T$ and $P_{\ers}$ have to be optimized. The expressions for TS-DHD can be found in appendix \ref{appndx:expressions_TS_DHD}.
1728: Figure \ref{fig:compare_HD-EAF_DHD_TS-DHD} compares the performance of
1729: DHD, HD-EAF and TS-DHD. As can be seen, the hybrid method enjoys the benefits of both types of mappings and is the superior method.
1730: \begin{figure}[!h]
1731:     \centering
1732:     \scalebox{0.69}{\includegraphics{Compare_DHD_HD-EAF_TS-DHD.eps}}
1733:     \caption{Information rate with BPSK, for  HD-EAF, DHD and TS-DHD at the relay vs. relay channel gain $g$, for
1734:     different values of $C$.}
1735:     \label{fig:compare_HD-EAF_DHD_TS-DHD}
1736: \end{figure}
1737: 
1738: 
1739: Next, figure \ref{fig:compare-HD-EAF-GQ-EAF} compares the performance of TS-DHD, GQ-EAF, and DAF.
1740: \begin{figure}[!h]
1741:     \centering
1742:     \scalebox{0.67}{\includegraphics{compare_GQ_and_TS-DHD.eps}}
1743:     \caption{Information rate with BPSK, for DAF,  TS-DHD and GQ-EAF at the relay vs. relay channel gain $g$, for
1744:     different values of $C$.}
1745:     \label{fig:compare-HD-EAF-GQ-EAF}
1746: \end{figure}
1747: As can be seen from the figure, Gaussian quantization is not always the optimal choice: for $C = 0.6$ (the lines with
1748: diamond-shaped markers) we have that
1749: GQ-EAF is the best method for $g < 1.05$, for $1.05 < g < 1.55$ TS-DHD is the best method and for $g>1.55$
1750: DAF achieves the highest rate.
1751: For $C = 1$ (x-shaped markers) TS-DHD is superior to both GQ-EAF and DAF for $g > 0.9$ and  for $C = 2$, GQ-EAF is the superior method for all $g \le 2$.
1752: This suggests that for the practical Gaussian relay scenario, where the modulation constraint is taken into account, there is
1753: room to optimize the mapping at the relay since the choice of Gaussian quantization is not always optimal.
1754: 
1755: Lastly, figure \ref{fig:DAF-EAF-Regions} depicts the regions in the $g$--$C$ plane in which each of the methods considered here is superior,
1756: in a similar manner to  \cite[figure 2]{Goldsmith:2006}\footnote{The block shapes are due to the step-size of $0.2$ in the values of $g$ and $C$ used
1757: for evaluating the rates. In the final version we will present an evaluation over a finer grid (such an evaluation
1758: requires several weeks to complete).}.
1759: \begin{figure}[!h]
1760:     \centering
1761:     \scalebox{0.67}{\includegraphics{Regions_figure_GQ_and_DHD.eps}}
1762:     \caption{The best cooperation strategy (out of DAF, TS-DHD and GQ-EAF)
1763:         for the Gaussian relay channel with BPSK transmission.}
1764:     \label{fig:DAF-EAF-Regions}
1765: \end{figure}
1766: As can be observed from the figure, in the noisy region of small $g$ and also in the region of very large $C$,
1767: GQ-EAF is superior, and in the
1768: strong relay region of medium-to-high $g$ and medium-to-high $C$, TS-DHD is the superior method.
1769:  DAF is superior for small $C$ and high $g$.
1770: %In the transition region where $C \in [0.6,1.8]$ and $g \in [1,2]$, the two hard decision methods, HD-EAF and DHD are
1771: %superior.
1772: In a sense, the TS-DHD method is a hybrid method between DAF, which makes a hard decision on the
1773: entire block, and GQ-EAF, which makes a soft decision on every symbol; therefore it is superior in the transition region
1774: between the region where DAF is distinctly better, and the region where GQ-EAF is distinctly superior.
1775: 
1776: \FloatBarrier
1777: 
1778: 
1779: \subsection{When the SNR on the Direct Link Approaches $0$ ($\sigD \rightarrow \infty$)}
1780: In this subsection we analyze the relaying strategies discussed in this section as the SNR on the direct link $ X - Y $
1781: approaches zero. Because TS-DHD is a hybrid method combining
1782: both DHD and HD-EAF, we analyze the behavior of the components rather than the hybrid, to gain more insight.
1783: This analysis is particularly useful when trying to numerically evaluate the rates, since as the direct-link SNR goes to zero,
1784:    the computer's numerical accuracy does not allow the rates to be obtained numerically using the general expressions.
1785: %  Therefore, in order to examine the behavior
1786: %of the three EAF relay mappings considered in this section at low SNR on the direct link, it is required to derive analytical approximations
1787: %to the rate expressions.
1788: 
1789: First we note that
1790: when the SNR of the direct link $ X - Y $ approaches $0$ we have that $I(X;Y) \rightarrow 0$ as well.
1791: To see this we write
1792: \begin{eqnarray*}
1793:     I(X;Y)  & = & h(Y) - h(Y|X)\\
1794:             & = & h(Y) - h(X + N|X)\\
1795:             & = & h(Y) - h(N),
1796: \end{eqnarray*}
1797: with $h(Y) = -\int_{-\infty}^{\infty}f(y) \log_2(f(y)) dy$, and from \eqref{eqn:f_Y_HC}
1798: \begin{eqnarray*}
1799:     f(y)& = & \frac{1}{2}\left(G_y(\sqrt{P},\sigD) + G_y(-\sqrt{P},\sigD)\right)\\
1800:         & = & \frac{1}{2}\left( \frac{1}{\sqrt{2 \pi \sigD}}e^{-\frac{(y-\sqrt{P})^2}{2\sigD}}
1801:             +\frac{1}{\sqrt{2 \pi \sigD}} e^{-\frac{(y+\sqrt{P})^2}{2\sigD}}\right)\\
1802:         & = & \frac{1}{\sqrt{2 \pi \sigD}}e^{-\frac{y^2}{2\sigD}}\left(\frac{1}{2} e^{\frac{y\sqrt{P}}{\sigD}}
1803:             +\frac{1}{2} e^{-\frac{y\sqrt{P}}{\sigD}}\right)e^{-\frac{P}{2\sigD}}\\
1804:         & = & \frac{1}{\sqrt{2 \pi \sigD}}e^{-\frac{y^2}{2\sigD}}
1805:              \cosh\left(\frac{y\sqrt{P}}{\sigD}\right)e^{-\frac{P}{2\sigD}}\\
1806:         &\stackrel{\sigD \rightarrow \infty}{\approx} & \frac{1}{\sqrt{2 \pi \sigD}}e^{-\frac{y^2}{2\sigD}}\\
1807:         & \triangleq & G_y(0,\sigD),
1808: \end{eqnarray*}
1809: where the approximation is in the sense that for small $|y|$ we have $\cosh\left(\frac{y\sqrt{P}}{\sigD}\right) \approx 1$ and $e^{-\frac{P}{2\sigD}} \approx 1$, and for large $|y|$, the factor $e^{-\frac{y^2}{2\sigD}}$
1810: drives the entire expression to zero like $e^{-\frac{y^2}{2\sigD}}$,
1811: for $\sigD \rightarrow \infty$.
1812: This approximation reflects the intuitive notion that as the variance increases to infinity, the two-component, symmetric Gaussian
1813: mixture resembles more and more a zero-mean Gaussian RV with the same variance.
1814: Therefore, for low SNR, the output is very close to a zero-mean Normal
1815: RV with variance $\sigD$, and $h(Y) \approx h(N)$,\footnote{For $\sigma = 20$ we have that $\int_{-\infty}^{\infty} |f_Y(y) - G_y(0,\sigD)|dy < 0.001$,
1816: for $\sigma = 55$, $h(Y) - h(N) \approx 0.001$ and for $\sigma = 200$, $h(Y) - h(N) < 0.0001$.} hence
1817: \[
1818:     I(X;Y) \stackrel{\sigD \rightarrow \infty}{\longrightarrow} 0.
1819: \]
1820: Note that the upper bound and the decode-and-forward rate in this case are both equal to
1821: \[
1822:     R_{DAF} = R_{upper} = \min\left\{C,I(X;Y_1)\right\}.
1823: \]
1824: 
1825: Now, let us evaluate the rate for HD-EAF as the SNR goes to zero. From \eqref{eqn:rate_Gauss}:
1826: \[
1827:     R \le I(X;Y,\hY_1) = I(X;\hY_1) + I(X;Y|\hY_1),
1828: \]
1829: and
1830: \begin{eqnarray*}
1831:     I(X;Y | \hY_1) & = & h(Y|\hY_1) - h(Y | X, \hY_1)\\
1832:                    & = & \Pr(\hY_1 = 1) h(Y|\hY_1 = 1) + \Pr(\hY_1 = E) h(Y| \hY_1 = E) +
1833:                         \Pr(\hY_1 = -1) h(Y|\hY_1 = -1) - h(N).
1834: \end{eqnarray*}
1835: Using  appendix \ref{append:Gauss-deriv}, equations \eqref{eqn:cond_entropy_hy1_is_1} -- \eqref{eqn:cond_f_y1_pos},
1836: we have
1837: \begin{eqnarray*}
1838:     h(Y|\hY_1 = 1)  & = & -\int_{y = -\infty}^{\infty} f_{Y|\hY_1}(y|\hy_1 = 1) \log_2 \left(f_{Y|\hY_1}(y|\hy_1 = 1)\right) dy,\\
1839:     f_{Y|\hY_1}(y|\hy_1 = 1)  & = & \frac{f_{Y,Y_1}(y,y_1>0)P_{\ners}}{\Pr(Y_1>0)P_{\ners}} = \frac{f_{Y,Y_1}(y,y_1>0)}{\Pr(Y_1>0)},\\
1840:     f_{Y,Y_1}(y,y_1>0)      & = & \frac{1}{2}\left( f_{Y,Y_1|X}(y,y_1>0|x = \sqrt{P}) + f_{Y,Y_1|X}(y,y_1>0|x = -\sqrt{P}) \right)\\
1841:                     & = & \frac{1}{2}\left( G_y(\sqrt{P},\sigD) \Pr(Y_1>0|X = \sqrt{P}) + G_y(-\sqrt{P},\sigD) \big(1 - \Pr(Y_1>0|X = \sqrt{P})\big)\right)\\
1842:                     & = & \frac{1}{\sqrt{2 \pi \sigD}}e^{-\frac{y^2}{2\sigD}}\left(\frac{1}{2} e^{\frac{y\sqrt{P}}{\sigD}}\Pr(Y_1>0|X = \sqrt{P})
1843:             +\frac{1}{2} e^{-\frac{y\sqrt{P}}{\sigD}}\big(1-\Pr(Y_1>0|X = \sqrt{P})\big)\right)e^{-\frac{P}{2\sigD}}\\
1844:                     & = & \frac{1}{\sqrt{2 \pi \sigD}}e^{-\frac{y^2}{2\sigD}}
1845:                     \left(\frac{\left(\frac{1}{2}-\delta\right) e^{\frac{y\sqrt{P}}{\sigD}}
1846:             +\left(\frac{1}{2}+\delta\right) e^{-\frac{y\sqrt{P}}{\sigD}}}{2}\right)e^{-\frac{P}{2\sigD}}\\
1847:                     & = & \frac{1}{\sqrt{2 \pi \sigD}}e^{-\frac{y^2}{2\sigD}}
1848:                     \left(\frac{1}{2}\cosh\left(\frac{y\sqrt{P}}{\sigD}\right)
1849:             -\delta \sinh\left(\frac{y\sqrt{P}}{\sigD}\right)\right)e^{-\frac{P}{2\sigD}}\\
1850:                     & \stackrel{(a)}{\approx} & \frac{1}{2} G_y(0,\sigD),
1851: \end{eqnarray*}
1852: when $\sigD \rightarrow \infty$ and $\delta \in \left[-\frac{1}{2},\frac{1}{2}\right]$ is selected such that
1853: $\Pr(Y_1>0|X = \sqrt{P}) = \frac{1}{2} - \delta$.
1854: The approximation in (a) is because for small $|y|$, $\sinh\left(\frac{y\sqrt{P}}{\sigD}\right) \approx 0$ and
1855: $\cosh\left(\frac{y\sqrt{P}}{\sigD}\right) \approx 1$, and for large $|y|$, both
1856: $ e^{-\frac{y^2}{2\sigD}}\sinh\left(\frac{y\sqrt{P}}{\sigD}\right) \rightarrow 0$ and
1857: $ e^{-\frac{y^2}{2\sigD}}\cosh\left(\frac{y\sqrt{P}}{\sigD}\right) \rightarrow 0$.
1858: %Note that for the symmetric case we consider here $\delta = 0$.
1859: Hence
1860: \begin{eqnarray*}
1861:     h(Y|\hY_1 = 1) & \approx & -\int_{y = -\infty}^{\infty} \frac{G_y(0,\sigD)}{2\Pr(Y_1>0)} \log_2 \left(\frac{G_y(0,\sigD)}{2\Pr(Y_1>0)}\right) dy\\
1862:                 & = & -\frac{1}{2\Pr(Y_1>0)}\int_{y = -\infty}^{\infty} G_y(0,\sigD)
1863:                     \left[\log_2 \left(G_y(0,\sigD)\right) - \log_2 \left(2\Pr(Y_1>0)\right)\right] dy\\
1864:                 & = & \frac{1}{2\Pr(Y_1>0)} \left[h(N) + \log_2 \left(2\Pr(Y_1>0)\right)\right],
1865: \end{eqnarray*}
1866: and using $\Pr(Y_1 > 0) = \Pr(Y_1 \le 0) = \frac{1}{2}$ and $h(Y|\hY_1 = 1) = h(Y| \hY_1 = -1)$, we obtain
1867: \begin{eqnarray*}
1868:     h(Y|\hY_1) & \approx & \frac{1}{2}P_{\ners}h(N) + (1 - P_{\ners})h(N) + \frac{1}{2}P_{\ners} h(N) \\
1869:                     & = &    h(N).
1870: \end{eqnarray*}
1871: Therefore, at low SNR, $Y$ and $\hY_1$ become independent.
1872: Then, $I(X;Y | \hY_1)  =  h(Y|\hY_1)  - h(N) \approx 0$ and the information rate becomes (see
1873: appendix \ref{appndx:appndxHD-EAF-highSNR})
1874: \begin{eqnarray*}
1875:     R \le I(X;\hY_1) & = & H(\hY_1) - H(\hY_1|X)\\
1876: %                    & = & H\left(\frac{1}{2}P_{\ners}, 1 - P_{\ners} ,\frac{1}{2}P_{\ners}\right)
1877: %                            - H\left(P_1 P_{\ners}, 1 - P_{\ners}, (1-P_1)P_{\ners}\right)\\
1878: %                    & = & -P_{\ners} \log_2\left(\frac{1}{2}P_{\ners}\right) -(1 - P_{\ners})\log_2(1 - P_{\ners})+  P_1 P_{\ners} \log_2(P_1 P_{\ners})\\
1879: %                    &   & \quad     +(1 - P_{\ners})\log_2(1 - P_{\ners})  +  (1-P_1)P_{\ners}\log_2((1-P_1)P_{\ners})\\
1880: %                    & = & -P_{\ners} \log_2\left(P_{\ners}\right) +P_{\ners}  +  P_1 P_{\ners} \log_2(P_1) + P_1 P_{\ners} \log_2(P_{\ners})\\
1881: %                    &   & \quad       +  (1-P_1)P_{\ners}\log_2(1-P_1) + (1-P_1)P_{\ners}\log_2(P_{\ners}) \\
1882: %                    & = &  P_{\ners}(1  +  P_1  \log_2(P_1) +  (1-P_1)\log_2(1-P_1) ) \\
1883:                     & = &  P_{\ners}(1  -H ( P_1  ,1-P_1 )),
1884: \end{eqnarray*}
1885: where $H(\cdot)$ is the discrete entropy for the specified discrete distribution and $P_1 = \Pr(Y_1 > 0 | X = \sqrt{P})$.
1886: Now, consider the feasibility condition $C \ge I(Y_1;\hY_1|Y)$:
1887: \begin{eqnarray*}
1888:     I(Y_1;\hY_1|Y)  & = & H(\hY_1|Y) - H(\hY_1|Y_1,Y)\\
1889:                     & \stackrel{(a)}{\approx} & H(\hY_1) - H(\hY_1|Y_1)\\
1890: %                    & = & H\left(\frac{1}{2}P_{\ners}, 1 - P_{\ners} ,\frac{1}{2}P_{\ners}\right) -
1891: %                        H(P_{\ners},1-P_{\ners})\\
1892: %                    & = & - 2 \frac{1}{2}P_{\ners} \log_2\left(\frac{1}{2}P_{\ners}\right)
1893: %                        - (1 - P_{\ners}) \log_2\left(1 - P_{\ners}\right) + P_{\ners} \log_2(P_{\ners})\\
1894: %                    &   & \quad        + (1 - P_{\ners}) \log_2\left(1 - P_{\ners}\right)\\
1895:                     & = &  P_{\ners},
1896: \end{eqnarray*}
1897: where (a) follows from the independence of $Y$ and $\hY_1$ at low SNR, see appendix \ref{appndx:appndxHD-EAF-highSNR}.
1898: Therefore, for low SNR, we set $P_{\ners} = \min\left\{ C,1\right\}$
1899: and the rate becomes
1900: \[
1901:     R \le \min\left\{ C,1\right\}( 1 - H ( P_1  ,1-P_1 )).
1902: \]
1903: 
1904: 
1905: For the GQ-EAF we first approximate $f(Y,\hY_1)$ at low SNR starting with \eqref{eqn:joint_y_hy1_gq_eaf}:
1906: \begin{eqnarray*}
1907:     f_{Y,\hY_1}(y,\hy_1) & = & \frac{1}{2}\left(G_y(\sqrt{P},\sigD)G_{\hy_1}(g\sqrt{P},\sigR+\sigQ) +
1908:             G_y(-\sqrt{P},\sigD)G_{\hy_1}(-g\sqrt{P},\sigR+\sigQ) \right)\\
1909:             & = & \frac{1}{\sqrt{2 \pi \sigD}} e^{-\frac{y^2}{2\sigD}}
1910:                     \left(\frac{1}{2} G_{\hy_1}(g\sqrt{P},\sigR+\sigQ) e^{\frac{y\sqrt{P}}{\sigD}}+
1911:                        \frac{1}{2} G_{\hy_1}(-g\sqrt{P},\sigR+\sigQ) e^{\frac{-y\sqrt{P}}{\sigD}} \right)e^{-\frac{P}{2\sigD}}\\
1912:             & \approx & G_y(0,\sigD)f_{\hY_1}(\hy_1),
1913: \end{eqnarray*}
1914: as $e^{\pm \frac{y\sqrt{P}}{\sigD}} \approx 1$ in the region where $G_{\hy_1}$ is significant, for both $X = \sqrt{P}$
1915: and $X = -\sqrt{P}$.
1916: We conclude that as the direct SNR approaches 0, $Y$ and $\hY_1$ become independent.
1917: Now, the rate is given by:
1918: \begin{eqnarray}
1919:     R & \le & I(X;Y,\hY_1) \nonumber \\
1920:       &  =  & h(Y,\hY_1) - h(Y,\hY_1|X) \nonumber \\
1921:       &  =  & h(Y) + h(\hY_1) - h(X+N, gX+N_1+N_Q|X) \nonumber \\
1922:       &  =  & h(Y) + h(\hY_1) - h(N, N_1+N_Q|X)\nonumber \\
1923:       &  =  & h(Y) - h(N|X) + h(\hY_1) - h(N_1 + N_Q|X)\nonumber \\
1924:       &  =  & I(X;Y) + I(X;\hY_1)\nonumber \\
1925:       & \approx & I(X;\hY_1)\nonumber \\
1926:       \label{eqn:GQ-EAF-at-low-SNR}
1927:       & = & h(\hY_1) - h(N_1+N_Q).
1928: \end{eqnarray}
1929: The  feasibility condition becomes:
1930: \begin{eqnarray}
1931:     C & \ge & I(\hY_1;Y_1|Y) \nonumber\\
1932:         & = & h(\hY_1|Y) - h(\hY_1|Y,Y_1) \nonumber\\
1933:     \label{eqn:cond_C_lowSNR_GQ}
1934:         & \approx & h(\hY_1) - h(N_Q),
1935: \end{eqnarray}
1936: with
1937: \[
1938:     f_{\hY_1}(\hy_1) = \frac{1}{2}\left[G_{\hy_1}(g\sqrt{P},\sigR+\sigQ) + G_{\hy_1}(-g\sqrt{P},\sigR+\sigQ)\right].
1939: \]
1940: 
1941: For DHD, as $\sigD \rightarrow \infty$ we have
1942: \begin{eqnarray*}
1943:     I(X;\hY_1,Y) & = & I(X;Y)  + I(X;\hY_1|Y) \\
1944:         & \approx & I(X;\hY_1|Y)\\
1945:         & = & H(\hY_1|Y) - H(\hY_1|Y,X)\\
1946:         & \stackrel{(a)}{\approx} & H(\hY_1) - H(\hY_1|X)\\
1947:         & = & I(X;\hY_1)
1948: \end{eqnarray*}
1949: where (a) follows from the independence of $Y$ and $Y_1$ as $\sigD \rightarrow \infty$ and the fact that
1950: $\hY_1$ is a deterministic function of $Y_1$, combined with the fact that given $X$, $Y_1$ and $Y$ are independent.
1951: The feasibility condition becomes
1952: \[
1953:     C  \ge  H(\hY_1|Y) \approx  H(\hY_1).
1954: \]
1955: Because $I(X;\hY_1)$ is not a monotone function of $T$ we have to optimize over $T$ to find the actual rate.
1956: 
1957: As can be seen from the expression for HD-EAF, when the SNR on the direct link decreases, the capacity of the
1958: conference link acts as a scaling factor on the rate of the binary channel from the source to the relay.
1959: \begin{figure}[!h]
1960:     \centering
1961:     \scalebox{0.7}{\includegraphics{DAF_DHD_GQEAF_HDEAF_at_LOWSNR.eps}}
1962:     \caption{Information rate with DAF, DHD, HD-EAF and GQ-EAF vs.
1963:         relay channel gain $g$, for different values of $C$, at low SNR on the direct link.}
1964:     \label{fig:low-direct-snr}
1965: \end{figure}
1966: In figure \ref{fig:low-direct-snr} we plotted the information rate for DHD, HD-EAF, GQ-EAF and DAF (which coincides with
1967: the upper bound). Comparing the three EAF strategies we note that DHD, which at intermediate SNR on the source-relay channel performs well for $C \ge 0.8$,
1968: has the worst performance at low SNR up to $C = 1.2$. At $C = 1.2$, DHD becomes the best technique out of the three.
1969: For $C < 1.2$ and high SNR on the
1970: source-relay channel, HD-EAF outperforms both DHD and GQ-EAF. For low SNR on the source-relay channel, GQ-EAF is again superior.
1971: 
1972: \subsection{Discussion}
1973: We make the following observations:
1974: \begin{itemize}
1975:     \item As noted at the beginning of this section, for low SNR on the source-relay link,
1976:     GQ-EAF outperforms TS-DHD. To see why, consider the
1977:     distribution of $Y_1$:
1978:     \begin{eqnarray*}
1979:         f_{Y_1}(y_1) & = & G_{y_1}(0,\sigR) \cosh\left(\frac{g \sqrt{P} y_1}{\sigR}\right) e^{-\frac{g^2P}{2\sigR}}\\
1980:         &  \stackrel{g \rightarrow 0}{\approx} & G_{y_1}(0,\sigR) \left(1 - \frac{g^2P}{2\sigR}\right),
1981:     \end{eqnarray*}
1982:     where the approximation is obtained using the first order Taylor expansion, and the fact that for large
1983:     values of $Y_1$, $G_{y_1}(0,\sigR)$ dominates the expression. Therefore, as $g \rightarrow 0$, $Y_1$
1984:     approaches a zero-mean Gaussian RV: $Y_1 \stackrel{\mathcal{D}}{\rightarrow} \mN(0,\sigR)$.
1985:     As discussed in \cite[ch. 13.1]{cover-thomas:it-book},
1986:     the closer the reconstruction variable is to the original variable, the better the quantization performance is expected to be. Therefore
1987:     it should be natural to guess that GQ will perform better at low relay link SNR.
1988: 
1989:     \item At the other extreme, as $g \rightarrow \infty$, consider the DAF strategy:
1990:     as $g \rightarrow \infty$, we have that
1991:     \begin{eqnarray*}
1992:         h(Y_1) & = & -\int_{y_1 = -\infty}^{\infty}
1993:             \frac{1}{2}\left[G_{y_1}(g\sqrt{P},\sigR) + G_{y_1}(-g\sqrt{P},\sigR)\right]\times\\
1994:             &  & \qquad \qquad \qquad
1995:             \log_2\left(\frac{1}{2}\left[G_{y_1}(g\sqrt{P},\sigR) + G_{y_1}(-g\sqrt{P},\sigR)\right]
1996:             \right)dy_1\\
1997:         & \stackrel{g \rightarrow \infty}{\approx}&  1 - \int_{y_1 = -\infty}^{\infty} \frac{1}{2}G_{y_1}(g\sqrt{P},\sigR)
1998:             \log_2 G_{y_1}(g\sqrt{P},\sigR) dy_1 \\
1999:         &   & \qquad \qquad \qquad - \int_{y_1 = -\infty}^{\infty} \frac{1}{2}G_{y_1}(-g\sqrt{P},\sigR)
2000:             \log_2 G_{y_1}(-g\sqrt{P},\sigR) dy_1\\
2001:         & = & 1 + h(N_1),
2002:     \end{eqnarray*}
2003:     and therefore,
2004:     \[
2005:         I(X;Y_1)  = h(Y_1) -  h(Y_1|X) \approx 1 + h(N_1) -  h(N_1) = 1 = H(X).
2006:     \]
2007:     Hence,
2008:     \[
2009:         R_{DAF}  = \min\left\{I(X;Y_1), I(X;Y)+C\right\} = \min \left\{1, I(X;Y)+C \right\},
2010:     \]
2011:     which is the maximal rate. Therefore, as $g \rightarrow \infty$ DAF provides the optimal rate.
2012: 
2013:     \item We can expect that at intermediate SNR, methods that balance between the soft-decision per symbol of GQ-EAF and
2014:     the hard-decision on the entire codeword of DAF, will be superior to both.
2015:     Furthermore, we believe that as the SNR decreases, increasing the
2016:     cardinality of $\hY_1$ accordingly will improve the performance.
2017: \end{itemize}
2018: 
2019: 
2020: 
2021: 
2022: 
2023: 
2024: 
2025: 
2026: 
2027: 
2028: 
2029: 
2030: \section{Multi-Step Cooperative Broadcast Application}
2031: \label{sec:application_multi_step}
2032: % In relaying we first need to find a common knowledge that both the receiver and the
2033: % relay share. The relay helps the receiver by refining this common knowledge. In the DAF method
2034: % the common knowledge is the set of messages $\mW$. In our new relay method the common knowledge is the
2035: % set $\stypm(Y_1|\xvec_2^m)$. Since this set can always be used as common knowledge for conferencing, we
2036: % can apply the same idea used in theorem \ref{thm:main_thm} to generate common knowledge in multi-step conferencing for
2037: % cooperative broadcast.
2038: 
2039: In this section we consider the cooperative broadcast (BC) scenario. In this scenario, one transmitter communicates with two receivers. In its most
2040: general form, the transmitter sends three independent messages: a common message intended for both receivers and two private messages,
2041: one for each receiver, where all three messages are encoded into a single channel codeword $X^n$.
2042: Each receiver gets a noisy version of the codeword, $Y_1^n$
2043: at $\Rgood$ and $Y_2^n$ at $\Rbad$. After reception, the receivers exchange messages in a $K$-cycle conference over noiseless
2044: conference links  of finite capacities $C_{12}$ and $C_{21}$.
2045: Each conference message is based on the channel output at each receiver and the conference messages previously received
2046: from the
2047: other receiver, in a similar manner to the conference defined by Willems in~\cite{Willems:83} for the cooperative MAC.
2048: After conferencing, each receiver decodes its message.
2049: This scenario is depicted in figure
2050: \ref{fig:three_msg_bc}. This setup was studied in \cite{DraperFK:03} for the
2051: single common message case over the independent BC (i.e. $p(\yvec_1,\yvec_2|\xvec) = \prod_{i=1}^n p(y_{1,i}|x_i)p(y_{2,i}|x_i)$),
2052: and in \cite{RonSer:2005} for the general setup with a single cycle of conferencing.
2053: \begin{figure}[h]
2054:     \centering
2055:     \scalebox{0.6}{\includegraphics{Broadcast_Channel_ThreeMSG.eps}}
2056:     \caption{The broadcast channel with cooperating receivers. The encoder sends three messages, a common message $W_0$,  a private message to $\Rgood$,
2057:     $W_1$, and a private message to $\Rbad$, $W_2$. $\hat{W}_0$ and $\hat{\hat{W}}_0$ are the estimates of $W_0$ at
2058:     $\Rgood$ and $\Rbad$ respectively.}
2059:     \label{fig:three_msg_bc}
2060: \end{figure}
2061: 
2062:     \subsection{Definitions}
2063: 
2064:     We use the standard definition for the discrete memoryless general broadcast channel
2065:     given in \cite{Cover:98}.
2066:     We define a cooperative coding scheme as follows:
2067:     \begin{definition}
2068:         {\em A $\left(C_{12}, C_{21} \right)$-admissible $K$-cycle conference} consists of the following elements:
2069:         \begin{enumerate}
2070:             \item $K$ message sets from $\Rgood$ to $\Rbad$, denoted by
2071:                 $\mW_{12}^{(1)}$, $\mW_{12}^{(2)}$,...,$\mW_{12}^{(K)}$, and $K$ message sets from $\Rbad$ to $\Rgood$,
2072:                 denoted by $\mW_{21}^{(1)}$, $\mW_{21}^{(2)}$,...,$\mW_{21}^{(K)}$.
2073:                 Message
2074:                 set $\mW_{12}^{(k)}$ consists of $2^{nR_{12}^{(k)}}$ messages and message
2075:                 set $\mW_{21}^{(k)}$ consists of $2^{nR_{21}^{(k)}}$ messages.
2076:             \item $K$ mapping functions, one for each conference step from $\Rgood$ to $\Rbad$:
2077:                 \[
2078:                     h_{12}^{(k)}: \mY_1^n \times \mW_{21}^{(1)} \times \mW_{21}^{(2)} \times ... \times
2079:                                 \mW_{21}^{(k-1)} \mapsto \mW_{12}^{(k)},
2080:                 \]
2081:                 and $K$ mapping functions, one for each conference step from $\Rbad$ to $\Rgood$:
2082:                 \[
2083:                     h_{21}^{(k)}: \mY_2^n \times \mW_{12}^{(1)} \times \mW_{12}^{(2)} \times ... \times
2084:                         \mW_{12}^{(k)} \mapsto \mW_{21}^{(k)},
2085:                 \]
2086:                 where $k = 1,2,...,K$.
2087:         \end{enumerate}
2088: %        Let $R_{12}^{(k)} = \frac{1}{n}\log_2\left( ||\mW_{12}^{(k)}|| \right)$, and
2089: %        $R_{21}^{(k)} = \frac{1}{n}\log_2\left( ||\mW_{21}^{(k)}|| \right)$. Then
2090:     The conference rates satisfy:
2091:         \[
2092:             C_{12} = \sum_{k = 1}^K R_{12}^{(k)}, \qquad C_{21} = \sum_{k = 1}^K R_{21}^{(k)}.
2093:         \]
2094:     \end{definition}
2095:     \begin{definition}
2096:         {\em A $(2^{nR_0},2^{nR_1},2^{nR_2},n,C_{12},C_{21},K)$ code} for the general broadcast channel with
2097:         a common message and two independent private messages, consists of three sets of source messages,
2098:         $\mM_0 = \left\{1, 2,...,2^{nR_0}\right\}$, $\mM_1 = \left\{1, 2,...,2^{nR_1}\right\}$ and
2099:                 $\mM_2 = \left\{1, 2,...,2^{nR_2}\right\}$,
2100: %        three conference message sets,
2101: %        \begin{eqnarray*}
2102: %            \mW_{21}^a &  = & \Big\{1,2,...,2^{n R_{21}^a} \Big\},\\
2103: %            \mW_{12}   &  = & \Big\{1,2,...,2^{n R_{12}}   \Big\},\\
2104: %            \mW_{21}^b &  = & \Big\{1,2,...,2^{n R_{21}^b} \Big\},
2105: %        \end{eqnarray*}
2106:         a mapping function at the transmitter,
2107:         \[
2108:            f: \mM_0 \times \mM_1 \times \mM_2 \mapsto \mX^n,
2109:         \]
2110: %        three relay functions,
2111: %        \begin{eqnarray*}
2112: %             &h_{21}^a:&  \mY_2^n  \mapsto \mW_{21}^a,\\
2113: %             &h_{12}:  &  \mW_{21}^a \times \mY_1^n \mapsto \mW_{12},\\
2114: %             &h_{21}^b:&  \mW_{12} \times \mY_2^n \mapsto \mW_{21}^b,
2115: %        \end{eqnarray*}
2116: %        with
2117: %        \[
2118: %            R_{21}^a  \le  C_{21}^a, \quad   R_{12}  \le  C_{12} \;\;\mbox{    and     }\;\;  R_{21}^b  \le  C_{21}^b ,
2119: %        \]
2120: %        where $C_{21}^a = \alpha C_{21}$ and $C_{21}^b = \left(1 - \alpha\right) C_{21}$;
2121:         a $\left(C_{12}, C_{21} \right)$-admissible $K$-cycle conference,
2122:         and two decoders,
2123:         \begin{eqnarray*}
2124:             & g_1: & \mW_{21}^{(1)} \times \mW_{21}^{(2)}\times ... \times \mW_{21}^{(K)} \times \mY_1^n \mapsto \mM_0 \times \mM_1, \\
2125:             & g_2: & \mW_{12}^{(1)} \times \mW_{12}^{(2)}\times ... \times \mW_{12}^{(K)} \times \mY_2^n \mapsto \mM_0 \times \mM_2.
2126:         \end{eqnarray*}
2127:     \end{definition}
2128:     \begin{definition}
2129:         The {\em average probability of error} is defined as
2130:         the average probability that at least one of the receivers does not decode its message pair correctly:
2131:         \[
2132:             \Pe = \Pr\left(g_1\left(W_{21}^{(1)}, W_{21}^{(2)}, ..., W_{21}^{(K)}, Y_1^n\right)\ne (M_0,M_1) \mbox{ or }
2133:                 g_2\left(W_{12}^{(1)}, W_{12}^{(2)}, ...,W_{12}^{(K)}, Y_2^n\right) \ne (M_0,M_2)\right),
2134:         \]
2135:         where we assume that each message is selected uniformly and independently over its respective message set.
2136:     \end{definition}
2137: %    \begin{definition}
2138: %        Let $\typm(A)$ denote the set of $\delta$-weakly typical sequences of length $m$ generated
2139: %        by the distribution $p_A(a)$ on $\mA$
2140: %        %the i.i.d. distribution $\prod_{l=1}^m p(a_l)$, $a_l \in \mA$,
2141: %        as defined in \cite[ch. 3]{cover-thomas:it-book}.
2142: %        For $\avec_0^m \in \typm(A)$, define the set $\ttyp(A,\avec_0^m)$ to be the set of all
2143: %        typical vectors $\avec^n \in \typ(A)$ such that their first $m$ elements
2144: %        satisfy $\avec^m = \avec_0^m$.
2145: %    \end{definition}
2146: 
2147: 
2148: 
2149: 
2150: 
2151: 
2152: 
2153: 
2154: 
2155: 
2156: 
2157: 
2158: 
2159: 
2160: 
2161: 
2162: 
2163: \subsection{The Cooperative Broadcast Channel with Two Independent and One Common Message}
2164: \label{sec:multi-step-general-bc}
2165: We first present the general result for the cooperative broadcast scenario with a $K$-cycle conference.
2166: Define $\hYvec_1 = \left(\hY_1^{(1)}, \hY_1^{(2)},..., \hY_1^{(K)} \right)$ and
2167: $\hYvec_2 = \left(\hY_2^{(1)}, \hY_2^{(2)},..., \hY_2^{(K)} \right)$. Let $R_1$ and $R_2$ be the private rates to $\Rgood$ and
2168: $\Rbad$ respectively, and let $R_0$ denote the rate of the common information. Then, the following rate triplets are achievable:
2169: 
2170: \begin{theorem}
2171:     \label{thm:multi-step-general-bc}
2172:     \it
2173:         Consider the general broadcast channel $\left(\mX, p(y_1,y_2|x), \mY_1 \times \mY_2\right)$ with cooperating
2174:         receivers, having noiseless conference
2175:     links of finite capacities $C_{12}$ and $C_{21}$ between them. Let the receivers hold a conference that
2176:     consists of $K$ cycles. Then, any rate triplet $(R_0, R_1, R_2)$ satisfying
2177:     \begin{subequations}
2178:         \begin{eqnarray}
2179:             R_0 & \le & \min\left\{I\left(W;Y_1,\hYvec_2\right), I\left(W;\hYvec_1,Y_2\right) \right\}\\
2180:             R_1 & \le & I(U;Y_1, \hYvec_2|W)\\
2181:             R_2 & \le & I(V;\hYvec_1, Y_2|W)\\
2182:             R_1 + R_2 & \le & I(U;Y_1, \hYvec_2|W) + I(V;\hYvec_1, Y_2|W) - I(U;V|W),
2183:         \end{eqnarray}
2184:     \end{subequations}
2185:     subject to,
2186:     \begin{subequations}
2187:         \begin{eqnarray}
2188:         \label{eqn:c12_constr_multi_step}
2189:             C_{12}  & \ge & I(Y_1; \hYvec_1, \hYvec_2|Y_2)\\
2190:         \label{eqn:c21_constr_multi_step}
2191:             C_{21}  & \ge & I(Y_2; \hYvec_2, \hYvec_1|Y_1),
2192:         \end{eqnarray}
2193:     \end{subequations}
2194:     for some joint distribution
2195:         \begin{eqnarray}
2196:         \label{eqn:distributions}
2197:          &  & p\left(w,u,v,x,y_1,y_2,\hy_1^{(1)}, \hy_1^{(2)},...,\hy_1^{(K)},\hy_2^{(1)}, \hy_2^{(2)},...,\hy_2^{(K)}\right) =\nonumber\\
2198:          &  & \phantom{xxx} p(w,u,v,x) p(y_1,y_2|x) p\left(\hy_1^{(1)}|y_1\right) p\left(\hy_2^{(1)}|y_2,\hy_1^{(1)}\right)\cdot\cdot\cdot
2199:                 p\left(\hy_1^{(k)}|y_1,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)\times\nonumber\\
2200:          &  &  \phantom{xxx} p\left(\hy_2^{(k)}|y_2,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)
2201:                 \cdot\cdot\cdot p\left(\hy_1^{(K)}|y_1,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(K-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(K-1)}\right)\nonumber\\
2202:          &  &  \phantom{xxx} \times p\left(\hy_2^{(K)}|y_2,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(K)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(K-1)}\right),
2203:     \end{eqnarray}
2204:     is achievable.
2205:     The cardinalities of the $k$'th auxiliary random variables are bounded by:
2206:     \begin{eqnarray*}
2207:         ||\mhY_1^{(k)}|| & \le & ||\mY_1|| \times \prod_{l=1}^{k-1} ||\mhY_1^{(l)}|| \times \prod_{l=1}^{k-1} ||\mhY_2^{(l)}|| + 1, \qquad
2208:             \qquad k = 1,2,...,K\\
2209:         ||\mhY_2^{(k)}|| & \le & ||\mY_2|| \times \prod_{l=1}^{k} ||\mhY_1^{(l)}|| \times \prod_{l=1}^{k-1} ||\mhY_2^{(l)}|| + 1,
2210:             \qquad \qquad k = 1,2,...,K.
2211:     \end{eqnarray*}
2212: \end{theorem}
2213: 
2214: 
2215: \begin{proof}
2216:     \subsubsection{Overview of Strategy}
2217:     The coding strategy is based on combining the BC code construction of \cite{ElGamalM:81}, after incorporating the common message into the
2218:     construction, with the $K$-cycle conference of
2219:     \cite{Kaspi:85}. The transmitter constructs a broadcast code to split the rate between the three message sets. This
2220:     is done independently of the relaying scheme.
2221:     Each receiver generates its conference messages according to the construction of \cite{Kaspi:85}.
2222:     After $K$ cycles of conferencing
2223:     each receiver decodes its information based on its channel output and the conference messages received from the other receiver.
2224: 
2225:     \subsubsection{Code Construction at The Transmitter}
2226:     \begin{itemize}
2227:     \item
2228:     Fix all the distributions in \eqref{eqn:distributions}. Fix $\eps > 0$ and let $n > 1$. Let $\delta > 0$ be a positive number whose
2229:     value is determined in the following steps.
2230:     Let $R(W) = \min\Big\{I\left(W;Y_1,\hYvec_2\right), I\left(W;\hYvec_1,Y_2\right) \Big\}$. Let $S_{[W]\delta}^{(n)}$ denote the
2231:     set of all $\wvec \in \mW^n$ sequences such that $\wvec \in \stypd(W)$ and $\stypd(U,V|\wvec)$ is non-empty, as defined in
2232:     \cite[corollary 5.11]{YeungBook}. From \cite[corollary 5.11]{YeungBook} we  have that
2233:     $||S_{[W]\delta}^{(n)}|| \ge 2^{n(H(W)-\phi)}$, where $\phi \rightarrow 0$ as $\delta \rightarrow 0$ and $n \rightarrow \infty$.
2234: 
2235:     \item Pick $2^{n(R(W) - \eps)}$ sequences from $S_{[W]\delta}^{(n)}$ in a uniform and independent manner according to
2236:     \[
2237:         \Pr(\wvec) = \left\{
2238:                 \begin{array}{cl}
2239:                     \frac{1}{||S_{[W]\delta}^{(n)}||} & ,\wvec \in S_{[W]\delta}^{(n)}\\
2240:                     0   & ,\mbox{otherwise}.
2241:                 \end{array}
2242:             \right.
2243:     \]
2244:     Label these sequences with $l \in \mM_0 \triangleq \left\{1,2,...,2^{n(R(W)-\eps)}\right\}$.
2245: 
2246:     \item For each sequence $\wvec(l)$, $l \in \mM_0$, consider the set $\stypdp(U|\wvec(l))$, where $\delta' = \delta\max\left\{||\mU||, ||\mV||\right\}$.
2247:     Since the sequences $\wvec \in \mW^n$ are selected such that $\stypd(U,V|\wvec(l))$ is non-empty and since
2248:     $(\uvec, \vvec) \in \stypd(U,V|\wvec(l))$ implies  $\uvec \in \stypdp(U|\wvec(l))$, then also $\stypdp(U|\wvec(l))$ is non-empty, and by
2249:     \cite[theorem 5.9]{YeungBook}, $||\stypdp(U|\wvec(l))|| \ge 2^{n(H(U|W) - \psi)}$,
2250:     $\psi \rightarrow 0$ as $\delta' \rightarrow 0$ and $n \rightarrow \infty$.
2251: 
2252:     \item For each $l \in \mM_0$ pick $2^{n(I(U;Y_1,\hYvec_2|W)-\eps)}$ sequences in a uniform and independent manner from $\stypdp(U|\wvec(l))$ according
2253:     to
2254:     \[
2255:         \Pr(\uvec|l) =  \left\{
2256:             \begin{array}{cl}
2257:                 \frac{1}{||\stypdp(U|\wvec(l))||} & ,\uvec \in \stypdp(U|\wvec(l))\\
2258:                 0 & , \mbox{otherwise}.
2259:             \end{array}
2260:         \right.
2261:     \]
2262:     Label these sequences with $\uvec(i|l)$, $i \in \mZ_1 \triangleq \left\{1,2,...,2^{n(I(U;Y_1,\hYvec_2|W)-\eps)}\right\}$. Similarly,
2263:     pick $2^{n(I(V;\hYvec_1, Y_2|W)-\eps)}$ sequences in a uniform and independent manner from $\stypdp(V|\wvec(l))$ according
2264:     to
2265:     \[
2266:         \Pr(\vvec|l) =  \left\{
2267:             \begin{array}{cl}
2268:                 \frac{1}{||\stypdp(V|\wvec(l))||} & ,\vvec \in \stypdp(V|\wvec(l))\\
2269:                 0 & , \mbox{otherwise}.
2270:             \end{array}
2271:         \right.
2272:     \]
2273:     Label these sequences with $\vvec(j|l)$, $j \in \mZ_2 \triangleq \left\{1,2,...,2^{n(I(V;\hYvec_1, Y_2|W)-\eps)}\right\}$.
2274:     $\delta$ is selected such that $||S_{[W]\delta}^{(n)}|| \ge 2^{n(R(W)-\eps)}$, and $\forall l \in \mM_0$ we have
2275:     that $||\stypdp(U|\wvec(l))|| \ge 2^{n(I(U;Y_1,\hYvec_2|W)-\eps)}$ and
2276:     $||\stypdp(V|\wvec(l))|| \ge 2^{n(I(V;\hYvec_1, Y_2|W)-\eps)}$.
2277: 
2278: 
2279:     \item Partition the set $\mZ_1$ into $2^{nR_1}$ subsets $B_{w_1}$,  $w_1 \in \mM_1 = \left\{1,2,...,2^{nR_1}\right\}$, let \\
2280:     $B_{w_1} = \Big[(w_1 - 1)2^{n(I(U;Y_1,\hYvec_2|W)- R_1 - \eps)} + 1, w_1 2^{n(I(U;Y_1,\hYvec_2|W)- R_1 - \eps)} \Big]$. Similarly partition
2281:     the set $\mZ_2$ into $2^{nR_2}$ subsets $C_{w_2}$,  $w_2 \in \mM_2 = \left\{1,2,...,2^{nR_2}\right\}$, let \\
2282:     $C_{w_2} = \left[(w_2 - 1)2^{n(I(V;\hYvec_1,Y_2|W)- R_2 - \eps)} + 1, w_2 2^{n(I(V;\hYvec_1,Y_2|W)- R_2 - \eps)} \right]$.
2283: 
2284:     \item For each triplet $(l,w_1,w_2)$ consider the set
2285:     \[
2286:         \mD(w_1,w_2|l) \triangleq \left\{(m_1,m_2): m_1 \in B_{w_1}, m_2 \in C_{w_2}, \left(\uvec(m_1|l), \vvec(m_2|l)\right)
2287:             \in \stypdp(U,V|\wvec(l)) \right\}.
2288:     \]
2289:     By \cite[lemma on pg. 121]{ElGamalM:81}, we have that taking $n$ large enough we can make
2290:     $\Pr\left(||\mD(w_1,w_2|l)||  = 0\right) \le \eps$ for any arbitrary $\eps > 0$, as long as
2291:     \begin{subequations}
2292:         \begin{eqnarray}
2293:         \label{eqn:R1_cond_lemma}
2294:             R_1  & \le & I(U;Y_1,\hYvec_2|W)\\
2295:         \label{eqn:R2_cond_lemma}
2296:             R_2 & \le & I(V;\hYvec_1,Y_2|W)\\
2297:         \label{eqn:R1_and_R2_cond_lemma}
2298:             R_1 + R_2 & \le & I(U;Y_1,\hYvec_2|W) + I(V;\hYvec_1,Y_2|W) - I(U;V|W).
2299:         \end{eqnarray}
2300:     \end{subequations}
2301:     Note that the individual rate constraints are required to guarantee that the sets $B_{w_1}$ and $C_{w_2}$ are non-empty.
2302: 
2303:     \item For each $l \in \mM_0$, we pick a unique pair of $(m_1(w_1,w_2,l), m_2(w_1,w_2,l)) \in \mD(w_1,w_2|l)$,
2304:     $(w_1,w_2) \in \mM_1 \times \mM_2$.  The transmitter generates the codeword $\xvec(l,w_1,w_2)$ according to\\
2305:     $p(\xvec(l,w_1,w_2)) = \prod_{i=1}^n p(x_i|u_i(m_1(w_1,w_2,l)),v_i(m_2(w_1,w_2,l)),w_i(l))$.
2306:     When transmitting the triplet $(l,w_1,w_2)$ the transmitter outputs $\xvec(l,w_1,w_2)$.
2307:     \end{itemize}
2308: 
2309:         \subsubsection{Codebook Generation at the Receivers}
2310:         \begin{itemize}
2311:             \item For the first conference step from $\Rgood$ to $\Rbad$, $\Rgood$ generates a codebook
2312:                 with $2^{nR_{12}'^{(1)}}$ codewords indexed by
2313:                 $z_{12}^{(1)}\in \mZ_{12}^{(1)} = \left\{1,2,...,2^{nR_{12}'^{(1)}}\right\}$ according to the distribution
2314:                 $p\left(\hy_1^{(1)}\right)$:
2315:                 $p\left(\hyvec_1^{(1)}(z_{12}^{(1)})\right) = \prod_{i=1}^n p\left(\hy_{1,i}^{(1)}(z_{12}^{(1)})\right)$.
2316:                 $\Rgood$ uniformly and independently partitions the message set
2317:                 $\mZ_{12}^{(1)}$ into $2^{nR_{12}^{(1)}}$ subsets indexed by
2318:                 $w_{12}^{(1)} \in \mW_{12}^{(1)} = \left\{1,2,...,2^{nR_{12}^{(1)}}\right\}$. Denote these subsets
2319:                 with $\mS_{12,w_{12}^{(1)}}^{(1)}$.
2320:             \item For the first conference step from $\Rbad$ to $\Rgood$, $\Rbad$ generates a codebook with
2321:                 $2^{nR_{21}'^{(1)}}$ codewords indexed by $z_{21}^{(1)} \in \mZ_{21}^{(1)} = \left\{1,2,..., 2^{nR_{21}'^{(1)}}\right\}$
2322:                 for each codeword $\hyvec_1^{(1)}(z_{12}^{(1)})$, $z_{12}^{(1)} \in \mZ_{12}^{(1)}$, in an i.i.d.
2323:                 manner according to
2324:                 $p\left(\hyvec_2^{(1)}(z_{21}^{(1)}|z_{12}^{(1)})\right)=  \prod_{i=1}^n p\left(\hy_{2,i}^{(1)}(z_{21}^{(1)}|z_{12}^{(1)})\Big|\hy^{(1)}_{1,i}(z_{12}^{(1)})\right)$.
2325: %                $z_{21}^{(1)} \in \mZ_{21}^{(1)}$.
2326:                 $\Rbad$ uniformly and independently partitions the message set $\mZ_{21}^{(1)}$
2327:                  into $2^{nR_{21}^{(1)}}$ subsets indexed by
2328:                 $w_{21}^{(1)} \in \mW_{21}^{(1)} = \left\{1,2,...,2^{nR_{21}^{(1)}}\right\}$. Denote these subsets
2329:                 with $\mS_{21,w_{21}^{(1)}}^{(1)}$.
2330:             \item For the $k$'th conference step from $\Rgood$ to $\Rbad$, $\Rgood$ considers each combination of
2331:                 $z_{12}^{(1)},z_{12}^{(2)},...,z_{12}^{(k-1)}$,
2332:                 $z_{21}^{(1)},z_{21}^{(2)},...,z_{21}^{(k-1)}$. For each combination, $\Rgood$ generates a codebook with $2^{nR_{12}'^{(k)}}$
2333:                 messages indexed by $z_{12}^{(k)} \in \mZ_{12}^{(k)} = \left\{1,2,...,2^{nR_{12}'^{(k)}}\right\}$,
2334:                 according to the distribution
2335:                  $p\left(\hy_1^{(k)}|\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)$.
2336:                 $\Rgood$ uniformly and independently partitions the message set
2337:                 $\mZ_{12}^{(k)}$ into $2^{nR_{12}^{(k)}}$ subsets indexed by
2338:                 $w_{12}^{(k)} \in \mW_{12}^{(k)} = \left\{1,2,...,2^{nR_{12}^{(k)}}\right\}$. Denote these subsets
2339:                 with $\mS_{12,w_{12}^{(k)}}^{(k)}$.
2340:             \item The codebook for the $k$'th conference step from $\Rbad$ to $\Rgood$ is generated in a parallel manner for each combination
2341:                 of $z_{12}^{(1)},z_{12}^{(2)},...,z_{12}^{(k)}$, $z_{21}^{(1)},z_{21}^{(2)},...,z_{21}^{(k-1)}$.
2342: 
2343: 
2344:         \end{itemize}
2345: 
2346: 
2347: 
2348: 
2349: 
2350: 
2351:     \subsubsection{Decoding and Encoding at $\Rgood$ at the $k$'th Conference Cycle ($k \le K$) for Transmission
2352:         Block $i$}
2353:     \label{sec:DecEncMultiStepRgood}
2354:         $\Rgood$ needs first to decode the message $z_{21}^{(k-1)}$ sent from $\Rbad$ at the $(k-1)$'th cycle.
2355:         To that end, $\Rgood$ uses $w_{21}^{(k-1)}$, the index received from $\Rbad$ at the $(k-1)$'th conference
2356:         step. In
2357:         decoding $z_{21}^{(k-1)}$ we assume that all the previous $z_{21}^{(1)},z_{21}^{(2)},...,z_{21}^{(k-2)}$
2358:         were correctly decoded at $\Rgood$. We denote the $\hyvec_2^{(k)}$ sequences corresponding to
2359:         $z_{21}^{(1)},z_{21}^{(2)},...,z_{21}^{(k-2)}$ by\\
2360:          $\hyvec_2(1), \hyvec_2(2), ...,\hyvec_2(k-2)$, and
2361:         similarly define $\hyvec_1(1), \hyvec_1(2) ,..., \hyvec_1(k-1)$.
2362:         \begin{itemize}
2363:             \item $\Rgood$ first generates the set $\mL_1(k-1)$ defined by:
2364:                 \begin{eqnarray*}
2365:                     &  & \mL_1(k-1) = \bigg\{z_{21}^{(k-1)} \in  \mZ_{21}^{(k-1)} :
2366:                         \Big(\hyvec_2^{(k-1)}(z_{21}^{(k-1)}|z_{12}^{(1)},z_{12}^{(2)},...,z_{12}^{(k-1)},z_{21}^{(1)},z_{21}^{(2)},...,z_{21}^{(k-2)}),\\
2367:                     &  & \phantom{xxxxxxxxxxxxx} \hyvec_1(1),\hyvec_1(2),...,\hyvec_1(k-1),\hyvec_2(1),\hyvec_2(2),...,\hyvec_2(k-2),\yvec_1(i)\Big)\in \styp\bigg\}.
2368:                 \end{eqnarray*}
2369:             \item $\Rgood$ then looks for a unique $z_{21}^{(k-1)} \in \mZ_{21}^{(k-1)}$ such that
2370:                     $z_{21}^{(k-1)} \in \mL_1(k-1) \bigcap \mS_{21,w_{21}^{(k-1)}}^{(k-1)}$. If there is none or
2371:                     there is more than one, an error is declared.
2372:             \item From an argument similar to \cite{Kaspi:85}, the probability of error can be made arbitrarily small
2373:                 by taking $n$ large enough as long as
2374:                 \[
2375:                     R_{21}'^{(k-1)} < I\left(\hY_2^{(k-1)};Y_1\big| \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},
2376:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-2)}\right) + R_{21}^{(k-1)} - \eps.
2377:                 \]
2378:                 Here, $k > 1$, since for the first conference message from $\Rgood$ to $\Rbad$ no
2379:                 decoding takes place.
2380:         \end{itemize}
2381:         In generating the $k$'th conference message to $\Rbad$, it is assumed that all the previous $k-1$ messages from
2382:         $\Rbad$ were decoded correctly.
2383:         \begin{itemize}
2384:             \item $\Rgood$ looks for a message $z_{12}^{(k)} \in \mZ_{12}^{(k)}$ such that
2385:                 \begin{eqnarray*}
2386:                     & & \Big(\hyvec_1^{(k)}(z_{12}^{(k)}|z_{12}^{(1)},z_{12}^{(2)},...,z_{12}^{(k-1)},z_{21}^{(1)},z_{21}^{(2)},...,z_{21}^{(k-1)}),\\
2387:                     & & \phantom{xxxx}    \hyvec_1(1),\hyvec_1(2),...,\hyvec_1(k-1),\hyvec_2(1),\hyvec_2(2),...,\hyvec_2(k-1),\yvec_1(i) \Big) \in \styp.
2388:                 \end{eqnarray*}
2389:                 From the argument in \cite{Kaspi:85}, the probability that such a sequence exists can be made arbitrarily close to $1$
2390:                 by taking $n$ large enough as long as
2391:                 \[
2392:                     R_{12}'^{(k)} > I\left(\hY_1^{(k)};Y_1\Big|\hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},\hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)} \right) + \eps.
2393:                 \]
2394:             \item $\Rgood$ looks for the partition of $\mZ_{12}^{(k)}$ into which $z_{12}^{(k)}$ belongs. Denote the index of this
2395:                 partition with $w_{12}^{(k)}$.
2396:             \item $\Rgood$ transmits $w_{12}^{(k)}$ to $\Rbad$ through the conference link.
2397:         \end{itemize}
2398: 
2399:     \subsubsection{Decoding and Encoding at $\Rbad$ at the $k$'th Conference Step ($k \le K$) for Transmission
2400:         Block $i$}
2401:     \label{sec:DecEncMultiStepRbad}
2402:         Using similar arguments to section \ref{sec:DecEncMultiStepRgood}, we obtain the following
2403:         rate constraints:
2404:         \begin{itemize}
2405:             \item Decoding $z_{12}^{(k)}$ at $\Rbad$ can be done with an arbitrarily small
2406:                 probability of error by taking $n$ large enough as long as
2407:                 \[
2408:                     R_{12}'^{(k)} < I\left(\hY_1^{(k)};Y_2\big| \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},
2409:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right) + R_{12}^{(k)} - \eps.
2410:                 \]
2411:             \item Encoding $z_{21}^{(k)}$ can be done with an arbitrarily small probability of error
2412:                 by taking $n$ large enough as long as
2413:                 \[
2414:                     R_{21}'^{(k)} > I\left(\hY_2^{(k)};Y_2\Big|\hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k)},
2415:                                 \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)} \right) + \eps.
2416:                 \]
2417:         \end{itemize}
2418: 
2419: 
2420:     \subsubsection{Combining All Conference Rate Bounds}
2421:     \label{sec:combining_bounds_general}
2422:         First consider the bounds on $R_{12}'^{(k)}$, $k = 1,2,...,K$:
2423:         \begin{eqnarray*}
2424:         &  &    I\left(\hY_1^{(k)};Y_1\Big|\hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},\hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)} \right) + \eps
2425:                 < R_{12}'^{(k)} < \\
2426:         &  &  \phantom{xxxxx}I\left(\hY_1^{(k)};Y_2\big| \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},
2427:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right) + R_{12}^{(k)} - \eps.
2428:         \end{eqnarray*}
2429:         This can be satisfied only if
2430:         \begin{eqnarray*}
2431:         &  &  I\left(\hY_1^{(k)};Y_2\big| \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},
2432:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right) + R_{12}^{(k)} - \eps >  \\
2433:         &  &  \phantom{xxxxx}  I\left(\hY_1^{(k)};Y_1\Big|\hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},\hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)} \right) + \eps\\
2434:         & \Rightarrow  &   R_{12}^{(k)}  >  H\left(\hY_1^{(k)} \big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},
2435:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right) \\
2436:         &  &  \phantom{xxxxx}  -H\left(\hY_1^{(k)}\Big|Y_1,\hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},\hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)} \right) + 2\eps\\
2437:         &   &  \phantom{xxx} =  I\left(\hY_1^{(k)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},
2438:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right) +2\eps.
2439:         \end{eqnarray*}
2440:         Hence
2441:         \begin{eqnarray}
2442:             C_{12} & = & \sum_{k = 1}^K R_{12}^{(k)} \nonumber\\
2443:                    & \ge & \sum_{k = 1}^K \bigg(I\left(\hY_1^{(k)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},
2444:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right) +2\eps\bigg)\nonumber\\
2445:                    & = & \sum_{k = 1}^{K} \bigg[I\left(\hY_1^{(k)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},
2446:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right)\nonumber\\
2447:                    &   & \phantom{xxxxxx} + I\left(\hY_2^{(k)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k)},
2448:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right)\bigg] + 2K\eps \nonumber\\
2449: %                   &   & \phantom{xxxxxxxxxx} +I\left(\hY_1^{(K)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K-1)},
2450: %                        \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)}\right)  \nonumber\\
2451: %                   &   & \phantom{xxxxxxxxxxxxxx} + I\left(\hY_2^{(K)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)},
2452: %                        \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)}\right) + 2K\eps \nonumber\\
2453:                    & = &  \sum_{k = 1}^{K} I\left(\hY_1^{(k)},\hY_2^{(k)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(k-1)},
2454:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(k-1)}\right) + 2K\eps \nonumber\\
2455: %                   &   & \phantom{xxxxxxx} +I\left(\hY_1^{(K)}, \hY_2^{(K)} ;Y_1\big| Y_2, \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K-1)},
2456: %                        \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)}\right) + 2K\eps\nonumber\\
2457:                    \label{eqn:constr_c12_general_bc}
2458:                    & = &   I\left( \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)},
2459:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K)};Y_1\big| Y_2\right) +2K\eps,
2460:         \end{eqnarray}
2461:         and similarly
2462:         \begin{equation}
2463:         \label{eqn:constr_c21_general_bc}
2464:             C_{21} \ge I\left( \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)},
2465:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K)};Y_2\big| Y_1\right) +2K\eps.
2466:         \end{equation}
2467:          This provides the rate constraints on the conference auxiliary variables of \eqref{eqn:c12_constr_multi_step} and
2468:          \eqref{eqn:c21_constr_multi_step}.
2469: 
2470:          \subsubsection{Decoding at $\Rgood$}
2471:          $\Rgood$ uses $\yvec_1(i)$ and $\hyvec_2^{(1)},\hyvec_2^{(2)},...,\hyvec_2^{(K)}$ received from $\Rbad$, to decode $(l_i,w_{1,i})$ as follows:
2472:          \begin{itemize}
2473:             \item $\Rgood$ looks for a unique message $l \in \mM_0$ such that
2474:             \[
2475:                 \big(\wvec(l),\yvec_1(i),\hyvec_2^{(1)},\hyvec_2^{(2)},...,\hyvec_2^{(K)}\big) \in \styp.
2476:             \]
2477:             From the point-to-point channel capacity theorem (see \cite{ElGamalM:81}), this can be done with an arbitrarily
2478:             small probability of error by taking $n$ large enough as long as
2479:             \begin{equation}
2480:             \label{eqn:constr_r0_decode_Rgood}
2481:                 R_0 \le I(W;Y_1,\hYvec_2).
2482:             \end{equation}
2483:             Denote the decoded message $\hat{l}_i$. Now $\Rgood$ decodes $w_{1,i}$ by looking for a unique $k \in \mZ_1$
2484:             such that
2485:             \[
2486:                 \big(\uvec(k|\hat{l}_i),\wvec(\hat{l}_i),\yvec_1(i),\hyvec_2^{(1)},\hyvec_2^{(2)},...,\hyvec_2^{(K)}\big) \in \styp.
2487:             \]
2488:             If a unique such $k$ exists, then denote the decoded index with $\hat{k}=k$. Now $\Rgood$ looks for the partition of $\mZ_1$ into which $\hat{k}$ belongs and sets $\hw_{1,i}$
2489:             to be the index of that partition: $\hat{k} \in B_{\hw_{1,i}}$.
2490:             Similarly to  the proof in \cite[ch 14.6.2]{cover-thomas:it-book}, assuming successful decoding
2491:             of $l_i$, the probability of error can be made arbitrarily small by taking $n$ large enough as long as
2492:             \[
2493:                 \frac{1}{n}\log_2||\mZ_1|| \le I(U;Y_1,\hYvec_2|W),
2494:             \]
2495:             which is satisfied by construction.
2496: 
2497:          \end{itemize}
2498: 
2499:          \subsubsection{Decoding at $\Rbad$}
2500:          Repeating similar steps for decoding at $\Rbad$ we get that decoding $l_i$ can be done with an arbitrarily
2501:          small probability of error by taking $n$ large enough as long as
2502:          \begin{equation}
2503:          \label{eqn:constr_r0_decode_Rbad}
2504:             R_0 \le I(W; \hYvec_1,Y_2),
2505:          \end{equation}
2506:          and assuming successful decoding of $l_i$, decoding $w_{2,i}$ with an arbitrarily small probability of error
2507:          requires that
2508:          \[
2509:                 \frac{1}{n}\log_2||\mZ_2|| \le I(V;\hYvec_1,Y_2|W),
2510:          \]
2511:          which again is satisfied by construction.
2512: 
2513:          Finally, collecting \eqref{eqn:R1_cond_lemma}, \eqref{eqn:R2_cond_lemma},
2514:          \eqref{eqn:R1_and_R2_cond_lemma}, \eqref{eqn:constr_r0_decode_Rgood} and \eqref{eqn:constr_r0_decode_Rbad} gives
2515:          the achievable rate constraints of theorem \ref{thm:multi-step-general-bc}, and \eqref{eqn:constr_c12_general_bc}
2516:          and \eqref{eqn:constr_c21_general_bc} give the conference rate constraints of the theorem.
2517: \end{proof}
2518: 
2519: 
2520: 
2521: 
2522: 
2523: 
2524: 
2525: 
2526: 
2527: 
2528: 
2529: \subsection{The Cooperative Broadcast Channel with a Single Common Message}
2530: \label{sec:multi-step-single-common-message}
2531: 
2532: In the single common message cooperative broadcast scenario,
2533: a single transmitter sends a message to two receivers encoded in a single channel codeword
2534: $X^n$. % where the superscript $n$ denotes the length of a vector.
2535: \begin{figure}[ht]
2536:      \epsfxsize=0.6\textwidth \leavevmode\centering\epsffile{Broadcast_Channel_Common.eps}
2537:     \caption{The broadcast channel with cooperating receivers, for the single common message case.
2538:         $\hat{W}$ and $\hat{\hat{W}}$ are the estimates of $W$ at $\Rgood$ and $\Rbad$ respectively.}
2539:     \label{fig:broadcast-cooperation-common}
2540: \end{figure}
2541: This scenario is depicted in figure \ref{fig:broadcast-cooperation-common}.
2542: %The conference messages are
2543: %functions of $Y_1^n$ (at $\Rgood$), $Y_2^n$ (at $\Rbad$),
2544: %and the previous conference messages received from the
2545: %other decoder, as defined by Willems in~\cite{Willems:83}.
2546: After conferencing, each receiver decodes the message.
2547: For this setup we have the following upper bound:
2548: \begin{proposition}
2549:          \label{prop:common_upper}
2550:          {\it (\cite[theorem 6]{RonISIT05:05})}
2551:          {\it
2552:          Consider the general broadcast channel $(\mX, p(y_1,y_2|x), \mY_1 \times \mY_2)$ with cooperating
2553:          receivers having noiseless conference links of finite capacities $C_{12}$ and $C_{21}$ between them.
2554:          Then, for sending a common message to both receivers, any rate $R$ must satisfy
2555:          \[
2556:             R \! \le \!\sup_{p_X(x)} \! \min \! \Big\{I(X;Y_1) + C_{21}, I(X;Y_2) + C_{12}, I(X;Y_1,Y_2) \Big\}.
2557:          \]}
2558: \end{proposition}
2559: In \cite{RonISIT05:05} we also derived the following achievable rate for
2560: %the general broadcast channel with a single common message:
2561: this scenario:
2562: \begin{proposition}
2563:         \label{prop:achive_common_one_step}
2564:          {\it (\cite[theorem 5]{RonISIT05:05})} {\it
2565:          Assume the broadcast channel setup of proposition \ref{prop:common_upper}.
2566:          Then, for sending a common message to both receivers, any rate $R$ satisfying
2567:          \begin{subequations}
2568:              \begin{eqnarray}
2569:                  R & \le & \sup_{p_X(x)}\Big[ \max\Big\{ R_{12}(p_X(x)), R_{21}(p_X(x)) \Big\} \Big], \nonumber\\
2570:             \label{eqn:PrevResult1}
2571:                  R_{12}(p_X(x)) & \triangleq & \min \Big( I(X;Y_1) + C_{21} ,  \max\big\{I(X;Y_2),
2572:                         I(X;Y_2) - H(Y_1|Y_2,X) + \min\big(C_{12},H(Y_1|Y_2)\big)\big\} \Big),\phantom{xx}\\
2573:              \label{eqn:PrevResult2}
2574:                  R_{21}(p_X(x)) & \triangleq & \min \Big( I(X;Y_2) + C_{12} ,
2575:                         \max \big\{I(X;Y_1), I(X;Y_1)  - H(Y_2|Y_1,X) + \min\big(C_{21},H(Y_2|Y_1)\big)\big\} \Big),\phantom{xx}
2576:             \end{eqnarray}
2577:          \end{subequations}
2578: %         with the appropriate $C_{12} > H(Y_1|Y_2,X)$  or  $C_{21} > H(Y_2|Y_1,X)$ (the one used for the first cooperation step),
2579:          is achievable.}
2580: \end{proposition}
2581: 
2582: Note that this rate expression
2583: depends only on the parameters of the problem and is, therefore, computable. In proposition
2584: \ref{prop:achive_common_one_step}  the achievable rate  increases linearly with the cooperation
2585: capacity. The downside of this method is that it %cannot be applied to any given conference capacity.
2586: produces a rate increase over the non-cooperative rate only for conference link capacities that exceed
2587: some minimum values.
2588: % The same limitation also exists in theorem \ref{thm:CEG_EAF}.
2589: % We note that \cite{DraperFK:03} presents a different approach for multi-step conference.~The approach of
2590: % \cite{DraperFK:03} generalizes \cite{CoverG:79} but still requires auxiliary random variables.
2591: % The work in \cite{DraperFK:03} is for the single common message case, and also uses the independent
2592: % broadcast channel. Here we derive the rate for the general broadcast channel with three messages, and
2593: % when specializing to the single common message case, we obtain that \cite[theorem 2]{DraperFK:03} also holds
2594: % for the general broadcast channel.
2595: %
2596: %  The motivation for deriving the new multi-step conference
2597: % is twofold: first we note that in the worst case we can always send information
2598: % about part of the received symbols and use this partial information to improve decoding. Second, assume
2599: % that the maximum rate in proposition \ref{prop:achive_common_one_step} is achieved when $\Rgood$
2600: % helps $\Rbad$ first, and then $\Rbad$ decodes and helps $\Rgood$
2601: % (this corresponds to $R_{12}(p(x))$ in equation (\ref{eqn:PrevResult1})).
2602: % The achievable rate for each of the receivers in this case is
2603: % \begin{eqnarray*}
2604: %     R_1  & \le & I(X;Y_1) + C_{21}, \\
2605: %     R_2  & \le & I(X;Y_2) - H(Y_1|Y_2,X) + C_{12},
2606: % \end{eqnarray*}
2607: % for some $p_X(x)$ on $\mX$ and as long as $H(Y_1|Y_2,X) \le C_{12} \le H(Y_1|Y_2)$.
2608: % Now, if $R_1 \le R_2$ then $\min(R_1,R_2) = R_1$ which is the optimal rate,
2609: % since proposition \ref{prop:common_upper} asserts that
2610: % the non-cooperative rate to $\Rgood$
2611: % cannot be increased by more than $C_{21}$.
2612: % However, if after the conference we have that $R_1 > R_2$, this implies that we helped
2613: % $\Rgood$ too much and helped $\Rbad$ too little. We could then increase the rate by
2614: % helping $\Rbad$ more and helping $\Rgood$ less. This is achieved with the multi-step conference.
2615: 
2616: 
2617: 
2618: 
2619: 
2620: 
2621: 
2622: 
2623: 
2624: % \subsection{A Multi-Step Conference for the Broadcast Channel with a Single Common Message}
2625: Specializing the three independent messages result to the single common message case we obtain the
2626: following achievable rate with a $K$-cycle conference for the general BC with a single common message:
2627: 
2628: \begin{corollary}
2629:     \label{corr:single-coomon-message-with-multi-step}
2630:     \it
2631:     Consider the general broadcast channel with cooperating receivers, having noiseless conference
2632:     links of finite capacities $C_{12}$ and $C_{21}$ between them. Let the receivers hold a conference that
2633:     consists of $K$ cycles. Then, any rate $R$ satisfying
2634:     \begin{equation}
2635:         R \le \max \left\{R_{12}, R_{21} \right\},
2636:     \end{equation}
2637:     is achievable.
2638: 
2639:     Here $R_{12}$ is defined as follows:
2640:     \begin{equation}
2641:         R_{12} = \sup_{p_X(x), \alpha \in [0,1]} \min \left\{ R_1, R_2 \right\},
2642:     \end{equation}
2643:     with
2644:     \begin{subequations}
2645:     \begin{eqnarray}
2646:         \label{eqn:R_1}
2647:         R_1 & = & I\left(X;Y_1,\hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)}\right) + \alpha C_{21},\\
2648:         \label{eqn:R_2}
2649:         R_2 & = & I\left(X;Y_2,\hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)}\right),
2650:     \end{eqnarray}
2651:     \end{subequations}
2652:     subject to
2653:     \begin{subequations}
2654:     \begin{eqnarray}
2655:         C_{12} & \ge & I\left(Y_1; \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)},\hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)}\Big|Y_2\right),\\
2656:         (1-\alpha)C_{21} & \ge & I\left(Y_2; \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)},\hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)}\Big|Y_1\right),
2657:     \end{eqnarray}
2658:     \end{subequations}
2659:     for the joint distribution
2660:     \begin{eqnarray*}
2661:          &  & p\left(x,y_1,y_2,\hy_1^{(1)}, \hy_1^{(2)},...,\hy_1^{(K)},\hy_2^{(1)}, \hy_2^{(2)},...,\hy_2^{(K-1)}\right) =\\
2662:          &  & \phantom{xxx} p(x) p(y_1,y_2|x) p\left(\hy_1^{(1)}|y_1\right) p\left(\hy_2^{(1)}|y_2,\hy_1^{(1)}\right)\cdot\cdot\cdot
2663:                 p\left(\hy_1^{(k)}|y_1,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)\times\\
2664:          &  &  \phantom{xxx} p\left(\hy_2^{(k)}|y_2,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)
2665:                 \cdot\cdot\cdot p\left(\hy_2^{(K-1)}|y_2,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(K-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(K-2)}\right)\\
2666:          &  &  \phantom{xxx} \times p\left(\hy_1^{(K)}|y_1,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(K-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(K-1)}\right).
2667:     \end{eqnarray*}
2668:     The cardinalities of the $k$'th auxiliary random variables are bounded by:
2669:     \begin{eqnarray*}
2670:         ||\mhY_1^{(k)}|| & \le & ||\mY_1|| \times \prod_{l=1}^{k-1} ||\mhY_1^{(l)}|| \times \prod_{l=1}^{k-1} ||\mhY_2^{(l)}|| + 1, \qquad
2671:             \qquad k = 1,2,...,K\\
2672:         ||\mhY_2^{(k)}|| & \le & ||\mY_2|| \times \prod_{l=1}^{k} ||\mhY_1^{(l)}|| \times \prod_{l=1}^{k-1} ||\mhY_2^{(l)}|| + 1,
2673:             \qquad \qquad k = 1,2,...,K-1.
2674:     \end{eqnarray*}
2675:     $R_{21}$ is defined in a parallel manner to $R_{12}$, with $\Rbad$ performing the first conference step, and the appropriate change
2676:     in the probability chain.
2677: 
2678: \end{corollary}
2679: 
2680: 
2681: 
2682: \bigskip
2683: The proof of corollary \ref{corr:single-coomon-message-with-multi-step} is provided in appendix \ref{appndx:prof_corollary_single_common}.
2684: \smallskip
2685: 
2686: We note that \cite[theorem 2]{DraperFK:03} presents a similar result for this scenario, under the constraint that the memoryless
2687: broadcast channel can be decomposed as $p(\yvec_1,\yvec_2|\xvec) = \prod_{i=1}^n p(y_{1,i}|x_i)p(y_{2,i}|x_i)$, and
2688: considering the sum-rate of the conference. Here we show that the same achievable rate expressions hold
2689: for the general memoryless broadcast channel.
2690: A recent result appears in \cite{Shlomo_BZ}, where
2691: the single common message case for a Gaussian BC is considered.
2692: In the multi-cycle conference considered in this section, we let the auxiliary RVs follow a more
2693: general chain than that of \cite{Shlomo_BZ} --- which results in a larger achievable rate.
2694: 
2695: \subsection{A Single-Cycle Conference with TS-EAF}
2696: Consider the case where only a single cycle of conferencing between the receivers is allowed.
2697:  Specializing corollary \ref{corr:single-coomon-message-with-multi-step} to a single cycle case
2698:  we obtain
2699: \begin{subequations}
2700:  \begin{eqnarray}
2701:     \label{eqn:two-step_TSEAF-R_1}
2702:     R_1 & = & I(X;Y_1) + C_{21}\\
2703:     \label{eqn:two-step_TSEAF-R_2}
2704:     R_2 & = & I(X;Y_2, \hY_1^{(1)})\\
2705:     \label{eqn:two-step_TSEAF-C12}
2706:     C_{12} & \ge & I(Y_1; \hY_1^{(1)}|Y_2),
2707:  \end{eqnarray}
2708: \end{subequations}
2709: and the TS-EAF assignment is
2710: \[
2711:     p(\hy_1^{(1)}|y_1) = \left\{
2712:         \begin{array}{cl}
2713:             q_1, & \hy_1^{(1)} = y_1\\
2714:             1-q_1, & \hy_1^{(1)} = \Omega \notin \mY_1.
2715:         \end{array}
2716:     \right.
2717: \]
2718: Applying the TS-EAF assignment to \eqref{eqn:two-step_TSEAF-C12} and \eqref{eqn:two-step_TSEAF-R_2} we obtain
2719: \begin{eqnarray*}
2720:     C_{12} & \ge & I(Y_1; \hY_1^{(1)}|Y_2)\\
2721:         & = & H(Y_1|Y_2) - H(Y_1|Y_2, \hY_1^{(1)})\\
2722:         & = & H(Y_1|Y_2) - q_1 H(Y_1|Y_2, Y_1) - (1-q_1)H(Y_1|Y_2)\\
2723:         & = & q_1 H(Y_1|Y_2)\\
2724:     R_2 & = & I(X;Y_2, \hY_1^{(1)}) \\
2725:         & = & I(X;Y_2) + H(X|Y_2) - H(X|Y_2,\hY_1^{(1)})\\
2726:         & = & I(X;Y_2) + H(X|Y_2) - (1-q_1) H(X|Y_2) - q_1 H(X|Y_2,Y_1)\\
2727:         & = & I(X;Y_2) + q_1 I(X;Y_1|Y_2).
2728: \end{eqnarray*}
2729: Maximizing $R_2$ requires maximizing $q_1 \in [0,1]$. Therefore setting $q_1 = \left[\frac{C_{12}}{H(Y_1|Y_2)}\right]^*$, we
2730: obtain $R_2 = I(X;Y_2) + \left[\frac{C_{12}}{H(Y_1|Y_2)}\right]^* I(X;Y_1|Y_2)$. Combining with $R_1$ we have
2731: that the rate when $\Rbad$ decodes first is given by
2732: \[
2733:     R_{12} = \min \left\{I(X;Y_1) + C_{21}, I(X;Y_2) + \left[\frac{C_{12}}{H(Y_1|Y_2)}\right]^* I(X;Y_1|Y_2)\right\},
2734: \]
2735: and by a symmetric argument we can obtain $R_{21}$. We conclude that the rate for the single-cycle conference with TS-EAF is given by
2736: \begin{eqnarray*}
2737:     R & = &  \sup_{p(x)} \max\left\{ R_{12}, R_{21} \right\},\\
2738:     R_{12} & = & \min \left\{I(X;Y_1) + C_{21}, I(X;Y_2) + \left[\frac{C_{12}}{H(Y_1|Y_2)}\right]^* I(X;Y_1|Y_2)\right\},\\
2739:     R_{21} & = & \min \left\{I(X;Y_1) + \left[\frac{C_{21}}{H(Y_2|Y_1)}\right]^* I(X;Y_2|Y_1), I(X;Y_2) + C_{12}\right\}.
2740: \end{eqnarray*}
2741: We note that this rate is always better than the point-to-point rate and also better than the joint-decoding rate of
2742: proposition \ref{prop:achive_common_one_step} (whenever cooperation can provide a rate increase).
2743: However, as in proposition \ref{prop:achive_common_one_step}, at least one receiver has to satisfy the Slepian-Wolf
2744: condition for the full cooperation rate to be
2745: achieved. We also note that using TS-EAF with more than two steps does not improve upon this result.
2746: 
2747: Finally, we demonstrate the results of proposition \ref{prop:achive_common_one_step} and corollary \ref{corr:single-coomon-message-with-multi-step} through
2748: a symmetric BC example: consider the symmetric broadcast channel where $\mY_1 = \mY_2 = \mY$ and
2749:               \[
2750:                   p_{Y_1|Y_2,X}(a|b,x) = p_{Y_2|Y_1,X}(a|b,x),
2751:               \]
2752:               for any $(a,b) \in \mY \times \mY$ and $x \in \mX$. Let $C_{21} = C_{12} = C$.
2753:               For this scenario we have that $R_{12} = R_{21}$ in corollary \ref{corr:single-coomon-message-with-multi-step} and
2754:               also $R_{12}(p_X(x)) = R_{21}(p_X(x))$ in proposition \ref{prop:achive_common_one_step}. The resulting rate is depicted in
2755:               figure \ref{fig:compare:ft_and_ts} for a fixed probability $p(x)$.
2756:                 \begin{figure}[htb]
2757:                          \centering
2758:                          \scalebox{0.60}{\includegraphics{Two-step-TS_vs_JT.eps}}
2759:                          \caption{\small The achievable rate $R$ vs. conference capacity $C$,
2760:                               for proposition \ref{prop:common_upper} (dashed-dot),
2761:                               proposition \ref{prop:achive_common_one_step} (dashed) and corollary \ref{corr:single-coomon-message-with-multi-step} (solid),
2762:                               for the symmetric broadcast channel.}
2763:                          \label{fig:compare:ft_and_ts}
2764:                   \end{figure}
2765:               We can see that for this case, time-sharing exceeds joint-decoding for all values of $C$. Both methods meet the upper
2766:               bound at $C = H(Y_1|Y_2)$. We note that this is a corrected version of the figure in \cite{ron:ISIT06}.
2767: 
2768: 
2769: 
2770: \begin{comment}
2771:             \subsection{A Three-Step Conference with TS-EAF Relaying Example}
2772:             Consider the following three-step conference:
2773:             \begin{enumerate}
2774:                 \item $\Rbad$  transmits information at rate $C_{21}^a$ to $\Rgood$.
2775:                 \item $\Rgood$ transmits information at rate $C_{12}$ to $\Rbad$.
2776:                 \item $\Rbad$ decodes and sends information at rate $C_{21}^b$ to $\Rgood$.
2777:             \end{enumerate}
2778:             Lastly, $\Rgood$ decodes. We set $C_{21}^a + C_{21}^b = C_{21}$.
2779:             We note that in the following we
2780:             analyze this order of conference, whose rate is denoted by $R_{212}$. However, since we can choose the order that yields
2781:             the highest rate, repeating the same considerations we derive symmetric expressions
2782:             for the same scheme with the roles of $\Rgood$ and $\Rbad$ switched.
2783:             %Theorem \ref{thm:achieve-three-steps}
2784:             Corollary \ref{corr:three-steps-special}
2785:             stated below considers, therefore, both possible orders.
2786:             % and in addition a two-step conference based on theorem
2787:             % \ref{thm:main_thm},
2788:             % that is used if the capacities of the conference links are not large enough for applying the decoding
2789:             % scheme of appendix \ref{sec:decoding-at-Rx2}, or if the rate increase from the three-step conference is too little.
2790:             % The two-step conference will
2791:             % be described in appendix \ref{sec:two-step}. Theorem \ref{thm:achieve-three-steps} selects the configuration that results in the highest rate.
2792: 
2793: 
2794:             %\subsubsection{Specializing Corollary \ref{corr:single-coomon-message-with-multi-step} to Three Steps}
2795:             With three conference steps, the expressions in corollary \ref{corr:single-coomon-message-with-multi-step} specialize to the following
2796:             \begin{subequations}
2797:             \label{eqn:specialize_single_to_three_steps}
2798:                 \begin{eqnarray}
2799:                     R_1    &  =  & I\big(X;Y_1,\hY_2^{(1)}\big) + (1-\alpha)C_{21}\\
2800:                 \label{eqn:R2_specialize_three_steps}
2801:                     R_2    &  =  & I\big(X;\hY_1^{(1)},Y_2\big)\\
2802:                 \label{eqn:C12_three_steps_special}
2803:                     C_{12} & \ge & I(Y_1;\hY_1^{(1)}, \hY_2^{(1)}|Y_2)\\
2804:                 \label{eqn:C21_three_steps_special}
2805:                     \alpha C_{21} & \ge & I(Y_2;\hY_1^{(1)}, \hY_2^{(1)}|Y_1)\\
2806:                     p(\hy_1^{(1)},\hy_2^{(1)}|y_1,y_2) & = & p(\hy_2^{(1)}|y_2) p(\hy_1^{(1)}|y_1,\hy_2^{(1)}).
2807:                 \end{eqnarray}
2808:             \end{subequations}
2809:             Using TS-EAF, the assignment of $p(\hy_2^{(1)}|y_2)$ is
2810:             \[
2811:                 p(\hy_2^{(1)}|y_2) = \left\{
2812:                     \begin{array}{cl}
2813:                             q_2 &, \hy_2^{(1)} = y_2\\
2814:                             1-q_2 &, \hy_2^{(1)} = \Delta_2 \notin \mY_2.
2815:                     \end{array}
2816:                     \right.
2817:             \]
2818:             Now, as to the assignment of $p(\hy_1^{(1)}|y_1,\hy_2^{(1)})$, we need to set
2819:             \[
2820:                 p(\hy_1^{(1)}|y_1,\hy_2^{(1)} = y_2) = \left\{
2821:                     \begin{array}{cl}
2822:                             q_1' &, \hy_1^{(1)} = y_1\\
2823:                             1-q_1' &, \hy_1^{(1)} = \Delta_1 \notin \mY_1
2824:                     \end{array}
2825:                     \right.,\;\;
2826:                         p(\hy_1^{(1)}|y_1,\hy_2^{(1)} = \Delta_2) = \left\{
2827:                     \begin{array}{cl}
2828:                             q_1'' &, \hy_1^{(1)} = y_1\\
2829:                             1-q_1'' &, \hy_1^{(1)} = \Delta_1 \notin \mY_1.
2830:                     \end{array}
2831:                     \right.
2832:             \]
2833:             Now, let us examine the expressions in \eqref{eqn:specialize_single_to_three_steps} where $\hY_1^{(1)}$ is used:
2834:              first consider \eqref{eqn:R2_specialize_three_steps}, here we see that whether $\hY_2^{(1)}$ is $Y_2$ or
2835:              $\Delta_2$ does not matter. Therefore, what determines the rate is the probability $\Pr(\hY_1^{(1)}|Y_1)$ rather than
2836:              $\Pr(\hY_1^{(1)}|Y_1, \hY_2^{(1)})$.
2837:              Similarly for \eqref{eqn:C21_three_steps_special} we have that since $Y_1$ is given, then the actual value of
2838:              $\hY_1^{(1)}$ does not affect the value of the mutual information.
2839:             \begin{eqnarray*}
2840:                 I(Y_2;\hY_1^{(1)}, \hY_2^{(1)}|Y_1) & = & I(Y_2; \hY_2^{(1)}|Y_1) + I(Y_2;\hY_1^{(1)}|Y_1, \hY_2^{(1)})\\
2841:                     & = & I(Y_2; \hY_2^{(1)}|Y_1).
2842:             \end{eqnarray*}
2843:              Finally consider \eqref{eqn:C12_three_steps_special}:
2844:              \begin{eqnarray*}
2845:                 I(Y_1;\hY_1^{(1)}, \hY_2^{(1)}|Y_2) & = & H(Y_1 | Y_2) - H(Y_1 | \hY_1^{(1)}, \hY_2^{(1)},Y_2)\\
2846:                     & = & H(Y_1 | Y_2) - q_2 H(Y_1 | \hY_1^{(1)}, Y_2,Y_2) - (1-q_2) H(Y_1 | \hY_1^{(1)},\Delta_2,Y_2)\\
2847:                     & = & H(Y_1 | Y_2) - q_1' q_2 H(Y_1 | Y_1, Y_2,Y_2) - (1-q_1') q_2 H(Y_1 | \Delta_1, Y_2,Y_2) \\
2848:                     &   & \qquad \qquad - q_1'' (1-q_2) H(Y_1 | Y_1,\Delta_2,Y_2) - (1-q_1'')(1-q_2) H(Y_1 | \Delta_1,\Delta_2,Y_2) \\
2849:                     & = & H(Y_1 | Y_2) - q_1' q_2 H(Y_1 | Y_1,Y_2) - (1-q_1') q_2 H(Y_1 | Y_2) \\
2850:                     &   & \qquad \qquad - q_1'' (1-q_2) H(Y_1 | Y_1,Y_2) - (1-q_1'')(1-q_2) H(Y_1  | Y_2) \\
2851:                     & = & H(Y_1 | Y_2)  - ((1-q_1') q_2 +(1-q_1'')(1-q_2)) H(Y_1 | Y_2) \\
2852:                     & = & H(Y_1 | Y_2)  - \Pr(\hY_1^{(1)} = \Delta_1) H(Y_1 | Y_2)
2853:              \end{eqnarray*}
2854:             which depends only on $\Pr(\hY_1^{(1)} = Y_1)$. Therefore, it is enough to consider
2855:             \[
2856:                 p(\hy_1^{(1)}|y_1,\hy_2^{(1)}) = \left\{
2857:                     \begin{array}{cl}
2858:                             q_1 &, \hy_1^{(1)} = y_1\\
2859:                             1-q_1 &, \hy_1^{(1)} = \Delta_1 \notin \mY_1
2860:                     \end{array}
2861:                     \right.,
2862:             \]
2863:             independent of $\hY_2^{(1)}$.
2864:             Using this assignment, the three-step conference equations \eqref{eqn:specialize_single_to_three_steps}
2865:             become
2866:             \begin{subequations}
2867:                 \begin{eqnarray}
2868:                 \label{eqn:C12_three_steps_special_explicit}
2869:                     C_{12} & \ge & q_1 H(Y_1|Y_2) \\
2870:                 \label{eqn:C21_three_steps_special_explicit}
2871:                     \alpha C_{21} & \ge & q_2 H(Y_2|Y_1),
2872:                 \end{eqnarray}
2873:             \end{subequations}
2874:             and since $q_1, q_2 \in [0,1]$ we obtain the following corollary:
2875: 
2876:             \begin{corollary}
2877:             \label{corr:three-steps-special}
2878:                 For the general BC of corollary \ref{corr:single-coomon-message-with-multi-step}, and rate $R$ satisfying
2879:                 \[
2880:                     R = \sup_{p_X(x), \alpha \in [0,1]} \left\{R_1^{(21)}, R_2^{(21)}, R_1^{(12)}, R_2^{(12)}\right\}
2881:                 \]
2882:             \begin{subequations}
2883:                 \label{eqn:R1R2_specialize_three_steps_explicit}
2884:                 \begin{eqnarray}
2885:                     R_1^{(21)}    &  =  & I\big(X;Y_1\big) + \left[\frac{\alpha C_{21}}{H(Y_2|Y_1)}\right]^*I\big(X;Y_2|Y_1\big) + (1-\alpha)C_{21}\\
2886:                 \label{eqn:R2_specialize_three_steps_explicit}
2887:                     R_2^{(21)}    &  =  & I\big(X;Y_2) + \left[\frac{C_{12}}{H(Y_1|Y_2)} \right]^*I(X;Y_1|Y_2\big),
2888:                 \end{eqnarray}
2889:             \end{subequations}
2890:             and $R_1^{(12)}$ can be obtained from $R_2^{(21)}$ by switching $'1'$ and $'2'$, and similarly
2891:             $R_2^{(12)}$ can be obtained from $R_1^{(21)}$, and $p(x,y_1,y_2) = p(x)p(y_1,y_2|x)$.
2892:             \end{corollary}
2893: 
2894: 
2895: 
2896:             As an example, consider the symmetric BC where $\mY_1 = \mY_2 = \mY$ and
2897:              \[
2898:                      p_{Y_1|Y_2,X}(a|b,x) = p_{Y_2|Y_1,X}(a|b,x),
2899:              \]
2900:              for any $a,b \in \mY \times \mY$ and $x \in \mX$. For $R_2^{(21)}$ to achieve the
2901:              full cooperation rate we need $C_{12} \ge H(Y_1|Y_2)$. For $R_2^{(12)}$ to achieve the full cooperation
2902:              rate we need:
2903:              \begin{eqnarray*}
2904:                 C_{21} \left(\frac{\alpha}{H(Y_2|Y_1)} + \frac{1-\alpha}{I(X;Y_2|Y_1)}\right) & \ge & 1 \\
2905:                 C_{21} \left(\frac{\alpha I(X;Y_2|Y_1) + (1-\alpha)H(Y_2|Y_1)}{H(Y_2|Y_1)I(X;Y_2|Y_1)} \right) & \ge  & 1\\
2906:                 C_{21} \left(\frac{ H(Y_2|Y_1) - \alpha H(Y_2|Y_1,X) }{H(Y_2|Y_1)I(X;Y_2|Y_1)} \right) & \ge  & 1\\
2907:                 C_{21}  & \ge  & H(Y_2|Y_1)\frac{I(X;Y_2|Y_1)}{ H(Y_2|Y_1) - \alpha H(Y_2|Y_1,X) }
2908:              \end{eqnarray*}
2909:              Now when $\alpha < 1$ then
2910: 
2911: \end{comment}
2912: 
2913: 
2914: 
2915: 
2916: 
2917: 
2918: 
2919: 
2920: 
2921: \begin{comment}
2922:           \subsubsection{An Alternative Achievable Rate for a Three-Step Conference}
2923:           With three steps we can achieve the following rate
2924: 
2925:               \begin{theorem}
2926:               \label{thm:achieve-three-steps}
2927:                   {\it
2928:                    Assume the broadcast channel setup of proposition \ref{prop:common_upper}.
2929:                    Then, for sending a common message to both receivers, any rate $R$ satisfying
2930:           %         $R \le  \sup_{p(x),\alpha}\Big[ \max\Big\{R_{12},R_{21}, R_{121}, R_{212} \Big\} \Big]$,
2931:                    {
2932:                    \setlength\arraycolsep{0mm}
2933:                    \begin{subequations}
2934:                        \begin{eqnarray}
2935:                            &  & R \le  \sup_{p_X(x),\alpha}\Big[ \max\Big\{R_{12},R_{21}, R_{121}, R_{212} \Big\} \Big], \nonumber\\
2936:                                   \label{eqn:thm_3step_3steps_rate}
2937:                            &  & R_{iji} \triangleq \min \Big( \max \Big\{I(X;Y_i), \;\; I(X;Y_i) -H(Y_j|Y_i,X) +
2938:                               \min\big(C_{ji} + \left[\frac{\alpha C_{ij}}{H(Y_i|Y_j)}\right]^*I(Y_j;Y_i),H(Y_j|Y_i) \big)\Big\},\nonumber\\
2939:                            &  &  \phantom{xxxxxxxxxxxxxxxxxxxxxx}     I(X;Y_j) + (1-\alpha)C_{ij}
2940:                                   +\left[\frac{\alpha C_{ij}}{H(Y_i|Y_j)}\right]^*I(X;Y_i|Y_j)\Big), \\
2941:                            \label{eqn:thm_3step_2steps_rate}
2942:                            &  &  R_{ji} \triangleq \min\Big( I(X;Y_j) + C_{ij},
2943:                                    I(X;Y_i) + \left[\frac{C_{ji}}{H(Y_j|Y_i)}\right]^*I(X;Y_j|Y_i) \Big),
2944:           %             &  & R_{212} \triangleq \min \Big(I(X;Y_1) + (1-\alpha)C_{21}
2945:           %                    +\frac{\alpha C_{21}}{H(Y_2)}I(X;Y_2|Y_1), \nonumber\\
2946:           %             &  &   \qquad \qquad  I(X;Y_2) -H(Y_1|Y_2,X) + \nonumber\\
2947:           %             &  &   \qquad \qquad \qquad \min\big(C_{12}+ \frac{\alpha C_{21}}{H(Y_2)}I(Y_1;Y_2),H(Y_1|Y_2) \big)\Big)\nonumber
2948:           %
2949:           %            &  & R_{212} \triangleq \min \Big(I(X;Y_1) + (1-\alpha)C_{21} +\frac{\alpha C_{21}}{H(Y_2)}I(X;Y_2|Y_1), \nonumber\\
2950:           %            &  &    \; I(X;Y_2) -H(Y_1|Y_2,X) + \min\big(C_{12}+KI(Y_1;Y_2),H(Y_1|Y_2) \big)\Big)\nonumber\\
2951:           %            &  & R_{121}  \triangleq  \min \Big(I(X;Y_2) + (1-\alpha)C_{12} +KI(X;Y_1|Y_2), \nonumber\\
2952:           %            &  & \; I(X;Y_1)-H(Y_2|Y_1,X) + \min\big(C_{21}+KI(Y_1;Y_2),H(Y_2|Y_1) \big)\Big)\nonumber\\
2953:                        \end{eqnarray}
2954:                   \end{subequations}
2955:                    }
2956:                    where $[x]^* \triangleq \min(x,1)$, $(i,j) = (1,2) \mbox{ or } (2,1)$ and $\alpha \in [0,1]$, is achievable.}
2957:               \end{theorem}
2958:           \smallskip
2959: 
2960:           \noindent
2961:                    The proof of this theorem is provided in appendix \ref{sec:three-steps-proof}\footnote{
2962:                       In the proof we assume that $H(Y_1|Y_2)$ and $H(Y_2|Y_1)$ are positive
2963:                       since otherwise
2964:                       one of the receivers cannot receive information from the transmitter.
2965:                       However, we can incorporate the situation where at least one of these entropies is zero as
2966:                       a special case of the derivation.
2967:                       The expressions in \eqref{eqn:thm_3step_3steps_rate} and \eqref{eqn:thm_3step_2steps_rate} hold for
2968:                       any value of the entropy.}.
2969: 
2970:               \subsection{An Example}
2971:               \label{sec:example_multi_step}
2972:               Consider the symmetric broadcast channel where $\mY_1 = \mY_2 = \mY$ and
2973:               \[
2974:                   p_{Y_1|Y_2,X}(a|b,x) = p_{Y_2|Y_1,X}(a|b,x),
2975:               \]
2976:               for any $a,b \in \mY \times \mY$ and $x \in \mX$. Let $C_{21} = C_{12} = C$.
2977:               For this scenario we have that $R_{121} = R_{212}$ and $R_{12} = R_{21}$,~so it is enough to consider
2978:               only $R_{212}$ and $R_{12}$. Consider first $\alpha$ small enough such that
2979:               $\frac{\alpha C}{H(Y_2|Y_1)} < 1$. Begin with $R_{212}$:
2980:               \begin{eqnarray}
2981:                   \negdista &  & R_{212}   =  \min\left(R_{212}', R_{212}''\right),\nonumber\\
2982:                   \negdista &  & R_{212}'  =  I(X;Y_1) + (1 - \alpha)C + \frac{\alpha C}{H(Y_2|Y_1)}I(X;Y_2|Y_1),\nonumber\\
2983:               \label{eqn:example_R212''}
2984:                   \negdista &  & R_{212}''  =  I(X;Y_2) - H(Y_1|Y_2,X) + C +\frac{\alpha C}{H(Y_2|Y_1)}I(Y_1;Y_2),
2985:               \end{eqnarray}
2986:                   in the region where
2987:                   $H(Y_1|Y_2,X) \le C + \frac{\alpha C}{H(Y_2|Y_1)}I(Y_1;Y_2)\! <\! H(Y_1|Y_2)$
2988:                   (otherwise we trivially get either
2989:                   the full cooperation bound for $R_{212}''$ or the non-cooperative rate).
2990:                   Next, we note that for a fixed $C$,
2991:                   $R_{212}'$ is a decreasing function of $\alpha$ and
2992:                   $R_{212}''$ is an increasing function of $\alpha$.
2993:                   Therefore the optimal value of $\alpha$ that maximizes
2994:                   the minimum of these two terms is the value for which both expressions are equal,
2995:                   subject to $\alpha \in [0,1]$. We also have due
2996:                   to the symmetry that $I(X;Y_1) = I(X;Y_2)$ and therefore equality implies
2997:                   \begin{eqnarray*}
2998:                      -\alpha C + \frac{\alpha C}{H(Y_2|Y_1)}I(X;Y_2|Y_1) & = & - H(Y_1|Y_2,X)
2999:                                + \frac{\alpha C}{H(Y_2|Y_1)}I(Y_1;Y_2)
3000:                   \end{eqnarray*}
3001:                   or
3002:                   \begin{equation}
3003:                   \label{eqn:example-alpah-C}
3004:                       \frac{\alpha C}{H(Y_2|Y_1)} = \frac{H(Y_1|Y_2,X)}{H(Y_2|Y_1) + I(Y_1;Y_2) - I(X;Y_2|Y_1)}.
3005:                   \end{equation}
3006:                   Note that $\frac{H(Y_1|Y_2,X)}{H(Y_2|Y_1) + I(Y_1;Y_2) - I(X;Y_2|Y_1)} < 1$, hence indeed
3007:                   $\left[\frac{\alpha C}{H(Y_2|Y_1)}\right]^* = \frac{\alpha C}{H(Y_2|Y_1)}$.
3008:                   Combining the constraint
3009:                   $\alpha \le 1$ with (\ref{eqn:example-alpah-C}), we obtain a lower bound on $C$:
3010:                   \[
3011:                       C \ge C_{\mbox{\scriptsize{min}}} \triangleq \frac{H(Y_2|Y_1) H(Y_1|Y_2,X)}{H(Y_2|Y_1) + I(Y_1;Y_2) - I(X;Y_2|Y_1) }.
3012:                   \]
3013: 
3014:                   To obtain a rate increase over the non-cooperative rate,~we need to verify in
3015:                   \eqref{eqn:example_R212''} that
3016:                   \begin{equation}
3017:                   \label{eqn:example_rate_constr}
3018:                       C + \frac{\alpha C}{H(Y_2|Y_1)}I(Y_1;Y_2)\!>\! H(Y_1|Y_2,X),
3019:                   \end{equation}
3020:                   which implies
3021:                   \begin{equation}
3022:                   \label{eqn:exmaple-C-lower-bound}
3023:                       C >                \frac{H(Y_1|Y_2,X)}{1 + \frac{\alpha}{H(Y_2|Y_1)}I(Y_1;Y_2)}.
3024:                   \end{equation}
3025:                   Again, plugging the equality \eqref{eqn:example-alpah-C} into the right-hand side of
3026:                    \eqref{eqn:exmaple-C-lower-bound} we get a second lower bound on $C$:
3027:                    \begin{eqnarray*}
3028:                       C \ge C_0 \triangleq  \frac{H(Y_1|Y_2,X) (H(Y_2|Y_1) - I(X;Y_2|Y_1))}{H(Y_2|Y_1) + I(Y_1;Y_2) - I(X;Y_2|Y_1)}.
3029:                    \end{eqnarray*}
3030:                   Note that $C_{\mbox{\scriptsize min}} \ge C_0$, so it is enough to satisfy $C \ge C_{\mbox{\scriptsize{min}}}$.
3031:                   Lastly, plugging (\ref{eqn:example-alpah-C}) into the expression for $R_{212}''$ in
3032:                   (\ref{eqn:example_R212''}) results in
3033:                   \begin{eqnarray}
3034:                   \label{eqn:R212_example}
3035:                       R_{212} = R_{212}'' & = & I(X;Y_2) - H(Y_1|Y_2,X) + C +
3036:                                                \frac{I(Y_1;Y_2) H(Y_1|Y_2,X)}{H(Y_2|Y_1) + I(Y_1;Y_2) - I(X;Y_2|Y_1) },
3037:                   \end{eqnarray}
3038:                   for $C \ge C_{\mbox{\scriptsize{min}}}$.
3039:                    % satisfies (\ref{eqn:exmaple-C-lower-bound}).
3040:                   We note that $R_{212} = I(X;Y_1,Y_2)$ for
3041:                   \[
3042:                       C = H(Y_1|Y_2) - \frac{I(Y_1;Y_2) H(Y_1|Y_2,X)}{H(Y_2|Y_1) + I(Y_1;Y_2) - I(X;Y_2|Y_1) } \ge C_{\mbox{\scriptsize{min}}},
3043:                   \]
3044:                   namely we achieve the full cooperation bound when the capacity $C$ is {\em less} than the
3045:                   full cooperation capacity required by the Slepian-Wolf theorem
3046:                   (see \cite[theorem 14.4.1]{cover-thomas:it-book}). This cannot be achieved using theorem \ref{thm:CEG_EAF}
3047:                   or the approach in \cite{DraperFK:03}.
3048: 
3049:                   Next, examine $R_{12}$: writing $R_{12}$ explicitly we have
3050:                   \begin{eqnarray*}
3051:                       R_{12}  & = & \min \left( R_{12}', R_{12}'' \right),\\
3052:                       R_{12}' & = & I(X;Y_1) + C,\\
3053:                       R_{12}''& = & I(X;Y_2) + \frac{C}{H(Y_1|Y_2)}I(X;Y_1|Y_2).
3054:                       R_{12}''& = & I(X;Y_2) + \frac{C}{H(Y_1|Y_2)}I(X;Y_1|Y_2),
3055:                   in the region where $C < H(Y_1)$. Here, the equality $I(X;Y_2) = I(X;Y_1)$ implies that
3056:                   \begin{equation}
3057:                   \label{eqn:example-three-steps-two-step}
3058:                       R_{12} = I(X;Y_2) + \left[\frac{C}{H(Y_1|Y_2)}\right]^*I(X;Y_1|Y_2).
3059:                   \end{equation}
3060:                   We see that for $R_{12}$ the slope of the rate vs. $C$ is less than $1$, thus it is smaller
3061:                   than the slope of the rate for $R_{212}$.
3062:                   In our example we get that $R_{12} > R_{212}$
3063:                   for any value of $C$ in the range $0 \le C \le C_{\mbox{\scriptsize{min}}}$. In this range
3064:                   we also have that $\frac{C}{H(Y_1)} \le 1$.
3065: 
3066:                   For proposition \ref{prop:achive_common_one_step}, applying the same
3067:                   assumptions, and following the same reasoning leading to equation (\ref{eqn:example-three-steps-two-step})
3068:                   yields the achievable rate
3069:                   \begin{equation}
3070:                   \label{eqn:R_twostep_example}
3071:                       R = \max\Big(I(X;Y_2), I(X;Y_2) - H(Y_1|Y_2,X) + C\Big),
3072:                   \end{equation}
3073:                   for $C \le H(Y_1 | Y_2)$.
3074:                   We see that $C = H(Y_1|Y_2)$ is required for full cooperation, and when $C < H(Y_1|Y_2,X)$
3075:                   we get the non-cooperative rate $I(X;Y_2)$. The comparison between proposition \ref{prop:achive_common_one_step} and
3076:                   theorem \ref{thm:achieve-three-steps} for a fixed $p_X(x)$ on $\mX$ is depicted in figure
3077:                   \ref{fig:Three-steps-comparison}, together with the upper bound of proposition
3078:                   \ref{prop:common_upper}, which for this case specializes to:
3079:                   \[
3080:                       R^{\mbox{\scriptsize upper}} = \min \left\{I(X;Y_2)+C, I(X;Y_1,Y_2) \right\}.
3081:                   \]
3082: 
3083:                   \begin{figure}[htb]
3084:                          \centering
3085:                          \scalebox{0.60}{\includegraphics{Three-step-conference_new.eps}}
3086:                          \caption{\small The achievable rate $R$ vs. conference capacity $C$,
3087:                               for proposition \ref{prop:common_upper},
3088:                               proposition \ref{prop:achive_common_one_step} and theorem \ref{thm:achieve-three-steps},
3089:                               for the symmetric broadcast channel.}
3090:                          \label{fig:Three-steps-comparison}
3091:                   \end{figure}
3092: 
3093: \end{comment}
3094: 
3095: %        \subsection{Discussion}
3096: %        As can be easily observed in figure \ref{fig:Three-steps-comparison},
3097: %        when $C \ge C_{\mbox{\scriptsize{min}}}$ the three-step conference indeed provides
3098: %        a rate increase over the EAF-based two-step conference scheme of proposition \ref{prop:achive_common_one_step}.
3099: %        This can be seen by comparing the solid line when $C \ge C_{\mbox{\scriptsize{min}}}$ which
3100: %        represents the three-step conference and the dashed line that represents the EAF-based two-step
3101: %        conference. We see that both conferences result in rate expressions with slope of $1$ vs. C, but
3102: %        the three-step conference obtains the same rates for smaller values of $C$. This is because
3103: %        as long as the receiver that is first to decode is the limiting one
3104: %        (which is the case in the symmetric channel setup),
3105: %        then helping that receiver will increase the overall rate.
3106: 
3107: %        Also note the benefits of the new relaying strategy of theorem \ref{thm:main_thm}, which provides a
3108: %        rate increase over the non-cooperative rate even when both the three-step
3109: %        conference and the EAF-based two-step conference are not able to provide that, due to the constraint on the
3110: %        feasible region resulting from the Wyner-Ziv compression strategy. The superiority of the
3111: %        three-step conference over the two-step conference is also evidenced by directly comparing
3112: %        the rate expressions in \eqref{eqn:R212_example} and \eqref{eqn:R_twostep_example}: we see
3113: %        that the rate expressions are identical except an additive and (in general) positive term that
3114: %        appears only in the three-step rate expression \eqref{eqn:R212_example}. This term is the rate increase of the
3115: %        three-step scheme over the two-step scheme.
3116: 
3117: 
3118: 
3119: %        \begin{itemize}
3120: %            \item Talk about shlomo's paper
3121: %            \item explain that last thm is not easy to obtain from the general expression
3122: %        \end{itemize}
3123: 
3124: 
3125: 
3126: 
3127: 
3128: 
3129: 
3130: 
3131: 
3132: 
3133: 
3134: \section{Conclusions}
3135: \label{sec:conclu}
3136: 
3137: In this  paper we considered the EAF technique using time-sharing on the auxiliary RVs. We first showed that incorporating
3138: joint-decoding at the destination into the EAF technique results in a special case of the classic EAF of
3139: \cite[theorem 6]{CoverG:79}. We then used the time-sharing assignment of the auxiliary RVs to obtain an
3140: easily computable achievable rate for the multiple-relay case, which can be compared against the DAF-based results, to select the highest rate
3141: for any given scenario.
3142: Next, we showed that for the Gaussian relay channel with coded modulation, the Gaussian auxiliary RV
3143: assignment is not always optimal, and a TS-EAF implementing a per-symbol hard decision may sometimes perform better.
3144: Finally, we considered a third application of TS-EAF to the cooperative broadcast scenario with a multi-cycle
3145: conference. We first derived an achievable rate for the general channel, and then we specialized it to the single-cycle
3146: conference for which we obtained an explicit achievable rate. This rate is superior to the explicit expression that
3147: can be obtained with joint-decoding.
3148: 
3149: 
3150: 
3151: 
3152: \section{Acknowledgements}
3153: % The authors wish to thank Gerhard Kramer and Shlomo Shamai for discussions regarding this work.
3154: In the final version.
3155: 
3156: 
3157: 
3158: 
3159: 
3160: 
3161: 
3162: 
3163: 
3164: 
3165: 
3166: 
3167: 
3168: 
3169: 
3170: 
3171: 
3172: 
3173: 
3174: 
3175: 
3176: 
3177: 
3178: 
3179: 
3180: 
3181: 
3182: 
3183: 
3184: 
3185: 
3186: 
3187: 
3188: 
3189: 
3190: 
3191: 
3192: 
3193: 
3194: 
3195: 
3196: 
3197: 
3198: 
3199: 
3200: 
3201: 
3202: 
3203: 
3204: 
3205: 
3206: 
3207: 
3208: \useRomanappendicesfalse
3209: \appendices
3210: \setcounter{equation}{0}
3211: \renewcommand{\theequation}{\thesection.\arabic{equation}}
3212: 
3213: 
3214: 
3215: 
3216: \section{Expressions for Section \ref{sec:Gauss_relay}}
3217: \label{append:Gauss-deriv}
3218: 
3219: % We first recall the definition in equation \eqref{eqn:def_G} for $G_x(a,b)$:
3220: % \[
3221: %     G_x(a,b) = \frac{1}{\sqrt{2 \pi b}} e ^{-\frac{(x-a)^2}{2 b} }.
3222: % \]
3223: \subsection{Hard-Decision Estimate-and-Forward}
3224: \label{append:Gauss-deriv-HD-EAF}
3225: We evaluate $I(X;\hY_1,Y)$, with $p(\hY_1|Y_1)$ given by \eqref{eqn:def_p_hy1_given_y1_HD_eq1}
3226: and \eqref{eqn:def_p_hy1_given_y1_HD_eq2} using:
3227: \[
3228:     I(X;\hY_1,Y) = I(X;\hY_1) + I(X;Y|\hY_1).
3229: \]
3230: \begin{enumerate}
3231:     \item Evaluating $I(X;\hY_1)$:  Note that both $X$ and $\hY_1$ are discrete RVs,
3232:         therefore $I(X;\hY_1)$ can be evaluated using the
3233:         discrete entropies. The conditional distribution of $\hY_1$ given $X$ is given by:
3234:         \begin{equation}
3235:         \label{eqn:def_p_hy1_given_x}
3236:             p(\hY_1|X=\sqrt{P}) = \left\{
3237:                                         \begin{array}{cr}
3238:                                         P_1 \cdot P_{\ners}, &              1\\
3239:                                         1-P_{\ners}, &                      E\\
3240:                                         (1-P_1) P_{\ners},&                 -1
3241:                                         \end{array}
3242:                                     \right.
3243:         \end{equation}
3244:         where
3245:         \[
3246:             P_1 = \Pr(Y_1 > 0 | X = \sqrt{P}).
3247:         \]
3248:         $p(\hY_1|X=-\sqrt{P})$ can be obtained from $p(\hY_1|X=\sqrt{P})$ by switching $1$ and $-1$ in \eqref{eqn:def_p_hy1_given_x}.
3249: 
3250:     \item Evaluating $I(X;Y|\hY_1)$: write first
3251:         \[
3252:             I(X;Y|\hY_1) = h(Y| \hY_1) - h(Y | \hY_1, X),
3253:         \]
3254:         and we note that
3255:         \[
3256:             h(Y | \hY_1, X) = h(X + N | \hY_1 , X) = h(N | \hY_1 , X) = h(N) = \frac{1}{2} \log_2 (2 \pi e \sigD).
3257:         \]
3258:         By the definition of conditional entropy we write
3259:         \[
3260:             h(Y| \hY_1) = p(\hY_1 = 1) h(Y| \hY_1 = 1) + p(\hY_1 = E) h(Y | \hY_1 = E) + p(\hY_1 = -1) h(Y | \hY_1 = -1),
3261:         \]
3262:         where $p(\hY_1)$ can be obtained by combining \eqref{eqn:def_PX} and \eqref{eqn:def_p_hy1_given_x} which results in
3263:         \begin{equation}
3264:         \label{eqn:appndx_p_hy1}
3265:             p(\hY_1) = \left\{
3266:                         \begin{array}{cr}
3267:                             \frac{1}{2}P_{\ners}, & 1\\
3268:                             1 - P_{\ners} , & E\\
3269:                             \frac{1}{2}P_{\ners}, & -1
3270:                         \end{array}
3271:                     \right.,
3272:         \end{equation}
3273:         and we note that $h(Y | \hY_1 = E) = h(Y)$, since erasure is equivalent to no prior information.
3274:         Finally we note that by definition
3275:         \begin{eqnarray}
3276:             h(Y) & = &  -\int_{y = -\infty}^{\infty} f(y) \log_2(f(y)) dy, \nonumber\\
3277:             f(Y) & = & \Pr(X = \sqrt{P}) f(Y | X = \sqrt{P}) + \Pr(X = -\sqrt{P}) f(Y | X = -\sqrt{P})\nonumber\\
3278:             \label{eqn:f_Y_HC}
3279:                  & = & \frac{1}{2}\left(G_y(\sqrt{P},\sigD) + G_y(-\sqrt{P},\sigD) \right),
3280:         \end{eqnarray}
3281:         where
3282:         \begin{equation}
3283:             \label{eqn:def_G}
3284:                 G_x(a,b) = \frac{1}{\sqrt{2 \pi b}} e ^{-\frac{(x-a)^2}{2 b} }.
3285:         \end{equation}
3286:         Next, we have
3287:         \begin{eqnarray}
3288:                         \label{eqn:cond_entropy_hy1_is_1}
3289:             h(Y|\hY_1 = 1) & = & -\int_{y = -\infty}^{\infty} f(y|\hy_1 = 1) \log_2(f(y|\hy_1 = 1)) dy\\
3290:             f(Y|\hY_1 = 1) & = & \frac{f(Y,\hY_1 = 1)}{\Pr(\hY_1 = 1)}\nonumber\\
3291:                         & = & \frac{f(Y,Y_1 > 0)P_{\ners}}{\Pr(Y_1 > 0)P_{\ners}}\nonumber\\
3292:             \label{eqn:cond_f_hy1_is_1}
3293:                         & = & \frac{f(Y,Y_1 > 0)}{\Pr(Y_1 > 0)},\\
3294:             f(Y,Y_1 > 0) & = & \Pr(X = \sqrt{P}) f(Y,Y_1 > 0 | X = \sqrt{P}) + \Pr(X = -\sqrt{P}) f(Y,Y_1 > 0 | X = -\sqrt{P})\nonumber\\
3295:             \label{eqn:cond_f_y1_pos}
3296:                         & = & \frac{1}{2}\left( f(Y,Y_1 > 0 | X = \sqrt{P}) + f(Y,Y_1 > 0 | X = -\sqrt{P})\right).
3297:         \end{eqnarray}
3298:         Using
3299:         \[
3300:                     f_{Y,Y_1}(y,y_1 | x )  = \mN\left(
3301:                     \left( \begin{array}{c}
3302:                             x\\ g \cdot x
3303:                         \end{array}\right),
3304:                         \left( \begin{array}{cc}
3305:                             \sigD & 0\\ 0 & \sigR
3306:                             \end{array}
3307:                             \right)
3308:                                     \right)  = G_y(x,\sigD)G_{y_1}(g\cdot x,\sigR),
3309:         \]
3310:         we obtain
3311:         \[
3312:                   f(Y,Y_1 > 0 | X )  =  \int_{y_1 = 0}^{\infty} f(y,y_1 | x ) dy_1 = G_y(x, \sigD)
3313:                         \int_{y_1 = 0}^{\infty} G_{y_1}(g \cdot x, \sigR) dy_1.
3314:         \]
3315: \end{enumerate}
3316: 
3317: 
3318: Next we need to evaluate $I(\hY_1;Y_1|Y) = h(Y_1|Y) - h(Y_1|Y, \hY_1)$:
3319: \begin{enumerate}
3320:         \item $h(Y_1|Y) = h(Y,Y_1) - h(Y)$. Here
3321:         \begin{eqnarray*}
3322:             h(Y,Y_1) & = & -\int_{y = -\infty}^{\infty}\int_{y_1 = -\infty}^{\infty} f(y,y_1) \log_2(f(y,y_1)) dy \;dy_1,\\
3323:             f(Y,Y_1) & = & \frac{1}{2}\left(f(Y,Y_1|X = \sqrt{P}) + f(Y,Y_1|X = -\sqrt{P})\right),\\
3324:             f(Y,Y_1|X ) & = & G_y(x,\sigD)G_{y_1}(g \cdot x ,\sigR).
3325:         \end{eqnarray*}
3326: 
3327:         \item By the definition of conditional entropy we have
3328:          \[
3329:             h(Y_1|Y, \hY_1) = p(\hY_1 = 1) h(Y_1| Y, \hY_1 = 1) + p(\hY_1 = E) h(Y_1 |Y, \hY_1 = E) + p(\hY_1 = -1) h(Y_1 | Y, \hY_1 = -1),
3330:         \]
3331:         where $h(Y_1 |Y, \hY_1 = E) = h(Y_1 |Y )$,
3332:         and for $\hY_1 = 1$, for example, we have
3333:         \[
3334:             h(Y_1 |Y, \hY_1 = 1) = -\int_{y = -\infty}^{\infty} \int_{y_1 = -\infty}^{\infty}
3335:                 f(y,y_1|\hy_1 = 1)\log_2(f(y_1|y, \hy_1 = 1)) dy \; dy_1.
3336:         \]
3337:         Finally, we need to derive the distributions $f(y,y_1|\hy_1 = 1)$ and $f(y_1|y, \hy_1 = 1)$.
3338:         Begin with
3339:         \begin{eqnarray*}
3340:             &   & f_{Y,Y_1|\hY_1}(y,y_1|\hy_1 = 1) = \frac{f_{Y,Y_1,\hY_1}(y,y_1,\hy_1 = 1)}{\Pr(\hy_1 = 1)}\\
3341:             &   & \phantom{XXXXXX}\qquad  = \frac{f_{Y,Y_1,\hY_1}(y,y_1,y_1 > 0)P_{\ners}}{\Pr(y_1 > 0)P_{\ners}} = f(y,y_1|y_1 > 0) = \left\{
3342:                                                             \begin{array}{cl}
3343:                                                                 \frac{f_{Y,Y_1}(y,y_1)}{\Pr(Y_1>0)} , & y_1 > 0\\
3344:                                                                 0                           , & y_1 \le 0
3345:                                                             \end{array}
3346:                                                         \right.
3347:         \end{eqnarray*}
3348:         and due to the symmetry, $\Pr(Y_1 > 0)  = \Pr(Y_1 \le 0) = \frac{1}{2}$.
3349:         We also have
3350:         \begin{eqnarray*}
3351:         f(Y_1|Y , \hY_1 = 1) & =  & \frac{f(Y_1,Y|\hY_1 = 1) }{f(Y|\hY_1 = 1)}  =  \frac{f(Y_1,Y|Y_1 >0 ) }{f(Y|Y_1 > 0)} = \frac{\frac{f(Y_1,Y) }{\Pr(Y_1>0)} }{\frac{f(Y,Y_1 > 0)}{\Pr(Y_1>0)}} = \frac{f(Y_1,Y)}{f(Y,Y_1 > 0)}, \quad Y_1 > 0\\
3352:         f(Y_1|Y , \hY_1 = 1) & = & 0, \quad Y_1 \le 0.
3353:         \end{eqnarray*}
3354: \end{enumerate}
3355: 
3356: 
3357: 
3358: 
3359: 
3360: 
3361: 
3362: \subsection{Evaluation of the Rate with DHD}
3363: \label{sec:expressions_DHD}
3364: We evaluate the achievable rate using $I(X;Y,\hY_1) = I(X;\hY_1) + I(X;Y|\hY_1)$.
3365: The distribution of $\hY_1$ is given by:
3366: \begin{eqnarray*}
3367:     \Pr(\hY_1 = 1)  =  \Pr(Y_1 > T) & = & \frac{1}{2}\left(\Pr(Y_1 > T | X = \sqrt{P}) + \Pr(Y_1 > T | X = -\sqrt{P}) \right)\\
3368:              & = & \frac{1}{2} \left(\int_{y_1 > T}G_{y_1}(g\sqrt{P},\sigR)dy_1 + \int_{y_1 > T}G_{y_1}(-g\sqrt{P},\sigR)dy_1\right)\\
3369:     \Pr(\hY_1 = E)  =  \Pr(|Y_1| \le T) & = & \frac{1}{2}\left(\Pr(|Y_1| \le T | X = \sqrt{P})
3370:                 + \Pr(|Y_1| \le T | X = -\sqrt{P}) \right)\\
3371:              & = & \frac{1}{2} \left(\int_{y_1 =-T}^T G_{y_1}(g\sqrt{P},\sigR)dy_1
3372:                 + \int_{y_1 =-T}^T G_{y_1}(-g\sqrt{P},\sigR)dy_1\right),
3373: \end{eqnarray*}
3374: and by symmetry, $\Pr(\hY_1 = 1) = \Pr(\hY_1 = -1)$ and $H(\hY_1|X = \sqrt{P}) = H(\hY_1|X = -\sqrt{P})$.
3375: Therefore, we need the conditional distribution $p(\hY_1|X = \sqrt{P})$:
3376: \begin{eqnarray*}
3377:     \Pr(\hY_1 = 1 | X = \sqrt{P}) & = & \Pr(Y_1 > T| X = \sqrt{P}) = \int_{y_1 > T}G_{y_1}(g\sqrt{P},\sigR)dy_1\\
3378:     \Pr(\hY_1 = -1 | X = \sqrt{P}) & = & \Pr(Y_1 < -T| X = \sqrt{P}) = \int_{y_1 <-T}G_{y_1}(g\sqrt{P},\sigR)dy_1\\
3379:     \Pr(\hY_1 = E | X = \sqrt{P}) & = & 1 - \Pr(\hY_1 = 1 | X = \sqrt{P}) - \Pr(\hY_1 = -1 | X = \sqrt{P}).
3380: \end{eqnarray*}
3381: This allows us to evaluate $I(X;\hY_1) = H(\hY_1) - H(\hY_1|X)$. For evaluating $I(X;Y|\hY_1)$ note that
3382: \[
3383:     h(Y|\hY_1,X) = h(X + N | \hY_1,X) = h(N |\hY_1,X) = h(N) = \frac{1}{2}\log_2(2 \pi e \sigD),
3384: \]
3385: and we need only to evaluate $h(Y|\hY_1)$: by definition
3386: \[
3387:     h(Y|\hY_1) = \Pr(\hY_1 = 1)h(Y|\hY_1 = 1) + \Pr(\hY_1 = E)h(Y|\hY_1=E) + \Pr(\hY_1 = -1)h(Y|\hY_1 = -1),
3388: \]
3389: and note that $h(Y|\hY_1=E) = h(Y)$. Finally,
3390: \begin{eqnarray*}
3391:     h(Y|\hY_1 = 1) & = &   -\int_{y=-\infty}^{\infty} f(y|\hy_1 = 1) \log_2 (f(y | \hy_1 = 1)) dy\\
3392:     f_{Y|\hY_1}(y|\hy_1 = 1) & = & f(y|y_1 >T) = \frac{f(y,y_1 >T)}{\Pr(Y_1 > T)}\\
3393:     f_{Y,Y_1}(y,y_1 >T) & = & \frac{1}{2}\left(f(y,y_1 >T | X = \sqrt{P}) + f(y,y_1 >T| X = -\sqrt{P})\right)\\
3394:          & = & \frac{1}{2}\left(G_y(\sqrt{P},\sigD)\Pr(Y_1>T|X = \sqrt{P}) + G_y(-\sqrt{P},\sigD)\Pr(Y_1>T|X = -\sqrt{P})\right).
3395: \end{eqnarray*}
3396: 
3397: Evaluating $I(\hY_1;Y_1|Y)$ we have:
3398: \begin{eqnarray*}
3399:     I(\hY_1;Y_1|Y) & = & H(\hY_1|Y) - H(\hY_1|Y,Y_1) \\
3400:      & \stackrel{(a)}{=} & H(\hY_1|Y)\\
3401:      & = & H(\hY_1) + h(Y|\hY_1) - h(Y),
3402: \end{eqnarray*}
3403: where (a) is due to the deterministic mapping from $Y_1$ to $\hY_1$, and $h(Y)$ can be evaluated using
3404: \eqref{eqn:f_Y_HC}.
3405: 
3406: \subsubsection{DHD when $T \rightarrow 0$}
3407: \label{sec:HDH-Explanation}
3408: As $T \rightarrow 0$ we have that $\Pr(\hY_1 = E) \rightarrow 0$ and $\hY_1$ converges in distribution to a
3409: Bernoulli RV with probability $\frac{1}{2}$. Therefore
3410: \begin{eqnarray*}
3411:     f(Y,\hY_1 = 1) & = & \frac{1}{2}\left(G_y(\sqrt{P},\sigD)\Pr(Y_1>T|X = \sqrt{P}) + G_y(-\sqrt{P},\sigD)\Pr(Y_1>T|X = -\sqrt{P})\right)\\
3412:         & \stackrel{T \rightarrow 0}{\approx} & \frac{1}{2}\left(G_y(\sqrt{P},\sigD)\Pr(Y_1>0|X = \sqrt{P})
3413:                 + G_y(-\sqrt{P},\sigD)\Pr(Y_1>0|X = -\sqrt{P})\right)\\
3414:         &  =  & \frac{1}{2}\left(G_y(\sqrt{P},\sigD)P_+
3415:                 + G_y(-\sqrt{P},\sigD)(1 - P_+)\right),
3416: \end{eqnarray*}
3417: where $P_+ = \Pr(Y_1>0|X = \sqrt{P})$. Now, letting $g \rightarrow 0$ we have that $P_+ \rightarrow \frac{1}{2}$ and
3418: therefore
3419: \begin{eqnarray*}
3420:     f(Y|\hY_1 = 1) & \stackrel{g \rightarrow 0, T \rightarrow 0}{\longrightarrow } & f(Y)\\
3421:     \Rightarrow h(Y|\hY_1 = 1) & \stackrel{g \rightarrow 0, T \rightarrow 0}{\longrightarrow }& h(Y).
3422: \end{eqnarray*}
3423: We conclude that as $g \rightarrow 0, T \rightarrow 0$, then $h(Y|\hY_1) \rightarrow h(Y)$ and therefore
3424: $I(Y_1;\hY_1|Y)$ becomes
3425: \[
3426:     I(Y_1;\hY_1|Y)  =  H(\hY_1) + h(Y|\hY_1) - h(Y)  \stackrel{g \rightarrow 0, T \rightarrow 0}{\longrightarrow } 1.
3427: \]
3428: Using the continuity of $I(Y_1;\hY_1|Y)$ we conclude that for small values of $g$, as $T$ decreases then
3429: $I(Y_1;\hY_1|Y)$ is bounded from below. This implies that for small $g$ and small $C$ the feasibility
3430: is obtained only for large $T$, which in turn implies low rate.
3431: 
3432: 
3433: 
3434: 
3435: 
3436: 
3437: \subsection{Evaluating the Information Rate with TS-DHD}
3438: \label{appndx:expressions_TS_DHD}
3439: \subsubsection{Evaluating $I(X;Y,\hY_1)$}
3440: We first write
3441: \[
3442:     I(X;Y,\hY_1) = I(X;\hY_1) + I(X;Y|\hY_1).
3443: \]
3444: Evaluating $I(X;\hY_1) = H(\hY_1) - H(\hY_1|X)$ requires the marginal of $\hY_1$.
3445: Using the mapping defined in \eqref{eqn:def_TS-DHD} we find the marginal distribution of $\hY_1$:
3446: \[
3447:     \Pr(\hY_1) = \left\{
3448:         \begin{array}{cl}
3449:             1,  & (1-P_{\ers})\Pr(Y_1>T)\\
3450:             E,  & \Pr(|Y_1| \le T) + P_{\ers} \Pr(|Y_1|>T)\\
3451:             -1, & (1-P_{\ers})\Pr(Y_1 < -T)
3452:         \end{array}
3453:     \right.,
3454: \]
3455: where
3456: \begin{eqnarray*}
3457:     \Pr(Y_1 > T) = \Pr(Y_1 < -T) & = & \int_{y_1 = T}^{\infty} \frac{1}{2}\left[G_{y_1}(g\sqrt{P},\sigR)+ G_{y_1}(-g\sqrt{P},\sigR) \right]d y_1\\
3458:     \Pr(|Y_1| \le T) & = & \int_{y_1 = -T}^{T} \frac{1}{2}\left[G_{y_1}(g\sqrt{P},\sigR)+ G_{y_1}(-g\sqrt{P},\sigR) \right]d y_1.
3459: \end{eqnarray*}
3460: Also, due to symmetry we have that $H(\hY_1|X = \sqrt{P}) = H(\hY_1|X = -\sqrt{P})$, and therefore we need only to find the conditional
3461: $\Pr(\hY_1|X = \sqrt{P})$:
3462: \[
3463:     \Pr(\hY_1|X = \sqrt{P}) = \left\{
3464:         \begin{array}{cl}
3465:             1,  & (1-P_{\ers})\Pr(Y_1>T|X = \sqrt{P})\\
3466:             E,  & \Pr(|Y_1| \le T|X = \sqrt{P}) + P_{\ers} \Pr(|Y_1|>T|X = \sqrt{P})\\
3467:             -1, & (1-P_{\ers})\Pr(Y_1 < -T|X = \sqrt{P})
3468:         \end{array}
3469:     \right.,
3470: \]
3471: and we note that $f_{Y_1|X} (y_1 | x = \sqrt{P}) = G_{y_1}(g\sqrt{P},\sigR)$.
3472: 
3473: Next, we need to evaluate $I(X;Y|\hY_1) = h(Y|\hY_1) - h(Y | \hY_1,X)$. We first note that
3474: \[
3475:     h(Y| \hY_1,X) = h(X + N|X,\hY_1) = h(N|X , \hY_1) = h(N) = \frac{1}{2}\log_2(2 \pi e \sigD).
3476: \]
3477: Lastly, we have
3478: \[
3479:     h(Y|\hY_1) = \Pr(\hY_1 = 1) h(Y|\hY_1 = 1) + \Pr(\hY_1 = E) h(Y|\hY_1 = E) + \Pr(\hY_1 = -1)h(Y|\hY_1 = -1).
3480: \]
3481: We note that $h(Y|\hY_1 = E) = h(Y)$ and that $h(Y|\hY_1 = 1)$ and $h(Y|\hY_1 = -1)$ are calculated exactly as in
3482: appendix \ref{sec:expressions_DHD} for the DHD case.
3483: 
3484: 
3485: 
3486: 
3487: 
3488: \subsubsection{Evaluating $I(\hY_1;Y_1|Y)$}
3489: Begin by writing
3490: \begin{eqnarray*}
3491:     I(\hY_1;Y_1|Y) & = &  h(\hY_1|Y) - h(\hY_1|Y_1,Y) \\
3492:         & = & h(Y|\hY_1) + H(\hY_1) - h(Y) - h(\hY_1|Y_1),
3493: \end{eqnarray*}
3494: where we used the fact that given $Y_1$, $\hY_1$ is independent of $Y$. All the terms in the above expressions have been calculated
3495: in the previous subsection, except $h(\hY_1|Y_1)$:
3496: \begin{eqnarray*}
3497:     h(\hY_1|Y_1) & = & \Pr(Y_1 > T) h(\hY_1|Y_1 > T) + \Pr(|Y_1| \le T) h(\hY_1 \big| |Y_1| \le T) + \Pr(Y_1 < -T) h(\hY_1|Y_1 < -T)\\
3498:      & = & \Pr(Y_1 > T) H(P_{\ers},1 - P_{\ers}) +  \Pr(Y_1 < -T)H(P_{\ers},1 - P_{\ers}) \\
3499:      & = & \left(1 - \Pr(|Y_1| \le T)\right)H(P_{\ers},1 - P_{\ers}).
3500: \end{eqnarray*}
3501: 
3502: 
3503: 
3504: \subsection{Gaussian-Quantization Estimate-and-Forward}
3505: Here the relay uses the assignment of equation \eqref{eqn:def_qaussian_quant}:
3506: \[
3507:     \hY_1 = Y_1 + N_Q, \qquad N_Q \sim \mN(0, \sigQ).
3508: \]
3509: We first evaluate
3510: \begin{eqnarray*}
3511:     I(X;Y,\hY_1) = h(Y,\hY_1) - h(Y,\hY_1|X):
3512: \end{eqnarray*}
3513: \begin{enumerate}
3514:     \item
3515:         \begin{eqnarray}
3516:             h(Y,\hY_1) & = & - \int_{y = -\infty}^{\infty} \int_{\hy_1 = -\infty}^{\infty}
3517:                 f_{Y,\hY_1}(y, \hy_1) \log_2(f_{Y,\hY_1}(y,\hy_1)) dy \; d\hy_1\nonumber\\
3518:         \label{eqn:joint_y_hy1_gq_eaf}
3519:             f_{Y,\hY_1}(y,\hy_1) & = & \frac{1}{2}\left(G_y(\sqrt{P},\sigD)G_{\hy_1}(g\sqrt{P},\sigR + \sigQ)
3520:                         +G_y(-\sqrt{P},\sigD)G_{\hy_1}(-g\sqrt{P},\sigR + \sigQ)\right).
3521:         \end{eqnarray}
3522: 
3523:     \item We also have
3524:     \begin{eqnarray*}
3525:         h(Y,\hY_1|X) & = & h(X + N, gX + N_1 + N_Q|X)\\
3526:                     & = & h( N,  N_1 + N_Q|X)\\
3527:                     & = & h(N) + h(N_1 + N_Q)\\
3528:                     & = & \frac{1}{2}\log_2\left((2\pi e)^2\sigD (\sigR + \sigQ)\right).
3529:     \end{eqnarray*}
3530: \end{enumerate}
3531: Lastly we need to evaluate
3532: \[
3533:     I(\hY_1;Y_1|Y) = h(\hY_1|Y) - h(\hY_1 | Y_1,Y) = h(\hY_1,Y) - h(Y) - h(\hY_1 | Y_1,Y),
3534: \]
3535: where
3536: \[
3537:     h(\hY_1| Y_1, Y) = h(Y_1 + N_Q | Y_1,Y) = h(N_Q|Y_1,Y) = h(N_Q) = \frac{1}{2} \log_2(2 \pi e \sigQ).
3538: \]
3539: 
3540: \subsection{Approximation of HD-EAF for $\sigD \rightarrow \infty$}
3541:      \label{appndx:appndxHD-EAF-highSNR}
3542:      Using \eqref{eqn:def_p_hy1_given_x} and \eqref{eqn:appndx_p_hy1} we can write
3543:         \begin{eqnarray*}
3544:             R \le I(X;\hY_1) & = & H(\hY_1) - H(\hY_1|X) \nonumber\\
3545:                     & = & H\left(\frac{1}{2}P_{\ners}, 1 - P_{\ners} ,\frac{1}{2}P_{\ners}\right)
3546:                             - H\left(P_1 P_{\ners}, 1 - P_{\ners}, (1-P_1)P_{\ners}\right) \nonumber\\
3547:                     & = & -P_{\ners} \log_2\left(\frac{1}{2}P_{\ners}\right) -(1 - P_{\ners})\log_2(1 - P_{\ners})+  P_1 P_{\ners} \log_2(P_1 P_{\ners})\nonumber\\
3548:                     &   & \quad     +(1 - P_{\ners})\log_2(1 - P_{\ners})  +  (1-P_1)P_{\ners}\log_2((1-P_1)P_{\ners})\nonumber\\
3549:                     & = & -P_{\ners} \log_2\left(P_{\ners}\right) +P_{\ners}  +  P_1 P_{\ners} \log_2(P_1) + P_1 P_{\ners} \log_2(P_{\ners})\nonumber\\
3550:                     &   & \quad       +  (1-P_1)P_{\ners}\log_2(1-P_1) + (1-P_1)P_{\ners}\log_2(P_{\ners}) \nonumber\\
3551:                     & = &  P_{\ners}(1  +  P_1  \log_2(P_1) +  (1-P_1)\log_2(1-P_1) ) \nonumber\\
3552:                     & = &  P_{\ners}(1  -H ( P_1  ,1-P_1 )).
3553:             \end{eqnarray*}
3554:         \begin{eqnarray*}
3555:             I(Y_1;\hY_1|Y)  & = & h(\hY_1|Y) - h(\hY_1|Y_1,Y)\\
3556:                     & \stackrel{(a)}{\approx} & H(\hY_1) - H(\hY_1|Y_1)\\
3557:                     & = & H\left(\frac{1}{2}P_{\ners}, 1 - P_{\ners} ,\frac{1}{2}P_{\ners}\right) -
3558:                         H(P_{\ners},1-P_{\ners})\\
3559:                     & = & - 2 \frac{1}{2}P_{\ners} \log_2\left(\frac{1}{2}P_{\ners}\right)
3560:                         - (1 - P_{\ners}) \log_2\left(1 - P_{\ners}\right) + P_{\ners} \log_2(P_{\ners})\\
3561:                     &   & \quad        + (1 - P_{\ners}) \log_2\left(1 - P_{\ners}\right)\\
3562:                     & = &  P_{\ners},
3563:         \end{eqnarray*}
3564: where in (a) we used the fact that $\hY_1$ and $Y$ are independent as $\sigD \rightarrow \infty$, and that given
3565: $Y_1$, $\hY_1$ is independent of $Y$.
3566: 
3567: 
3568: 
3569: 
3570: 
3571: 
3572: 
3573: 
3574: \setcounter{equation}{0}
3575: \section{Proof of Corollary \ref{corr:single-coomon-message-with-multi-step}}
3576: \label{appndx:prof_corollary_single_common}
3577: %\begin{proof}
3578: 
3579: %    In the proof we combine channel coding and the multi-step
3580: %    conference proposed by Kaspi in \cite{Kaspi:85}.
3581: %
3582: %    Fix $n$, $\alpha \in [0,1]$, $p(x)$, and for $k = 1,2,...,K$, fix $p\left(\hy_1^{(k)}|y_1,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)$
3583: %    and\\
3584: %     $p\left(\hy_2^{(k)}|y_2,\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)$.
3585: %
3586:     In the following we highlight only the modifications from the general broadcast result due to the application of
3587:     DAF to the last
3588:     conference step from $\Rgood$ to $\Rbad$, and the fact that we transmit a single message.
3589: 
3590:     \subsubsection{Codebook Generation and Encoding at the Transmitter}
3591:         The transmitter generates $2^{nR}$ codewords $\xvec$ in an i.i.d. manner according to
3592:         $p(\xvec(w)) = \prod_{i=1}^n p(x_i(w))$, $w \in \mW = \left\{1,2,...,2^{nR}\right\}$. For transmission
3593:         of the message $w_i$ at time $i$ the transmitter outputs $\xvec(w_i)$.
3594: 
3595:     \subsubsection{Codebook Generation at the $\Rgood$}
3596: %        \begin{itemize}
3597: %            \item For the first conference step from $\Rgood$ to $\Rbad$, $\Rgood$ generates a codebook with $2^{nR_{12}'^{(1)}}$ codeword denoted
3598: %                $\mZ_{12}^{(1)} = \left\{1,2,...,2^{nR_{12}'^{(1)}}\right\}$ according to the distribution
3599: %                $p\left(\hy_1^{(1)}\right)$:
3600: %                $p\left(\hyvec_1^{(1)}(z_{12}^{(1)})\right) = \prod_{i=1}^n p\left(\hy_{1,i}^{(1)}(z_{12}^{(1)})\right)$,
3601: %                $z_{12}^{(1)} \in \mZ_{12}^{(1)}$. $\Rgood$ then uniformly and independently partitions the codebook
3602: %                $\mZ_{12}^{(1)}$ into $2^{nR_{12}^{(1)}}$ subsets indexed by
3603: %                $w_{12}^{(1)} \in \mW_{12}^{(1)} = \left\{1,2,...,2^{nR_{12}^{(1)}}\right\}$. Denote these sets
3604: %                with $\mS_{12,w_{12}^{(1)}}^{(1)}$.
3605: %            \item For the first conference step from $\Rbad$ to $\Rgood$, $\Rbad$ generates a codebook
3606: %                for each codeword $\hyvec_1^{(1)}(z_{12}^{(1)})$, $z_{12}^{(1)} \in \mZ_{12}^{(1)}$ in an i.i.d.
3607: %                manner according to
3608: %                $p\left(\hyvec_2^{(1)}(z_{21}^{(1)}|z_{12}^{(1)})\right)                 \prod_{i=1}^n p\left(\hy_{2,i}^{(1)}(z_{21}^{(1)}|z_{12}^{(1)})\Big|\hy_{1,i}(z_{12}^{(1)})\right)$,
3609: %                $z_{21}^{(1)} \in \mZ_{21}^{(1)} = \left\{1,2,..., 2^{nR_{21}'^{(1)}}\right\}$.
3610: %                $\Rbad$ then uniformly and independently partitions the codebook $\mZ_{21}^{(1)}$
3611: %                 into $2^{nR_{21}^{(1)}}$ subsets indexed by
3612: %                $w_{21}^{(1)} \in \mW_{21}^{(1)} = \left\{1,2,...,2^{nR_{21}^{(1)}}\right\}$. Denote these sets
3613: %                with $\mS_{21,w_{21}^{(1)}}^{(1)}$.
3614: %            \item For the $k$'th conference step from $\Rgood$ to $\Rbad$, then for each combination of
3615: %                $z_{12}^{(1)},z_{12}^{(2)},...,z_{12}^{(k-1)}$, and \\
3616: %                $z_{21}^{(1)},z_{21}^{(2)},...,z_{21}^{(k-1)}$, $\Rgood$ generates a codebook with $2^{nR_{12}'^{(k)}}$
3617: %                messages denoted by $\mZ_{12}^{(k)} = \left\{1,2,...,2^{nR_{12}'^{(k)}}\right\}$,
3618: %                according to the distribution
3619: %                 $p\left(\hy_1^{(k)}|\hy_1^{(1)},\hy_1^{(2)},...,\hy_1^{(k-1)},\hy_2^{(1)},\hy_2^{(2)},...,\hy_2^{(k-1)}\right)$.
3620: %                 Then $\Rgood$ uniformly and independently partitions the codebook
3621: %                $\mZ_{12}^{(k)}$ into $2^{nR_{12}^{(k)}}$ subsets indexed by
3622: %                $w_{12}^{(k)} \in \mW_{12}^{(k)} = \left\{1,2,...,2^{nR_{12}^{(k)}}\right\}$. Denote these sets
3623: %                with $\mS_{12,w_{12}^{(k)}}^{(k)}$.
3624: %            \item The codebook for the $k$'th conference step from $\Rbad$ to $\Rgood$ is generated in a parallel manner
3625: 
3626:              The $K$ conference steps from $\Rgood$ to $\Rbad$ are carried out exactly as in section \ref{sec:DecEncMultiStepRgood}.
3627:              The first $K-1$ steps from $\Rbad$ to $\Rgood$ are carried out as in section \ref{sec:DecEncMultiStepRbad}.
3628:              The $K$'th conference step from $\Rbad$ to $\Rgood$, is different from that of theorem \ref{thm:multi-step-general-bc},
3629:              as after the $K$'th step from $\Rgood$ to $\Rbad$, $\Rbad$ may decode the message
3630:              since $\Rbad$ received all the $K$ conference messages from $\Rgood$. Then, $\Rbad$ uses decode-and-forward for
3631:                 its $K$'th conference transmission to $\Rgood$. Therefore, $\Rbad$ simply partitions $\mW$ into $2^{n \alpha C_{21}}$
3632:                 subsets in a uniform and independent manner.
3633: %        \end{itemize}
3634: 
3635: 
3636: 
3637:     \subsubsection{Encoding and Decoding at the $K$'th Conference Step from $\Rbad$ to $\Rgood$}
3638:         \begin{itemize}
3639: %            \item Encoding at $\Rgood$ at the $K$'th conference step is done as described in section
3640: %                \ref{sec:DecEncMultiStepRgood}.
            \item Before the $K$'th conference step, $\Rbad$ decodes its message using its channel output and all the
3642:             $K$ conference messages received from $\Rgood$. This can be done with an arbitrarily small probability of error as long as \eqref{eqn:R_2} is satisfied.
3643:             \item  Having decoded its message, $\Rbad$ uses the decode-and-forward strategy to select the
3644:                 $K$'th conference message to $\Rgood$. The conference capacity allocated to this step is
3645:                 $R_{21}^{(K)} = \alpha C_{21}$.
3646:             \item Having received the $K$'th conference message from $\Rbad$, $\Rgood$ can now
                decode its message by combining the information received in the first $K-1$ steps
                with the information from the last step, using the decode-and-forward
3649:                 decoding rule. This gives rise to \eqref{eqn:R_1}.
3650:         \end{itemize}
3651: 
3652:     \subsubsection{Combining All the Conference Rate Bounds}
3653:         The bounds on $R_{12}'^{(k)}$, $k = 1,2,...,K$ can be obtained as in section \ref{sec:combining_bounds_general}:
3654:         \begin{eqnarray*}
3655:             C_{12} & = & \sum_{k = 1}^K R_{12}^{(k)}\\
3656:                    & \ge &   I\left( \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)},
3657:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)};Y_1\big| Y_2\right) +2K\eps,
3658:         \end{eqnarray*}
3659:         and similarly
3660:         \[
3661:             (1-\alpha)C_{21} \ge I\left( \hY_1^{(1)},\hY_1^{(2)},...,\hY_1^{(K)},
3662:                         \hY_2^{(1)},\hY_2^{(2)},...,\hY_2^{(K-1)};Y_2\big| Y_1\right) +2K\eps,
3663:         \]
3664:         where $(1-\alpha)C_{21}$ is the total capacity allocated to the first $K-1$ conference steps from $\Rbad$ to $\Rgood$.
3665:          This provides the rate constraints on the conference auxiliary variables.
3666: %\end{proof}
3667: 
3668: 
3669: 
3670: 
3671: 
3672: 
3673: \begin{comment}
3674: 
3675: 
3676:            \section{The Gaussian Relay Channel}
3677:            \label{sec:Gauss_appendix}
3678: 
3679:            \subsection{The Motivating Example}
3680: 
3681:            \begin{eqnarray*}
3682:                Y_1 & = & \sqrt{g}X + N_1\\
3683:                Y   & = & X + N_2
3684:            \end{eqnarray*}
3685:            let
3686:            \[
3687:                \hY_1 = Y_1 + \nQ = \sqrt{g}X + N_1 + \nQ.
3688:            \]
3689:            Where for this scenario we have $Y \sim \mathcal{NC}(0,(1+P))$,
3690:            $\left(\begin{array}{c} y_1 \\ y \end{array} \right) = \left(\begin{array}{c} \sqrt{g}X + N_1 \\ X + N_2 \end{array} \right) \sim
3691:            \mathcal{NC}\left(\left[\begin{array}{c} 0 \\ 0 \end{array} \right],\left[\begin{array}{cc} gP+1 & \sqrt{g}P \\ \sqrt{g}P & P+1 \end{array} \right] \right)$
3692:            \begin{eqnarray*}
3693:                I(X;Y) & = & h(Y) - h(Y|X)\\
3694:                       & = & \log(1+P) - h(N_2)\\
3695:                       & = & \log(1+P) - \log(1)\\
3696:                       & = & \log(1+P)\\
3697:                I(X;Y_1|Y) & = & h(Y_1|Y) - h(Y_1|Y,X)\\
                          & = & h(Y_1|Y) - h(\sqrt{g}X+N_1|X,X+N_2)\\
                          & = & h(Y_1|Y) - h(N_1|X,N_2)\\
                          & = & h(Y_1|Y) - h(N_1)\\
3701:                           & = & h(Y_1|Y)\\
3702:                h(Y_1|Y)   & = & h(Y_1,Y) - h(Y)\\
3703:                           & = & \log((gP+1)(P+1)-gP^2) - \log(1+P)\\
3704:                           & = & \log(P+1 + gP^2+gP-gP^2) - \log(1+P)\\
3705:                           & = & \log\left(1 +\frac{gP}{1+P}\right)
3706:            \end{eqnarray*}
3707:            \begin{eqnarray*}
3708:                I(Y_1;\hY_1|Y)  & = & h(\hY_1|Y) - h(\hY_1|Y,Y_1)\\
                               & = & h(\sqrt{g}X + N_1 + \nQ|X + N_2) - h(Y_1 + \nQ|Y,Y_1)\\
                               & = & h(\sqrt{g}X + N_1 + \nQ,X + N_2) - h(X + N_2) - h(\nQ|Y,Y_1)\\
                               & = & \log\left((gP + 1 + \sigQ)(P+1) - \left(\sqrt{g}P\right)^2 \right) - \log(P+1) - \log(\sigQ)\\
                               & = & \log\left(gP^2 + gP + (1 + \sigQ)(P+1) - gP^2 \right) - \log(P+1) - \log(\sigQ)\\
                               & = & \log\left(gP + (1 + \sigQ)(P+1)  \right) - \log(P+1) - \log(\sigQ)\\
3714:                                & = & \log\left(1 + \frac{1}{\sigQ} + \frac{gP}{\sigQ(P+1)}  \right)\\
3715:            \end{eqnarray*}
3716:            \begin{eqnarray*}
3717:                I(X;\hY_1|Y)    & = & h(\hY_1|Y) - h(\hY_1|Y,X)\\
3718:                                & = & \log\left(gP + (1 + \sigQ)(P+1)  \right) - \log(P+1) - h(\sqrt{g}X + N_1 + \nQ|X + N_2,X)\\
3719:                                & = & \log\left(1 + \sigQ + \frac{gP}{P+1}  \right)  - h( N_1 + \nQ|N_2,X)\\
3720:                                & = & \log\left(1 + \sigQ + \frac{gP}{P+1}  \right)  - h( N_1 + \nQ)\\
3721:                                & = & \log\left(1 + \sigQ + \frac{gP}{P+1}  \right)  - \log(1+\sigQ)\\
3722:                                & = & \log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right)\\
3723:            \end{eqnarray*}
3724: 
3725: 
3726: 
3727:            \subsection{A Motivating Example: The Noiseless Relay Case}
3728:            \label{sec:motivation_gauss}
3729:            We consider the noiseless relay scenario considered in \cite{Goldsmith:2006}. For this scenario the received
3730:            signals are described by
3731:            \begin{eqnarray*}
3732:                y_1 & = & \sqrt{g}x + n_1\\
3733:                y   & = & x + n_2
3734:            \end{eqnarray*}
3735:            where $x,y_1,y,n_1,n_2 \in \mathbb{C}$, $g \in \mathbb{R}_+$,
3736:            \[  \left(
3737:                    \begin{array}{c}
3738:                        n_1 \\ n_2
3739:                    \end{array}
3740:                \right) \sim
3741:                    \mathcal{NC} \left(
3742:                        \left[
3743:                            \begin{array}{c}
3744:                            0 \\ 0
3745:                          \end{array}
3746:                        \right],
3747:                        \left[
3748:                    \begin{array}{cc}
3749:                            1 & 0 \\ 0 & 1
3750:                    \end{array}
3751:                        \right]
3752:                    \right),
3753:            \]
3754:            and the transmitter has an average power constraint $E\left\{|x|^2\right\} \le P$. It is further assumed that
3755:            all the nodes have perfect channel state information (CSI). The receivers have noiseless conference links
3756:            between them, with capacities $\alpha C$  from the relay to the destination, and $(1-\alpha)C$ from the
           destination to the relay, $\alpha \in [0,1]$. In \cite{Goldsmith:2006} two cooperation strategies are considered:
3758:            one shot cooperation ($\alpha = 1$), which is the standard relay scenario, and an iterative cooperation scheme in which
3759:             the receiver first sends a message to the relay and then the relay sends a message back to the receiver.
3760:            The results in \cite{Goldsmith:2006} for the upper bound ($C_{os,CS}$), one-shot DAF ($R_{os,DF}$),
           one-shot EAF ($R_{os,CF}$) and iterative ($R_i$) conferences are summarized below:
3762:            \begin{eqnarray}
3763:                C_{os,CS} & = & \min\left\{\log(1+(1+g)P), \log(1+P) + C \right\}\\
3764:                R_{os,DF} & = & \min\left\{\log(1+gP), \log(1+P)+C \right\}\\
3765:                R_{os,CF} %& = & \log\left(1+P+ \frac{gP(2^C-1)(P+1)}{2^C(P+1) + gP} \right)\\
3766:                          & = & \log\left(1+P+ \frac{gP}{1+ \frac{1+ P + gP}{(2^C-1)(P+1)}} \right)\\
                         & = & \log(1+P) + \log\left(1 + \frac{gP}{(1+P)\left(1 + \frac{1+ P + gP}{(2^C-1)(P+1)}\right)} \right)\\
3768:                R_i       & = & \max_{0 \le \alpha \le 1} \min \left\{\log\left(1+gP + \frac{P}{1+N_i} \right), \log(1+P) +\alpha C \right\},\\
3769:                          &   & N_i = \frac{(1+g)P+1}{(2^{(1-\alpha)C} - 1)(gP+1)}.
3770:            \end{eqnarray}
3771:            We now evaluate the expression of corollary \ref{corr:single_relay_TAF}  for this scenario, assuming
3772:            $X \sim \mathcal{NC}(0,P)$:
3773:            \begin{eqnarray*}
3774:                R_{os,TF}  & = & I(X;Y|X_1) + \left[ \frac{I(X_1;Y)}{I(\hY_1;Y_1|X_1,Y)} \right]^* I(X;\hY_1|X_1,Y)\\
3775:                           & = & I(X;Y) + \left[ \frac{C}{I(Y_1;\hY_1|Y)} \right]^* I(X;\hY_1|Y)\\
3776:                           & = & \log(1+P) + \min\left\{1,\frac{C}{I(Y_1;\hY_1|Y)}\right\}\left(h(\hY_1|Y) - h(\hY_1|Y,X)\right)\\
3777:                           & = & \log(1+P) + \min \left\{\log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right) ,
3778:                           \frac{C}{\log\left(1 + \frac{1}{\sigQ} + \frac{gP}{\sigQ(P+1)}\right)}\log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right)\right\}\\
3779:                           & = & \min \left\{\log\left(1+P + \frac{gP}{1 + \sigQ}  \right)
3780:                           ,\log(1+P) +\frac{C}{\log\left(1 + \frac{1}{\sigQ} + \frac{gP}{\sigQ(P+1)}\right)}\log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right)\right\}\\
3781:            \end{eqnarray*}
           We see that for one-shot TAF to be superior we need:
3783:            \[
3784:                \sigQ < \frac{1+ P + gP}{(2^C-1)(P+1)}
3785:            \]
3786:            but then
3787:            \begin{eqnarray*}
               \log\left(1 + \frac{1}{\sigQ} + \frac{gP}{\sigQ(P+1)}\right) & = &  \log\left(1 + \frac{1}{\sigQ}\left(\frac{1+P+gP}{P+1}\right)\right)\\
                       & > & \log \left( 1 + \frac{(2^C-1)(P+1)}{1+ P + gP}\left(\frac{1+P+gP}{P+1}\right)\right)\\
3790:                        & = & \log\left(2^C\right)\\
3791:                        & = & C.
3792:            \end{eqnarray*}
3793:            Hence, when $\sigQ > \frac{1+ P + gP}{(2^C-1)(P+1)}$, then EAF is better than TAF. When $\sigQ = \frac{1+ P + gP}{(2^C-1)(P+1)}$
3794:            both expressions are equivalent. When $\sigQ < \frac{1+ P + gP}{(2^C-1)(P+1)}$, then first we have
3795:            \[
3796:                \log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right) >
                   \log\left(1 + \frac{gP}{(1+P)\left(1 + \frac{1+ P + gP}{(2^C-1)(P+1)}\right)} \right)
3798:            \]
3799:            So, for TAF to be better than EAF we need:
3800:            \[
3801:                \frac{C}{\log\left(1 + \frac{1}{\sigQ} + \frac{gP}{\sigQ(P+1)}\right)}\log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right)
                       > \log\left(1 + \frac{gP}{(1+P)\left(1 + \frac{1+ P + gP}{(2^C-1)(P+1)}\right)} \right)
3803:            \]
3804:            or
3805:            \[
               \frac{C}{\log\left(1 + \frac{gP}{(1+P)\left(1 + \frac{1+ P + gP}{(2^C-1)(P+1)}\right)} \right)}
3807:                        > \frac{\log\left(1 + \frac{1}{\sigQ} + \frac{gP}{\sigQ(P+1)}\right)}
3808:                            {\log\left(1 + \frac{gP}{(1 + \sigQ)(P+1)}  \right)}
3809:            \]
3810:            we have that TAF is better than EAF.
3811: 
3812:            \subsection{The General Gaussian Relay Channel}
3813:            The general form of the Gaussian relay channel is given by
3814:            \begin{eqnarray}
3815:                Y   & = & h X + h_0 X_1 + N,\\
3816:                Y_1 & = & h_1 X + N_1,
3817:            \end{eqnarray}
           where $h$, $h_0$ and $h_1$ are fixed known channel gains, $N_1 \sim \mathcal{NC}(0,\sigR)$,
           $N \sim \mathcal{NC}(0,\sigD)$, independent of $N_1$, $E\left\{|X|^2\right\} \le P$ and
           $E\left\{|X_1|^2\right\} \le P_1$.
3821: 
3822:            \subsection{TAF for the Gaussian Case}
3823:            Consider the following assignment of the auxiliary random variable of theorem \ref{thm:CEG_EAF}:
3824:            \begin{equation}
3825:                p(\hY_1|Y_1,X_1) = \left\{
3826:                        \begin{array}{cl}
3827:                            q &, \hY_1 = Y_1 + \nQ\\
3828:                            1-q & ,\hY_1 = \nQ
3829:                        \end{array}
3830:                    \right.
3831:            \end{equation}
3832:            where $\nQ \sim \mathcal{NC}(0,\sigQ)$ is independent of all other variables.
3833:            Under this assignment the feasibility condition of \eqref{eqn:EAF_feasible}:
3834:            \begin{eqnarray*}
3835:                I(X_1;Y) & \ge & I(\hY_1;Y_1|X_1,Y) \\
3836:                         & = &   H(Y_1|X_1,Y) - H(Y_1|X_1,Y,\hY_1) \\
3837:                         & = &   H(Y_1|X_1,Y) - (1-q)H(Y_1|X_1,Y,N_Q) - q H(Y_1|X_1,Y,Y_1+\nQ)\\
3838:                         & = &   H(Y_1|X_1,Y) - (1-q)H(Y_1|X_1,Y) - q H(Y_1|X_1,Y,Y_1+\nQ)\\
3839:                         & = &   q (H(Y_1|X_1,Y) - H(Y_1|X_1,Y,Y_1+\nQ))\\
3840:                         & = &   q \Bigg(
3841:                                \log \left(\frac{(h_1^2\hP + \sigR ) (h^2 \hP + \sigD) - \left(h_1 h \hP\right)^2}{h^2 \hP + \sigD }\right)\\
3842:                         &   & \qquad  - \log \left( \frac{\left((h_1^2\hP + \sigR)(h^2 \hP + \sigD) - \left(h_1 h \hP\right)^2\right)\sigQ}
3843:                                        {(h^2 \hP + \sigD)(h_1^2 \hP + \sigR)  - (h h_1 \hP)^2 + (h^2 \hP + \sigD)\sigQ}\right)\Bigg)\\
3844:                         & = &   q   \log \left( \frac{(h^2 \hP + \sigD)(h_1^2 \hP + \sigR)  - (h h_1 \hP)^2 + (h^2 \hP + \sigD)\sigQ}
3845:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)\\
3846:                         & = &   q   \log \left( 1 + \frac{ (h^2 \hP + \sigD)(h_1^2 \hP + \sigR)  - (h h_1 \hP)^2 }
3847:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)\\
3848:                         & = &   q   \log \left( 1 + \frac{\sigR}{\sigQ} + \frac{ h_1^2 \hP \sigD   }
3849:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)
3850:            \end{eqnarray*}
3851:            Hence
3852:            \begin{eqnarray*}
3853:                q \le \frac{\log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)}{ \log \left( 1 + \frac{\sigR}{\sigQ} + \frac{ h_1^2 \hP \sigD   }
3854:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)}.
3855:            \end{eqnarray*}
3856:            Combining with the constraint $q \le 1$ we obtain
3857:            \begin{equation}
3858:                q \le \left[\frac{\log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)}{ \log \left( 1 + \frac{\sigR}{\sigQ} + \frac{ h_1^2 \hP \sigD   }
3859:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)}\right]^*.
3860:            \end{equation}
3861:            and the rate expression becomes
3862:            \begin{eqnarray*}
               R & \le & I(X;Y,\hY_1|X_1)\\
3864:                    & = & I(X;Y|X_1) + I(X; \hY_1|X_1,Y)\\
3865:                    & = & I(X;Y|X_1) + H(X| X_1,Y) - H(X|X_1,Y,\hY_1)\\
                   & = & I(X;Y|X_1) + H(X| X_1,Y) - (1-q) H(X|X_1,Y,\nQ) - q H(X|X_1,Y,Y_1+\nQ)\\
3867:                    & = & I(X;Y|X_1) + H(X| X_1,Y) - (1-q) H(X|X_1,Y) - q H(X|X_1,Y,Y_1+\nQ)\\
3868:                    & = & I(X;Y|X_1) + q I(X;Y_1 + \nQ|X_1,Y)
3869:            \end{eqnarray*}
3870:            hence we want to maximize $q$.
3871: 
3872:            Assuming $X \sim \mathcal{NC}(0,\hP)$ and $X_1 \sim \mathcal{NC}(0,\hP_1)$, $X$ and $X_1$ independent,
3873:            we can evaluate the expression of corollary \ref{corr:single_relay_TAF} as follows:
3874:            \begin{eqnarray*}
3875:                R_{TF} & = & \log\left(1 + \frac{h^2\hP}{\sigD}\right) + q\log\left(1 + \frac{\sigD h_1^2 \hP}{(h^2\hP+\sigD)(\sigR + \sigQ)}\right)\\
3876:                       & = &  \min\left\{\log\left(1 + \frac{h^2\hP}{\sigD}\right) +\log\left(1 + \frac{\sigD h_1^2 \hP}{(h^2\hP+\sigD)(\sigR + \sigQ)}\right),\right.\\
3877:                       &   & \left.    \log\left(1 + \frac{h^2\hP}{\sigD}\right) + \log\left(1 + \frac{\sigD h_1^2 \hP}{(h^2\hP+\sigD)(\sigR + \sigQ)}\right)
3878:                                \frac{\log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)}{ \log \left( 1 + \frac{\sigR}{\sigQ} + \frac{ h_1^2 \hP \sigD   }
3879:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)} \right\}\\
3880:                       & = &  \min\left\{\log\left(1 + \frac{h^2\hP}{\sigD} + \frac{ h_1^2 \hP}{(\sigR + \sigQ)}\right),\right.\\
3881:                       &   & \left.    \log\left(1 + \frac{h^2\hP}{\sigD}\right) + \log\left(1 + \frac{\sigD h_1^2 \hP}{(h^2\hP+\sigD)(\sigR + \sigQ)}\right)
3882:                                \frac{\log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)}{ \log \left( 1 + \frac{\sigR}{\sigQ} + \frac{ h_1^2 \hP \sigD   }
3883:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)} \right\}
3884:            \end{eqnarray*}
3885:            \[
3886:                R_{Gauss} = \log\left(1 + \frac{h^2\hP}{\sigD} +  \frac{h_1^2 \hP}{\left(\sigR+\frac{\sigR(h^2\hP + \sigD) + h_1^2 \hP\sigD}{h_0^2 \hP_1}\right)}\right)
3887:            \]
3888:            Let $h_0^2\hP_1$ be large enough such that $q = 1$.
3889:            Then if
3890:            \[
3891:                \sigQ < \frac{\sigR(h^2\hP + \sigD) + h_1^2 \hP\sigD}{h_0^2 \hP_1}
3892:            \]
3893:            But, for $q = 1$ we need:
3894:            \begin{eqnarray*}
3895:                 \frac{h_0^2 \hP_1}{h^2\hP + \sigD} & > & \frac{\left(h^2 \hP + \sigD\right)\sigR + h_1^2 \hP \sigD }{\left(h^2 \hP + \sigD\right)\sigQ}\\
3896:                 \sigQ & > & \frac{\sigR (h^2 \hP + \sigD) + h_1^2 \hP \sigD }{h_0^2 \hP_1}
3897:            \end{eqnarray*}
3898:            So this does not work.
3899:            Therefore we conclude that if $\sigQ  >  \frac{\sigR (h^2 \hP + \sigD) + h_1^2 \hP \sigD }{h_0^2 \hP_1}$
3900:            then $R_{Gauss} > R_{TF}$, and if $\sigQ  =  \frac{\sigR (h^2 \hP + \sigD) + h_1^2 \hP \sigD }{h_0^2 \hP_1}$ then
3901:            $R_{Gauss} = R_{TF}$. Now consider
3902:            \[
3903:                    \sigQ < \frac{\sigR(h^2\hP + \sigD) + h_1^2 \hP\sigD}{h_0^2 \hP_1}
3904:            \]
3905:            compare $R_{TF}$ with
3906:            \[
3907:              R_{Gauss} =   \log\left(1 + \frac{h^2\hP}{\sigD}\right)
3908:                            + \log \left( 1 + \frac{h_1^2 \hP\sigD}{(\sigR+\sigW)(h^2\hP + \sigD)}\right)
3909:            \]
3910:            so we need
3911:            \[
3912:                \log \left( 1 + \frac{h_1^2 \hP\sigD}{\left(\sigR+\frac{\sigR(h^2\hP + \sigD) + h_1^2 \hP\sigD}{h_0^2 \hP_1}\right)(h^2\hP + \sigD)}\right) <
3913:                  \log\left(1 + \frac{\sigD h_1^2 \hP}{(h^2\hP+\sigD)(\sigR + \sigQ)}\right)
3914:                                \frac{\log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)}{ \log \left( 1 + \frac{\sigR}{\sigQ} + \frac{ h_1^2 \hP \sigD   }
3915:                                 {\left(h^2 \hP + \sigD\right)\sigQ}\right)}
3916:            \]
3917: 
3918:            %\begin{eqnarray*}
3919:            %    R_{TF}  & = & I(X;Y|X_1) + \left[ \frac{I(X_1;Y)}{H(Y_1|X_1,Y)} \right]^* I(X;Y_1|X_1,Y)\\
3920:            %            & = & \log\left(1 + \frac{h^2\hP}{\sigD}\right) + \min\left\{1,\frac{I(X_1;Y)}{H(Y_1|X_1,Y)}\right\}I(X;Y_1|X_1,Y)\\
3921:            %            & = & \log\left(1 + \frac{h^2\hP}{\sigD}\right) +\min\left\{ \log \left(\sigR + \frac{h_1^2\hP \sigD}{h^2 \hP + \sigD }\right)- \log(\sigR),
3922:            %                \frac{I(X_1;Y)}{h(Y_1|X_1,Y)}(h(Y_1|X_1,Y) - h(Y_1|X,X_1,Y))\right\}\\
3923:            %            & = & \log\left(1 + \frac{h^2\hP}{\sigD}\right) +\min\left\{ \log \left(1 + \frac{h_1^2\hP \sigD}{\sigR(h^2 \hP + \sigD) }\right),
3924:            %                \log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)
3925:            %                    - \frac{\log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)}{\log \left(\sigR + \frac{h_1^2\hP \sigD}{h^2 \hP + \sigD }\right)} \log(\sigR)\right\}
3926:            %\end{eqnarray*}
3927: 
3928:            $\left(\begin{array}{c}
3929:                    h_1 X + N_1 \\ h X  + N \\ h_1 X + N_1 + \nQ
               \end{array}\right) \sim \mathcal{NC}\left(\left[
3931:                                                \begin{array}{c}
3932:                                                    0 \\ 0 \\ 0
3933:                                                \end{array}
3934:                                                \right], \left[
3935:                                                            \begin{array}{ccc}
3936:                                                                h_1^2\hP + \sigR & h_1 h \hP       & h_1^2\hP + \sigR\\
3937:                                                                h_1 h \hP        & h^2 \hP + \sigD & h h_1 \hP\\
3938:                                                                h_1^2\hP + \sigR & h_1 h \hP       & h_1^2 \hP + \sigR + \sigQ
3939:                                                            \end{array}
3940:                                                            \right] \right)$
3941:            \begin{eqnarray*}
3942:                h(h_1 X + N_1, h X  + N, h_1 X + N_1 + \nQ) & = & \log\Big((h_1^2\hP + \sigR)(h^2 \hP + \sigD)(h_1^2 \hP + \sigR + \sigQ)
3943:                            + 2(h_1 h \hP)^2(h_1^2\hP + \sigR) \\
3944:                            &  & - (h_1^2\hP + \sigR)^2(h^2 \hP + \sigD) - (h_1 h \hP)^2(h_1^2\hP + \sigR)\\
3945:                            &  & - (h_1 h \hP)^2(h_1^2 \hP + \sigR + \sigQ)\Big)\\
3946:                            & = & \log\left(\left((h_1^2\hP + \sigR)(h^2 \hP + \sigD) - (h_1 h \hP)^2\right)\sigQ\right)\\
3947:                h(h X  + N, h_1 X + N_1 + \nQ) & = & \log\left((h^2 \hP + \sigD)(h_1^2 \hP + \sigR + \sigQ) - (h h_1 \hP)^2\right)\\
3948:                                               & = & \log\left((h^2 \hP + \sigD)(h_1^2 \hP + \sigR)  - (h h_1 \hP)^2 + (h^2 \hP + \sigD)\sigQ\right)\\
3949:                h(Y_1|X_1,Y,Y_1+\nQ) & = & h(h_1 X + N_1 | X_1, h X + h_0 X_1 + N, h_1 X + N_1 + \nQ)\\
3950:                                     & = & h(h_1 X + N_1 | X_1, h X  + N, h_1 X + N_1 + \nQ)\\
3951:                                     & = & h(h_1 X + N_1 | h X  + N, h_1 X + N_1 + \nQ)\\
3952:                                     & = & h(h_1 X + N_1, h X  + N, h_1 X + N_1 + \nQ) - h(h X  + N, h_1 X + N_1 + \nQ)\\
3953:                                     & = & \log \left( \frac{\left((h_1^2\hP + \sigR)(h^2 \hP + \sigD) - (h_1 h \hP)^2\right)\sigQ}
3954:                                        {(h^2 \hP + \sigD)(h_1^2 \hP + \sigR)  - (h h_1 \hP)^2 + (h^2 \hP + \sigD)\sigQ}\right)
3955:            \end{eqnarray*}
3956:            \begin{eqnarray*}
3957:                I(X;Y_1 + \nQ|X_1,Y)    & = & h(Y_1 + \nQ|X_1,Y) - h(Y_1 + \nQ|X,X_1,Y)\\
3958:                                        & = & \log\left(1 + \frac{\sigD h_1^2 \hP}{(h^2\hP+\sigD)(\sigR + \sigQ)}\right)\\
3959:                    h(Y_1 + \nQ|X_1,Y)  & = & h(h_1 X + N_1 + \nQ|X_1,h X + h_0 X_1 + N)\\
3960:                                        & = & h(h_1 X + N_1 + \nQ|X_1,h X  + N)\\
3961:                                        & = & h(h_1 X + N_1 + \nQ|h X  + N)\\
3962:                                        & = & h(h_1 X + N_1 + \nQ,h X  + N) - h(h X  + N)\\
3963:                                        & = & \log\left((h^2 \hP + \sigD)(h_1^2 \hP + \sigR)  - (h h_1 \hP)^2 + (h^2 \hP + \sigD)\sigQ\right)-\log(h^2\hP+\sigD)\\
3964:                                        & = & \log\left( \frac{\sigD h_1^2 \hP    + (h^2 \hP + \sigD)(\sigQ + \sigR)}{h^2\hP+\sigD}\right)\\
3965:                                        & = & \log\left( \sigQ + \sigR + \frac{\sigD h_1^2 \hP}{h^2\hP+\sigD}\right)\\
3966:                h(Y_1 + \nQ|X,X_1,Y)    & = & h(h_1 X + N_1 + \nQ|X_1,h X + h_0 X_1 + N,X)\\
3967:                                        & = & h( N_1 + \nQ|X_1,  N,X)\\
3968:                                        & = & h( N_1 + \nQ)\\
3969:                                        & = & \log(\sigR + \sigQ)
3970:            \end{eqnarray*}
3971: 
3972:            \begin{eqnarray*}
3973:                h(Y|X_1)      & = & h(h X + h_0 X_1 + N|X_1)\\
3974:                            & = & h(h X + N|X_1)\\
3975:                            & = & h(h X + N)\\
3976:                            & = & \log(h^2\hP + \sigD)\\
3977:                h(Y|X_1,X)  & = & h(h X + h_0 X_1 + N|X_1,X)\\
3978:                            & = & h(N|X_1,X)\\
3979:                            & = & h(N)\\
3980:                            & = & \log(\sigD)\\
3981:                I(X;Y|X_1)  & = & h(Y|X_1) - h(Y|X_1,X)\\
3982:                            & = & \log\left(1 + \frac{h^2\hP}{\sigD}\right)\\
3983:                I(X_1;Y)    & = & h(Y) - h(Y|X_1)\\
3984:                            & = & \log(h^2 \hP + h_0^2 \hP_1 + \sigD) - \log(h^2\hP + \sigD)\\
3985:                            & = & \log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right)\\
3986:            \end{eqnarray*}
3987:            \begin{eqnarray*}
3988:                h(Y_1|X_1,Y)& = & h(h_1 X + N_1 | X_1, h X + h_0 X_1 + N)\\
3989:                            & = & h(h_1 X + N_1 | X_1, h X + N)\\
3990:                            & = & h(h_1 X + N_1 | h X + N)\\
3991:                            & = & h(h_1 X + N_1 , h X + N) - h( h X + N)\\
3992:                            & = & \log \left((h_1^2\hP + \sigR)(h^2 \hP + \sigD) - \left(h_1 h \hP\right)^2\right) - \log\left(h^2 \hP + \sigD \right)\\
3993:                            & = & \log \left(\frac{h_1^2\hP(h^2 \hP + \sigD) + \sigR(h^2 \hP + \sigD) - \left(h_1 h \hP\right)^2}{h^2 \hP + \sigD }\right)\\
3994:                            & = & \log \left(\sigR + \frac{h_1^2\hP \sigD}{h^2 \hP + \sigD }\right)\\
3995:             I(X;Y_1|X_1,Y) & = & h(Y_1|X_1,Y)- h(Y_1|X,X_1,Y)\\
3996:                            & = & h(Y_1|X_1,Y)- h(N_1|X,X_1, N)\\
3997:                            & = & h(Y_1|X_1,Y)- h(N_1)\\
3998:                            & = & \log \left(\sigR + \frac{h_1^2\hP \sigD}{h^2 \hP + \sigD }\right)- \log(\sigR).
3999:            \end{eqnarray*}
4000: 
4001:            $\left(\begin{array}{c} h_1 X + N_1 \\ h X + N \end{array} \right)  \sim
4002:            \mathcal{NC}\left(\left[\begin{array}{c} 0 \\ 0 \end{array} \right],\left[\begin{array}{cc} h_1^2\hP + \sigR & h_1 h \hP \\ h_1 h \hP & h^2 \hP + \sigD\end{array} \right] \right)$
4003: 
4004:            The standard application of EAF for the Gaussian channel uses the assignment
4005:            \[
4006:                \hY_1 = Y_1 + W = h_1 X + N_1 + W, \qquad W \sim \mathcal{NC}(0,\sigW).
4007:            \]
4008:            The rate expression is given by:
4009:            \begin{eqnarray}
4010:                \label{eqn:R_gauss_exp1}
4011:                R   & = & I(X;Y,\hY_1|X_1)\nonumber\\
4012:                    & = & I(X;Y|X_1) + I(X;\hY_1|X_1,Y)\nonumber\\
4013:                    & = & I(X;Y|X_1) + h(\hY_1|X_1,Y) - h(\hY_1|X,X_1,Y)\nonumber\\
4014:                    & = & \log\left(1 + \frac{h^2\hP}{\sigD}\right)
4015:                            + \log \left( 1 + \frac{h_1^2 \hP\sigD}{(\sigR+\sigW)(h^2\hP + \sigD)}\right)\nonumber\\
4016:                    & = & \log\left(1 + \frac{h^2\hP}{\sigD} +  \frac{h_1^2 \hP}{(\sigR+\sigW)}\right)
4017:            \end{eqnarray}
4018:            subject to
4019:            \begin{eqnarray*}
4020:                I(X_1;Y) = \log\left( 1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \right) & \ge & I(\hY_1;Y_1|X_1,Y)\\
4021:                    & = & h(\hY_1|X_1,Y) - h(\hY_1|Y_1,X_1,Y)\\
4022:                    & = & \log \left( \sigR + \sigW + \frac{h_1^2 \hP\sigD}{h^2\hP + \sigD} \right) - \log(\sigW)\\
4023:                    & = & \log \left( 1 + \frac{\sigR}{\sigW} + \frac{h_1^2 \hP\sigD}{\sigW(h^2\hP + \sigD)} \right)
4024:            \end{eqnarray*}
4025:            \begin{eqnarray}
4026:                \label{eqn:sigW_cond}
4027:                1 + \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \ge  1 + \frac{\sigR}{\sigW} + \frac{h_1^2 \hP\sigD}{\sigW(h^2\hP + \sigD)}\nonumber\\
4028:                \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \ge  \frac{1}{\sigW} \left(\sigR+ \frac{h_1^2 \hP\sigD}{h^2\hP + \sigD}\right)\nonumber\\
4029:                \frac{h_0^2 \hP_1}{h^2\hP + \sigD} \ge  \frac{1}{\sigW} \left(\frac{\sigR(h^2\hP + \sigD) + h_1^2 \hP\sigD}{h^2\hP + \sigD}\right)\nonumber\\
4030:                \sigW \ge   \frac{\sigR(h^2\hP + \sigD) + h_1^2 \hP\sigD}{h_0^2 \hP_1}.
4031:            \end{eqnarray}
4032:            Maximizing the rate implies making $\sigW$ as small as possible, i.e. use equality in \eqref{eqn:sigW_cond}. Plugging this back
4033:            into \eqref{eqn:R_gauss_exp1} yields
4034:            \[
4035:                R = \log\left(1 + \frac{h^2\hP}{\sigD} +  \frac{h_1^2 \hP}{\left(\sigR+\frac{\sigR(h^2\hP + \sigD) + h_1^2 \hP\sigD}{h_0^2 \hP_1}\right)}\right)
4036:            \]
4037: 
4038: 
4039:            \begin{eqnarray*}
4040:                h(\hY_1|X,X_1,Y)    & = & h(h_1 X + N_1 + W|X,X_1,h X + h_0 X_1 + N)\\
4041:                                    & = & h(N_1 + W|X,X_1,N)\\
4042:                                    & = & h(N_1 + W)\\
4043:                                    & = & \log(\sigR+\sigW)\\
4044:                h(\hY_1|X_1,Y)      & = & h(h_1 X + N_1 + W|X_1,h X + h_0 X_1 + N)\\
4045:                                    & = & h(h_1 X + N_1 + W|X_1,h X + N)\\
4046:                                    & = & h(h_1 X + N_1 + W|h X + N)\\
4047:                                    & = & h(h_1 X + N_1 + W,h X + N) - h(h X + N)\\
4048:                                    & = & \log \left( (h_1^2 \hP + \sigR + \sigW)(h^2\hP + \sigD)- \left(h h_1 \hP\right)^2\right)
4049:                                             - \log(h^2\hP + \sigD) \\
4050:                                    & = & \log \left( h_1^2 \hP\sigD + (\sigR + \sigW)(h^2\hP + \sigD) \right)
4051:                                             - \log(h^2\hP + \sigD) \\
4052:                                    & = & \log \left( \sigR + \sigW + \frac{h_1^2 \hP\sigD}{h^2\hP + \sigD} \right)\\
               I(X;\hY_1|X_1,Y)    & = & h(\hY_1|X_1,Y) - h(\hY_1|X,X_1,Y)\\
4054:                                    & = & \log \left( \sigR + \sigW + \frac{h_1^2 \hP\sigD}{h^2\hP + \sigD} \right) - \log(\sigR+\sigW)\\
4055:                                    & = & \log \left( 1 + \frac{h_1^2 \hP\sigD}{(\sigR+\sigW)(h^2\hP + \sigD)} \right)\\
4056:                h(\hY_1|Y_1,X_1,Y)  & = & h(h_1 X + N_1 + W|h_1 X + N_1,X_1,h X + h_0 X_1 + N)\\
4057:                                    & = & h(h_1 X + N_1 + W|h_1 X + N_1,X_1,h X  + N)\\
4058:                                    & = & h(W|h_1 X + N_1,X_1,h X  + N)\\
4059:                                    & = & h(W)\\
4060:                                    & = & \log(\sigW)
4061:            \end{eqnarray*}
4062:            $\left(\begin{array}{c} h_1 X + N_1 + W \\ h X + N \end{array} \right) \sim
4063:            \mathcal{NC}\left(\left[\begin{array}{c} 0 \\ 0 \end{array} \right],
               \left[\begin{array}{cc} h_1^2 \hP + \sigR + \sigW & h h_1 \hP \\ h h_1 \hP & h^2\hP + \sigD \end{array} \right] \right)$
4065: 
4066: \end{comment}
4067: 
4068: 
4069: 
4070: 
4071: 
4072: 
4073: 
4074: 
4075: 
4076: 
4077: 
4078: 
4079: 
4080: 
4081: \begin{comment}
4082: 
4083:            \setcounter{equation}{0}
4084: 
4085:            \section{Proof of Theorem \ref{thm:achieve-three-steps}}
4086:            \label{sec:three-steps-proof}
4087:                \subsection{Rate Bounds for the Three-Step Conference}
4088:                \label{sec:three-steps}
4089:                \subsubsection{Codebooks Construction and Conference Steps}
4090:                Fix $p_X(x)$ on $\mX$, $\eps > 0$, $\delta > 0$ and the block length $n$. Let $0 < \alpha \le 1$ and
4091:                \begin{equation}
4092:                \label{eqn:def_m_coop_BC}
4093:                    m = \min \left\{n,  \left\lfloor\frac{n C_{21}^a }{H(Y_2|Y_1) + \delta}\right\rfloor\right\}.
4094:                \end{equation}
4095:                Let $K = \frac{m}{n}$,
4096:                and $K_{\eps} \triangleq \!\left[\frac{C_{21}^a}{H(Y_2|Y_1)+\delta}\right]^*\! - \eps$.
4097:                Increase $n$  s.t. $K > K_{\eps}$. Note that $K \le 1$.
4098:                    \paragraph{Codebook Generation and Encoding at the Transmitter}
4099:                        The transmitter generates $2^{nR}$ i.i.d. codewords $\xvec$ of length $n$ such that
4100:                        $p(\xvec(w)) = \prod_{l=1}^n p_X(x_l(w))$,
4101:                        $w \in \mW = \left\{1,2,...,2^{nR}\right\}$.
4102:                        For transmitting the message $w_i$ at time $i$, the transmitter outputs the corresponding channel
4103:                        codeword $\xvec(w_i)$.
4104:                    \paragraph{Relay Codebook Generation at $\Rgood$}
4105:                        First define
4106:                        \[
4107:                            \btyp(Y_1,\yvec_2^m) = \bigcup_{\yvec_1^m \in A_{\delta}^{(m)}(Y_1|\yvec_2^m)} \ttyp(Y_1,\yvec_1^m).
4108:                        \]
4109:                        Now, for each $\yvec_2^m \in \typm(Y_2)$, $\Rgood$ partitions the set $\btyp(Y_1,\yvec_2^m)$
4110:                        into $2^{n C_{12}}$ subsets
4111:                        $S_k(\yvec_2^m)$, $k \in \big\{1,2,...,2^{nC_{12}}\big\}$,
4112:                        in a uniform and independent manner.
4113:                    \paragraph{Relay Codebook Generation at $\Rbad$}
4114:                        \begin{itemize}
4115:                            \item For the first conference step, $\Rbad$ simply enumerates all the sequences in the
4116:                                set $\typm(Y_2)$ with an index $l \in \left\{1,2,...,2^{n R_{21}^a}\right\}$. This can be done
4117:                                when
4118:                                \[
4119:                                 ||\typm(Y_2)|| \le 2^{nR_{21}^a}.
4120:                                \]
4121: 
4122:                                Now $\Rbad$ partitions the set of $\typm(Y_2)$ into $2^{nC_{21}^a}$ subsets
4123:                                in a uniform and independent manner. Denote these subsets with $B_k$, $k \in
4124:                                \left\{1,2,...,2^{nC_{21}^a}\right\}$.
4125:                            \item For the third conference step, $\Rbad$ partitions the message set $\mW$ into $2^{n C_{21}^b}$
4126:                            subsets, $B_{k'}$, $k' \in \big\{1,2,...,2^{n C_{21}^b} \big\}$,
4127:                            in a uniform and independent manner.
4128: 
4129:                        \end{itemize}
4130:                    \paragraph{Encoding at $\Rbad$ at the First Conference Step (time $i+1$)}
4131:                        Upon receiving the
4132:                        channel output $\yvec_2(i)$, $\Rbad$ considers its first $m$ symbols denoted $\yvec_2^m(i)$. $\Rbad$
4133:                        then finds the index $l_i$ of the partition $B_{l_i}$ that contains the received $\yvec_2^m(i)$
4134:                        in $\typm(Y_2)$ and sends it to $\Rgood$ through the conference link.
4135:                    \paragraph{Encoding at $\Rgood$ at the Second Conference Step (time $i+2$)}
4136:                      From $l_i$, $\Rgood$ knows the set $B_{l_i}$ to which $\yvec_2^m(i)$ belongs. $\Rgood$ now
4137:                      looks for a sequence $\yvec_2^m$ such that $\yvec_2^m \in \typm(Y_2|\yvec_1^m(i)) \bigcap B_{l_i}.$
4138:                        If there is none or there is more than one, an error is declared.
4139:                       Now $\Rgood$ knows $\yvec_2^m(i)$, therefore $\Rgood$
4140:                        can construct the set $A_{\delta}^{(m)}(Y_1|\yvec_2^m(i))$ and thus $\btyp(Y_1,\yvec_2^m(i))$.
4141:            %            knows $\btyp(Y_1,\yvec_2^m(i))$.
4142:                        $\Rgood$ then looks for the partition of $\btyp(Y_1,\yvec_2^m(i))$
4143:                         into which $\yvec_1(i)$ belongs, denoted $S_{k(i)}$, and
4144:                        transmits its index $k(i)$ to $\Rbad$.
4145:                    \paragraph{Decoding at $\Rbad$ (time $i+2$)}
4146:                    \label{sec:decoding-at-Rx2}
4147:                        Note that $\Rbad$ knows $\btyp(Y_1,\yvec_2^m(i))$, so both $\Rgood$ and $\Rbad$ can refer to the same set.
4148:                        Now, $\Rbad$ generates the set $\mL_2(i) = \left\{ w \in \mW : (\xvec(w), \yvec_2(i)) \in \typ(X,Y_2)\right\}$.
4149:                        Then, $\Rbad$ looks for a unique message $\hw \in \mL_2(i)$, such that
4150:                        $\left(\xvec(\hw),\yvec_1,\yvec_2(i)\right) \in \typ(X,Y_1,Y_2)$ for at least one $\yvec_1 \in S_{k(i)}$.
4151:                    \paragraph{Encoding at $\Rbad$ at the Third Conference Step (time $i+3$)}
4152:                        $\Rbad$ looks for the partition $B_{k'(i)}$ of $\mW$ into which the decoded message $\hw$ belongs.
4153:                        $\Rbad$ then transmits $k'(i)$ to $\Rgood$ through the conference link.
4154:                        % through the conference link.
4155:                    \paragraph{Decoding at $\Rgood$ (time $i+3$)}
4156:                    \label{sec:three-steps-decoder-1}
4157:                        $\Rgood$ generates the set\\
4158:                        $\tmL_1(i) = \Big\{ w \in \mW : (\xvec^m(w), \yvec_1^m(i)) \in \typm(X,Y_1), \;
4159:                            (\xvec(w), \yvec_1(i)) \in \typ(X,Y_1)\Big\}$. Then $\Rgood$ looks
4160:                        for a unique message $\hw \in \tmL_1(i)$ such that $\hw \in B_{k'(i)}$ and
4161:                        $\left(\xvec^m(\hw),\yvec_1^m(i),\yvec_2^m(i)\right) \in \typm(X,Y_1,Y_2)$.
4162: 
4163:                \subsubsection{Error Events}
4164:                \label{sec:three-steps-error-events}
4165:                    For the scheme defined above we have the following error events, for decoding the message transmitted at
4166:                    time $i$:
4167:                    \paragraph{Joint Typicality Decoding Fails}
4168:                        $E_0 = E_0' \bigcup E_0''$,\\
4169:                        $E_0' = \left\{\left(\xvec(w_i),\yvec_1(i),\yvec_2(i) \right) \notin \typ(X,Y_1,Y_2) \right\}$,\\
4170:                        $E_0''= \left\{\left(\xvec^m(w_i), \yvec_1^m(i), \yvec_2^m(i)\right) \notin \typm(X,Y_1,Y_2) \right\}$.
4171:                    \paragraph{Encoding at $\Rbad$ at the First Step Fails} $\phantom{x}$\\
4172:                        $E_1' = \left\{\yvec_2^m(i) \notin \typm(Y_2) \right\}$.
4173:                    \paragraph{Encoding at $\Rgood$ at the Second Step Fails}$\phantom{xxxxx}$\\
4174:            %            $E_1'' = E_{1,a}'' \bigcup E_{1,b}''$,\\
4175:                        $E_{1,a}'' = \left\{ \yvec_2^m(i) \notin \typm(Y_2|\yvec_1^m(i)) \bigcap B_{l_i} \right\}$,\\
4176:                        $E_{1,b}'' = \left\{ \exists \yvec_2^m \ne \yvec_2^m(i), \yvec_2^m \in \typm(Y_2|\yvec_1^m(i)) \bigcap B_{l_i}\right\}$,\\
4177:                        $E_{1,c}'' = \left\{ \yvec_1^m(i) \notin \typm(Y_1|\yvec_2^m(i))\right\}$,\\
4178:                        $E_{1,d}'' = \left\{ \yvec_1(i)  \notin \btyp(Y_1,\yvec_2^m(i)) \right\}$.\\
4179:            %            $E_1 = E_1' \bigcup E_{1,a}'' \bigcup E_{1,b}''$.
4180:                        Let $E_1 = E_1' \bigcup E_{1,a}'' \bigcup E_{1,b}'' \bigcup E_{1,c}'' \bigcup E_{1,d}''$.
4181:                    \paragraph{Decoding at $\Rbad$ Fails} $E_2 = E_2' \bigcup E_2'' \bigcup E_2'''$, \\
4182:                        $E_2'  = \Big\{w_i \notin \mL_2(i) \Big\}$,\\
4183:                        $E_2'' =\left\{\nexists \yvec_1 \mbox{$\in$} S_{k(i)} \mbox{ s.t. } (\xvec(w_i),\yvec_1, \yvec_2(i))
4184:                            \mbox{$\in$} \typ(X,Y_1,Y_2)\right\}$,\\
4185:                        $E_2''' = \Big\{ \exists w \in \mL_2(i), w \ne w_i, \exists \yvec_1 \in S_{k(i)},
4186:                             (\xvec(w), \yvec_1, \yvec_2(i)) \in \typ(X,Y_1,Y_2)\Big\}$.
4187:                    \paragraph{Decoding at $\Rgood$ Fails} $E_3 = E_3' \bigcup E_3'' \bigcup E_3'''$,\\
4188:                        $E_3' = \Big\{w_i \notin B_{k'(i)} \bigcap \tmL_1(i) \Big\}$,\\
4189:                        $E_3'' = \left\{\left(\xvec^m(w_i), \yvec_1^m(i), \yvec_2^m(i)\right) \notin \typm(X,Y_1,Y_2) \right\}$,\\
4190:                        $E_3''' = \Big\{\exists w \ne w_i, w \in  B_{k'(i)}, w \in \tmL_1(i),
4191:                            (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i)) \in \typm(X,Y_1,Y_2)\Big\}$.
4192: 
4193:                \subsubsection{Bounding the Probability of Error Events}
4194:                \label{sec:three-steps-error-bounds}
4195:                    The average probability of error can be bounded by
4196:                    \begin{eqnarray*}
4197:                        \Pe & \le & \Pr\left(\bigcup_{l=0}^3 E_l\right)\\
4198:                            &  =  & \sum_{l=0}^3 \Pr\left(E_l  \bigcap_{k=0}^{l-1} E_k^c\right).
4199:                    \end{eqnarray*}
4200:                    \paragraph{Probability of $E_0$}
4201:                        By the AEP (see \cite[ch. 3]{cover-thomas:it-book}), taking $n$ large enough we can make $\Pr(E_0') \le \eps$
4202:                        and $\Pr(E_0'') \le \delta$ for any $\eps$, $\delta$ positive, arbitrarily small, since from equation
4203:                        \eqref{eqn:def_m_coop_BC} $m$ is increasing with $n$: $m \ge K_{\eps}n$.
4204:                        Thus, by the union bound $\Pr(E_0) \le \Pr(E_0') + \Pr(E_0'') \le \eps + \delta$ for $n$ large enough.
4205: 
4206:                    \paragraph{Probability of $E_1 \bigcap E_0^c$}
4207:                        Note that $E_0^c = E_0'^c \bigcap E_0''^c$. Now, from the definition
4208:                        of jointly typical sets (see \cite[ch. 14.2]{cover-thomas:it-book}) it follows that
4209:                        $\Pr\left(E_1' \bigcap E_0''^c\right) = \Pr\left(E_{1,a}'' \bigcap E_0''^c\right) =
4210:                                \Pr\left(E_{1,c}'' \bigcap E_0''^c\right) = 0$, and
4211:                        $\Pr\left(E_{1,d}''\bigcap E_{1,c}''^c \bigcap E_0'^c \bigcap E_0''^c\right) = 0$.
4212:                        The last equality holds since $E_0'^c \bigcap E_0''^c$ implies that
4213:                        $\yvec_1(i) \in \ttyp(Y_1,\yvec_1^m(i))$, and $E_{1,c}''^c$ implies correct decoding of
4214:                        $\yvec_2^m(i)$ at $\Rgood$.
4215:                        Following similar arguments to \cite[theorem 6]{cover-thomas:it-book} we conclude that
4216:                        taking $n$ large enough we can make
4217:                        $\Pr(E_{1,b}'' \bigcap E_0''^c) \le \eps$, as long as
4218:                        \begin{eqnarray}
4219:                             K_{\eps}  (H(Y_2|Y_1)   + 2\eps) & < & C_{21}^a\nonumber\\
4220:                            \label{eqn:K_eps_condition}
4221:                               \Rightarrow K_{\eps}   & < & \frac{C_{21}^a}{H(Y_2|Y_1)   + 2\eps}.
4222:                        \end{eqnarray}
4223:            %            Therefore by the union bound $\Pr\left(E_1 \bigcap E_0^c\right) = 0$.
4224:                        Note that \eqref{eqn:K_eps_condition} is satisfied when $\eps \le \frac{\delta }{2}$.
4225:                        Hence, taking $n$ large enough we can make $\Pr\left(E_1 \bigcap E_0^c\right) \le \eps$.
4226: 
4227:                    \paragraph{Probability of $E_2 \bigcap E_1^c \bigcap E_0^c$}
4228:                        $E_0^c$ implies that $(\xvec(w_i), \yvec_2(i)) \in \typ(X,Y_2)$, hence
4229:                        $\Pr\left(E_2' \bigcap E_0^c\right) = 0$.
4230:                        Next, $E_1^c \bigcap E_0^c$ implies that $\yvec_1(i) \in \btyp\left(Y_1,\yvec_2^m(i)\right)$, and hence $\yvec_1(i)$ is in some
4231:                        partition $S_{k(i)}$ of $\btyp\left(Y_1,\yvec_2^m(i)\right)$.
4232:                        %Since this is the partition transmitted to $\Rbad$ this implies that $\Pr\left(E_2'' \bigcap E_1^c \bigcap E_0^c\right) = 0$. We now calculate
4233:                        This implies that $\Pr\left(E_2'' \bigcap E_1^c \bigcap E_0^c\right) = 0$.
4234:            %            We now calculate $\Pr\left(E_2''' \bigcap E_1^c \bigcap E_0^c\right)$:
4235:                        Lastly, consider
4236:                        {\setlength\arraycolsep{0pt}
4237:                        \begin{eqnarray*}
4238:                        &  & \Pr\left(E_2''' \bigcap E_1^c \bigcap E_0^c\right)= \\
4239:            %                    = \Pr\left(E_{2,A}'''\right) + \Pr\left(E_{2,B}'''\right),\\
4240:                        &  & \qquad \Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i, \exists \yvec_1 \in S_{k(i)},
4241:                              (\xvec(w), \yvec_1, \yvec_2(i)) \in \typ(X,Y_1,Y_2)
4242:                                \Big\}\bigcap E_1^c \bigcap E_0^c\Big)\\
4243:                        &  &\qquad \le \Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i, (\xvec(w), \yvec_1(i), \yvec_2(i)) \in \typ(X,Y_1,Y_2)
4244:                                 \Big\}\bigcap E_1^c \bigcap E_0^c\Big)\\
4245:                        &  & \qquad \quad  +\Pr\Big(\Big\{ \exists \yvec_1 \in S_{k(i)}, \yvec_1 \ne \yvec_1(i),\exists w \in \mL_2(i),
4246:                              w \ne w_i, (\xvec(w), \yvec_1, \yvec_2(i)) \in \typ(X,Y_1,Y_2)\Big\}  \bigcap E_1^c \bigcap E_0^c\Big)\\
4247:                        &  & \triangleq \Pr\left(E_{2,A}'''\right) + \Pr\left(E_{2,B}'''\right).
4248:                        \end{eqnarray*}}
4249:                        First we note that from \cite[equation (36)]{CoverG:79} we have that
4250:                        \begin{equation}
4251:                        \label{equ:size_L2}
4252:                           E_{\yvec_2}\big\{||\mL_2(i)||\big\} \le 1 + 2^{n(R- I(X;Y_2) + 3\eps)}.
4253:                        \end{equation}
4254:            %             where $||\mA||$ denotes the cardinality of the set $\mA$.
4255:                         Now, consider $\Pr\left(E_{2,A}'''\right)$:
4256:            %            {\setlength\arraycolsep{0pt}
4257:            %            \begin{eqnarray*}
4258:            %            &  & \Pr\left(E_{2,A}'''\right)
4259:            %%            &  &\Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i,  \\
4260:            %%            &  &  \phantom{xx}  (\xvec(w), \yvec_1(i), \yvec_2(i)) \in \typ(X,Y_1,Y_2)
4261:            %%                    \Big\}\bigcap E_1^c \bigcap E_0^c)\Big)\\
4262:            %%            &  & = \sum_{\mL_2(i)}\Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i, (\xvec(w), \yvec_1(i), \yvec_2(i))  \\
4263:            %%            &  &  \phantom{xxxx}  \in \typ(X,Y_1,Y_2)\Big\}  \bigcap E_1^c \bigcap E_0^c\Big| \mL_2(i)\Big)\Pr\big(\mL_2(i)\big)\\
4264:            %%             &  &\stackrel{(a)}{=} \sum_{\yvec_2(i)}
4265:            %%                    \Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i, (\xvec(w), \yvec_1(i), \yvec_2(i))  \\
4266:            %%            &  &  \phantom{xxxx}  \in \typ(X,Y_1,Y_2)\Big\}  \bigcap E_1^c \bigcap E_0^c\Big|
4267:            %%                                  \yvec_2(i)\Big)\Pr\big(\yvec_2(i)\big)\\
4268:            %             \stackrel{(a)}{=}  E_{\yvec_2}\bigg\{\Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i,    \\
4269:            %            &  &    (\xvec(w), \yvec_1(i), \yvec_2(i)) \in \typ(X,Y_1,Y_2)\Big\}  \bigcap E_1^c \bigcap E_0^c\Big| \yvec_2(i)\Big)\bigg\}
4270:            %            \end{eqnarray*}
4271:            %%            \begin{eqnarray*}
4272:            %%%            &  & \stackrel{(b)}{\le} E_{\yvec_2}\Bigg\{ \sum_{\substack{w \in \mL_2(i),\\ w \ne w_i}}
4273:            %%%                                 \Pr\Big(\Big\{(\xvec(w), \yvec_1(i), \yvec_2(i))  \\
4274:            %%%            &  &  \phantom{xxxxxxxxx}  \in \typ(X,Y_1,Y_2)\Big\}  \bigcap E_1^c \bigcap E_0^c\Big| \yvec_2(i)\Big)\Bigg\}\\
4275:            %%%            &  & = E_{\yvec_2}\Bigg\{ \sum_{\substack{w \in \mL_2(i),\\ w \ne w_i}}
4276:            %%%                                 \sum_{ \substack{\yvec_1(i) \in \\ \typ(Y_1|\xvec(w), \yvec_2(i))}}
4277:            %%%                                     \Pr (\yvec_1(i)|\xvec(w), \yvec_2(i)) \Bigg\}\\
4278:            %%            &  & \stackrel{(b)}{\le} E_{\yvec_2}\Bigg\{ \sum_{\substack{w \in \mL_2(i),\\ w \ne w_i}}
4279:            %%                                 \sum_{ \substack{\yvec_1(i) \in \\ \typ(Y_1|\xvec(w), \yvec_2(i))}}
4280:            %%                                     \Pr (\yvec_1(i)|\xvec(w), \yvec_2(i)) \Bigg\}\\
4281:            %%            &  & \stackrel{(c)}{=} E_{\yvec_2}\Bigg\{ \sum_{\substack{w \in \mL_2(i),\\ w \ne w_i}}
4282:            %%                                 \sum_{ \substack{\yvec_1(i) \in \\ \typ(Y_1|\xvec(w), \yvec_2(i))}}
4283:            %%                                     \Pr (\yvec_1(i)| \yvec_2(i))  \Bigg\}\\
4284:            %%            &  & \stackrel{(d)}{\le} E_{\yvec_2}\Bigg\{ \sum_{\substack{w \in \mL_2(i),\\ w \ne w_i}} 2^{n (H(Y_1|X,Y_2) + 2\eps)}
4285:            %%                2^{-n(H(Y_1|Y_2) - 2\eps)} \Bigg\}\\
4286:            %%%            &  &  \le E_{\yvec_2}\Big\{ ||\mL_2(i)|| 2^{-n (I(X;Y_1|Y_2) - 4\eps)} \Big\}\\
4287:            %%%            &  &   = 2^{-n (I(X;Y_1|Y_2) - 4\eps)} E_{\yvec_2}\Big\{ ||\mL_2(i)|| \Big\}\\
4288:            %%            &  &   \le 2^{-n (I(X;Y_1|Y_2) - 4\eps)} E_{\yvec_2}\Big\{ ||\mL_2(i)|| \Big\}\\
4289:            %%            &  &  \stackrel{(e)}{\le} 2^{-n (I(X;Y_1|Y_2) - 4\eps)} \left( 1 + 2^{n(R - I(X;Y_2) + 3\eps)}\right)
4290:            %%%            &  &  = 2^{-n (I(X;Y_1|Y_2) - 4\eps)}  + 2^{n(R - I(X;Y_2)  -I(X;Y_1|Y_2) + 7\eps)}\\
4291:            %%%%            &  &  = 2^{-n (I(X;Y_1|Y_2) - 4\eps)}  + 2^{n(R - I(X;Y_1,Y_2)  + 7\eps)},
4292:            %%            \end{eqnarray*}
4293:            %            }
4294:            %            where (a) is because $\mL_2(i)$ is a deterministic function of $\yvec_2(i)$ (we denote
4295:            %            $\Pr(\yvec_2(i)) \triangleq \Pr(\yvec_2(i)|w_i \mbox{ transmitted})$).
4296:            %            %(b) follows from the
4297:            %            %union bound, (c) comes from the independence of $\yvec_1(i)$ and $\xvec(w)$, $w \ne w_i$, (d)
4298:            %            %follows from the properties of conditionally typical sets (see \cite[ch. 14.2]{cover-thomas:it-book}), and
4299:            %            %(e) follows the same lines as in \cite[equation (36)]{CoverG:79}.
4300:            %            %This can be made arbitrarily small as long as
4301:                        by the point-to-point channel capacity theorem (see \cite[theorem 8.7.1]{cover-thomas:it-book})
4302:                        $\Pr(E_{2,A}''')$ can be made arbitrarily small by taking $n$ large enough as long as
4303:                        \begin{equation}
4304:                        \label{equ:EA_rate_bound}
4305:            %                $R < I(X;Y_1,Y_2) - 7\eps$.
4306:                            R < I(X;Y_1,Y_2)-2\eps.
4307:                        \end{equation}
4308:                        Next, consider $\Pr\left(E_{2,B}'''\right)$:
4309:                        {\setlength\arraycolsep{0mm}
4310:                        \begin{eqnarray}
4311:            %            &  & \Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i, \exists \yvec_1 \in S_{k(i)},
4312:            %                    \yvec_1 \ne \yvec_1(i), \nonumber\\
4313:            %            &  &  \phantom{xx}  (\xvec(w), \yvec_1, \yvec_2(i)) \in \typ(X,Y_1,Y_2)
4314:            %                    \Big\}\bigcap E_1^c \bigcap E_0^c\Big) \nonumber\\
4315:                        &  & \Pr\left(E_{2,B}'''\right)
4316:                           = E_{\yvec_1,\yvec_2} \bigg\{\Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i,
4317:                            \exists \yvec_1 \in S_{k(i)}, \yvec_1 \ne \yvec_1(i),  \nonumber\\
4318:                        &  &       \phantom{xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx}  (\xvec(w), \yvec_1, \yvec_2(i))\in  \typ(X,Y_1,Y_2) \Big\}\bigcap E_1^c \bigcap E_0^c\Big| \yvec_1(i), \yvec_2(i) \Big)\bigg\}\nonumber\\
4319:            %%%%%%%%%%%%%%%%%%%%%
4320:             %           &  & \le E_{\yvec_1,\yvec_2} \Bigg\{\sum_{\substack{ w \in \mL_2(i),\\ w \ne w_i}}
4321:             %                       \Pr\Big(\Big\{ \exists \yvec_1 \in S_{k(i)}, \yvec_1 \ne \yvec_1(i),\nonumber\\
4322:             %           &  &       \phantom{xxxxx}   (\xvec(w), \yvec_1, \yvec_2(i)) \in \typ(X,Y_1,Y_2) \Big\}\nonumber\\
4323:             %           &  &       \phantom{xxxxxxxxx} \bigcap E_1^c \bigcap E_0^c\Big| \yvec_1(i), \yvec_2(i) \Big)\Bigg\}\nonumber\\
4324:                        &  & \stackrel{(a)}{\le} E_{\yvec_1,\yvec_2} \Bigg\{\sum_{\substack{ w \in \mL_2(i),\\ w \ne w_i}}
4325:                                    \sum_{\substack{\yvec_1 \in S_{k(i)},\\ \yvec_1 \ne \yvec_1(i)}}
4326:                                    \Pr\Big(\Big\{ (\xvec(w), \yvec_1, \yvec_2(i))   \in \typ(X,Y_1,Y_2) \Big\}
4327:                                \bigcap E_1^c \bigcap E_0^c\Big| \yvec_1(i), \yvec_2(i) \Big)\Bigg\}\nonumber\\
4328:            %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4329:            %            &  & = E_{\yvec_1,\yvec_2} \Bigg\{\sum_{\substack{ w \in \mL_2(i),\\ w \ne w_i}}
4330:                        &  & = E_{\yvec_1,\yvec_2} \Bigg\{\sum_{\substack{ w \in \mL_2(i),\\ w \ne w_i}}
4331:                                    \sum_{\substack{\yvec_1 \in S_{k(i)},\\ \yvec_1 \ne \yvec_1(i)}}
4332:                 \sum_{\substack{\tilde{\yvec}_1 \in \typ(Y_1| \xvec(w),\yvec_2(i))}}
4333:                                            \Pr\big(\tilde{\yvec}_1|\xvec(w), \yvec_2(i)  \big)\Bigg\}\nonumber\\
4334:            %            &  & \stackrel{(a)}{=} E_{\yvec_1,\yvec_2} \Bigg\{\sum_{\substack{ w \in \mL_2(i),\\ w \ne w_i}}
4335:            %                        \sum_{\substack{\yvec_1 \in S_{k(i)},\\ \yvec_1 \ne \yvec_1(i)}}
4336:            %                    \sum_{\substack{\yvec_1 \in \\ \typ(Y_1|\xvec(w),\yvec_2(i))}} \Pr\big(\yvec_1 \big)\Bigg\}\nonumber\\
4337:                        &  & \stackrel{(b)}{\le}\!  E_{\yvec_1,\yvec_2} \Bigg\{\!\sum_{\substack{ w \in \mL_2(i),\\ w \ne w_i}}
4338:                                    \sum_{\substack{\yvec_1 \in S_{k(i)},\\ \yvec_1 \ne \yvec_1(i)}} 2^{n(H(Y_1|X,Y_2) + 2\eps)}
4339:            %            &  & \phantom{xxxxxxxxxxxxxxxxxxxxx}
4340:                        2^{-n(H(Y_1) - \eps)}\!\Bigg\}\nonumber\\
4341:            %            &  & \le  2^{-n(I(X,Y_2;Y_1) - 3\eps)} E_{\yvec_1,\yvec_2} \Big\{ ||\mL_2(i)|| \cdot ||S_{k(i)}||  \Big\}\nonumber\\
4342:            %            &  &  = 2^{-n(I(X,Y_2;Y_1) - 3\eps)} E_{\yvec_2}
4343:            %                    \bigg\{ E_{\yvec_1|\yvec_2} \Big\{ ||\mL_2(i)|| \cdot ||S_{k(i)}|| \Big\} \bigg\}\nonumber\\
4344:                        &  &  \le 2^{-n(I(X,Y_2;Y_1) - 3\eps)} E_{\yvec_2}
4345:                                \bigg\{ E_{\yvec_1|\yvec_2} \Big\{ ||\mL_2(i)|| \cdot ||S_{k(i)}|| \Big\} \bigg\}\nonumber\\
4346:                        \label{eqn:Rx2_rate_bound_2}
4347:                        &  &  = 2^{-n(I(X,Y_2;Y_1) - 3\eps)}
4348:                                E_{\yvec_2} \bigg\{ ||\mL_2(i)|| E_{\yvec_1|\yvec_2} \Big\{  ||S_{k(i)}|| \Big\} \bigg\},
4349:                        \end{eqnarray}}
4350:                        where (a) follows from the union bound
4351:                        and (b) is because $\yvec_1 \ne \yvec_1(i)$ is selected independently of $\xvec(w)$ and $\yvec_2(i)$
4352:                        and we also used the properties of typical sets, see \cite[ch. 14.2]{cover-thomas:it-book}. In the derivation
4353:                        above we used
4354:                        \[
4355:                            \Pr(\yvec_1(i),\yvec_2(i)) \triangleq \Pr((\yvec_1(i), \yvec_2(i)) \mbox{ received } | \xvec(w_i)
4356:                                \mbox{ transmitted}).
4357:                        \]
4358: 
4359:                        Next, we bound $E_{\yvec_1|\yvec_2} \Big\{  ||S_{k(i)}|| \Big\}$:
4360:            %            From $E_1^c \bigcap E_0^c$ we have that the correct $\yvec_2^m(i)$ is available at $\Rgood$. Now, by
4361:                        By \cite[ch. 14.2]{cover-thomas:it-book}, the size of
4362:                        the set $\typm(Y_1|\yvec_2^m(i))$ is bounded as
4363:                        $||\typm(Y_1|\yvec_2^m(i))|| \le 2^{m(H(Y_1|Y_2) + 2\delta)}$, for any
4364:                            $\yvec_2^m(i)$.
4365:                        For  each $\yvec_1^m \in \typm(Y_1|\yvec_2^m(i))$ we look for the vectors $\yvec_1 \in \typ(Y_1)$,
4366:                        such that their first $m$ elements are equal to $\yvec_1^m$. Let us examine one such $\yvec_1$ vector:
4367:                        since this vector is $\eps$-typical then, by definition
4368:                        \begin{equation}
4369:                        \label{eqn:epsilon-typical-set}
4370:                            H(Y_1) - \eps \le -\frac{1}{n}\sum_{i=1}^n \log p_{Y_1}(y_{1,i}) \le H(Y_1) + \eps,
4371:                        \end{equation}
4372:                        and its first $m$ elements, by the definition of conditionally typical sets and jointly typical sets,
4373:                        satisfy
4374:                        \begin{equation}
4375:                        \label{eqn:delta-typical-set}
4376:                            H(Y_1) - \delta \le -\frac{1}{m}\sum_{i=1}^m \log p_{Y_1}(y_{1,i}) \le H(Y_1) + \delta.
4377:                        \end{equation}
4378:            %%            Therefore, we can write
4379:            %%            {\setlength\arraycolsep{0pt}
4380:            %%            \begin{eqnarray*}
4381:            %%                &  & \left|\frac{1}{n}\sum_{i=1}^n \log p(y_{1,i}) - H(Y_1)\right|  =  \\
4382:            %%%                &  & \quad    \left|\frac{1}{n}\sum_{i=1}^m \log p(y_{1,i}) + \frac{1}{n}\sum_{i=m+1}^n \log p(y_{1,i})-H(Y_1)\right|\\
4383:            %%                &  & \phantom{x}   \left|\frac{m}{n} \frac{1}{m}\sum_{i=1}^m \log p(y_{1,i}) +
4384:            %%                        \frac{1}{n}\sum_{i=m+1}^n \log p(y_{1,i}) - H(Y_1)\right| \le \eps,
4385:            %%            \end{eqnarray*}
4386:            %%            }
4387:            %%            hence,
4388:                        From \eqref{eqn:epsilon-typical-set} and \eqref{eqn:delta-typical-set} it follows that
4389:                        \begin{eqnarray*}
4390:                            -\frac{1}{n}\!\sum_{i=m+1}^n \!\!\log p_{Y_1}(y_{1,i}) \! & \le & \!
4391:                            H(Y_1) + \!\frac{m}{n} \frac{1}{m}\sum_{i=1}^m \log p_{Y_1}(y_{1,i})\!+\eps\\
4392:                             & \le & H(Y_1) - \frac{m}{n} \left( H(Y_1) - \delta \right) + \eps\\
4393:                             &  =  & H(Y_1)\left( 1 - \frac{m}{n} \right) +\eps + \frac{m}{n}\delta.
4394:            %                 &  \stackrel{(a)}{\le}  & H(Y_1)\left( 1 - \frac{m}{n} \right) +\eps + \frac{m}{n}\delta.
4395:                        \end{eqnarray*}
4396:            %            where (a) follows from the fact that $-\frac{1}{m}\sum_{i=1}^m \log p_{Y_1}(y_{1,i}) \ge H(Y_1) - \delta$.
4397:                         Finally we get that
4398:                        \begin{eqnarray}
4399:                            -\frac{1}{n-m}\sum_{i=m+1}^n \log p_{Y_1}(y_{1,i})  & \le &
4400:                              H(Y_1) +\frac{n}{n-m}\left(\eps + \frac{m}{n}\delta\right) \nonumber\\
4401:                        %\label{eqn:Entropy_calc_tail}
4402:                              & = & H(Y_1) +\frac{1}{1-K}\left(\eps + K\delta\right). \nonumber
4403:            %                   H(Y_1) +\frac{1}{1-K}\left(\eps + K\delta\right). \nonumber
4404:                        \end{eqnarray}
4405:                        Letting $\eps' \triangleq \frac{1}{1-K}\left(\eps + K\delta\right)$ we get that the
4406:                        last $n-m$ symbols of $\yvec_1 \in \ttyp(Y_1,\yvec_1^m)$ belong to
4407:            %            the $\eps'$-typical set of $Y_1$ of length $n-m$,
4408:                        $A_{\eps'}^{(n-m)}(Y_1)$.
4409:            %            \[
4410:            %                H(Y_1) - \eps' \le \frac{1}{n-m}\sum_{i=m+1}^n \log p(y_{1,i}) \le H(Y_1) + \eps'.
4411:            %            \]
4412:                        Therefore,
4413:                        %the size of the set of typical sequences of length $n$ with the first $m$ elements
4414:                        %fixed,
4415:                        $||\ttyp(Y_1,\yvec_1^m)|| \le 2^{(n-m)(H(Y_1) + \eps')}$
4416:            %            is upper bounded by $2^{(n-m)(H(Y_1) + \eps')}$,
4417:                        for any $\yvec_1^m \in \typm(Y_1|\yvec_2^m(i))$.
4418:            %            In conclusion, the size of $\btyp(Y_1,\yvec_2^m(i))$ is upper bounded by:
4419:                        In conclusion, we get
4420:                        \begin{eqnarray}
4421:                        ||\btyp(Y_1,\yvec_2^m(i))|| & \le & ||\typm(Y_1|\yvec_2^m(i))|| \times
4422:                                \max_{\yvec_1^m \in \typm(Y_1|\yvec_2^m(i))}\left\{||\ttyp(Y_1,\yvec_1^m) ||\right\}\nonumber\\
4423:                               & \le & 2^{m(H(Y_1|Y_2) + 2\delta)} 2^{(n-m)(H(Y_1) + \eps')}\nonumber\\
4424:                               & = & 2^{nH(Y_1) + m(H(Y_1|Y_2) - H(Y_1)) +2m\delta + (n - m) \eps'}\nonumber\\
4425:                               & = & 2^{nH(Y_1)  - m I(Y_1;Y_2) + n \left( \frac{m}{n}2 \delta + \frac{n-m}{n} \eps' \right)}\nonumber\\
4426:            %                   & = & 2^{n(H(Y_1) - K I(Y_1;Y_2) + \left(2K \delta + \eps + K \delta\right))}\nonumber\\
4427:                        \label{eqn:typ_set_size_bound}
4428:                               & = & 2^{n(H(Y_1) - K I(Y_1;Y_2) + \eps'')},
4429:                        \end{eqnarray}
4430:                        where we set $\eps'' \triangleq \eps + 3K \delta$. Note that this result is derived for $K<1$.
4431:                        Repeating the derivation for $K=1$ we get that the bound in (\ref{eqn:typ_set_size_bound}) remains
4432:                        valid also when $K=1$. Since this is independent of the particular
4433:                        $\yvec_2^m(i)$ sequence, we have that
4434:                        \begin{equation}
4435:                        \label{eqn:Conditional-bound-set-size}
4436:                            E_{\yvec_1|\yvec_2} \Big\{  ||S_{k(i)}|| \Big\} \le 1 + 2^{n(H(Y_1) - K I(Y_1;Y_2)  - C_{12} + \eps'')}.
4437:                        \end{equation}
4438:            %            Note that $H(Y_1) - K I(Y_1;Y_2) < H(Y_1)$ when $K >0$, therefore
4439:            %            the partition size for this conference step is reduced compared to theorem \ref{thm:achive_common_one_step},
4440:            %            resulting in higher rates to $\Rbad$.
4441:                        Plugging (\ref{eqn:Conditional-bound-set-size})
4442:                            back into (\ref{eqn:Rx2_rate_bound_2}) and using the bound on
4443:                            $E_{\yvec_2}\big\{||\mL_2(i)||\big\}$ from equation \eqref{equ:size_L2}, we get
4444:                        \begin{eqnarray*}
4445:            %%            &  & \Pr\Big(\Big\{ \exists w \in \mL_2(i), w \ne w_i, \exists \yvec_1 \in S_{k(i)},
4446:            %%                    \yvec_1 \ne \yvec_1(i), \nonumber\\
4447:            %%            &  &  \phantom{xx}  (\xvec(w), \yvec_1, \yvec_2(i)) \in \typ(X,Y_1,Y_2)
4448:            %%                    \Big\}\bigcap E_1^c \bigcap E_0^c\Big) \nonumber\\
4449:                        &  & \Pr(E_{2,B}''') \nonumber\\
4450:            %            &  &  \le 2^{-n(I(X,Y_2;Y_1) - 3\eps)}
4451:            %                     E_{\yvec_2} \bigg\{ ||\mL_2(i)|| E_{\yvec_1|\yvec_2} \Big\{  ||S_{k(i)}|| \Big\} \bigg\}\\
4452:            %            &  &  \le E_{\yvec_2} \Big\{ ||\mL_2(i)||\Big\}  2^{-n(I(X,Y_2;Y_1) - 3\eps)} \times \\
4453:            %            &  &  \phantom{xxxxxxxxxx}   \left( 1 + 2^{n(H(Y_1) - K I(Y_1;Y_2)  - C_{12} + \eps'')}\right)\\
4454:                        &  & \le \left( 1 + 2^{n(R- I(X;Y_2) + 3\eps)}\right) \times \Big(2^{-n(I(X,Y_2;Y_1) - 3\eps)}
4455:                                      + 2^{-n(C_{12} + K I(Y_1;Y_2) - H(Y_1 | X,Y_2) - \eps'' - 3\eps)} \Big)\\
4456:                        %%%%%%%%%%%%%%%%%%%%%%%%
4457:                        &  & = 2^{-n(C_{12} + K I(Y_1;Y_2) - H(Y_1 | X,Y_2) - \eps'' - 3\eps)} +
4458:                                 2^{n(R- I(X;Y_2)  -C_{12} - K I(Y_1;Y_2) + H(Y_1 | X,Y_2) + \eps'' + 6\eps)}+\\
4459:                        &  & \phantom{xxxxxx} 2^{-n(I(X,Y_2;Y_1) - 3\eps)} + 2^{n(R- I(X;Y_2) - I(X,Y_2;Y_1) + 6\eps)}.
4460:                        \end{eqnarray*}
4461:            %            Making this arbitrarily small for $n$ large enough requires
4462:                        Therefore,
4463:                        $\Pr(E_{2,B}''')$ can be made arbitrarily small by taking $n$ large enough, as
4464:                        long as
4465:                        \begin{eqnarray}
4466:                            \label{eqn:three-steps-capacity-conditions}
4467:                            C_{12} &  >  &\! H(Y_1 | X,Y_2) - K_{\eps} I(Y_1;Y_2) + \eps''' + 3\eps,\\
4468:            %                R      & < &  I(X;Y_2) - H(Y_1 | X,Y_2) + C_{12} +  K I(Y_1;Y_2)  - \eps'' - 6\eps.
4469:                            R      & < & \! I(X;Y_2) - H(Y_1 | X,Y_2) + C_{12} +  K_{\eps} I(Y_1;Y_2)  - \eps'''',\nonumber
4470:                        \end{eqnarray}
4471:                        where $\eps''' \! \triangleq \!\eps \!+ \! 3 \delta$ and $\eps''''\!\triangleq\!\eps'''\! + 6\eps$.
4472:                        Combining this with the rate bound from \eqref{equ:EA_rate_bound} we get that if
4473:            %            $C_{12}$ satisfies (\ref{eqn:three-steps-capacity-conditions})~and
4474:            %            \[
4475:            %                C_{12}   >   H(Y_1 | X,Y_2) - K I(Y_1;Y_2) + \eps'' +3\eps,
4476:            %            \]
4477:            %            and
4478:                        \begin{eqnarray}
4479:                            R  & < &   \max \Big\{I(X;Y_2), \;\; I(X;Y_2) - H(Y_1 | X,Y_2) +
4480:                        \label{eqn:three-steps-decode-first}
4481:                                \min\left(C_{12} + K_{\eps} I(Y_1;Y_2), H(Y_1|Y_2) \right) \Big\}- \eps'''',
4482:            %                   &     & \phantom{x} \min\left(C_{12} + K I(Y_1;Y_2), H(Y_1|Y_2) \right) - \eps'' - 6\eps,
4483:                        \end{eqnarray}
4484:                        the probability
4485:                        $\Pr\left(E_2 \bigcap E_1^c \bigcap E_0^c\right)$
4486:            %            \begin{eqnarray*}
4487:            %                &   & \Pr\left(E_2 \bigcap E_1^c \bigcap E_0^c\right)  \le
4488:            %                            \Pr\left(E_2' \bigcap E_0^c\right) \\
4489:            %                &   & \phantom{xxxxx}+ \Pr\left(E_2''\bigcap E_1^c \bigcap E_0^c\right) +
4490:            %                    \Pr\left(E_2''' \bigcap E_1^c \bigcap E_0^c\right) \\
4491:            %                &   & \le 2^{-n(C_{12} + K I(Y_1;Y_2) - H(Y_1 | X,Y_2) - \eps'' - 3\eps)} + \\
4492:            %            &  & \phantom{xxxx} 2^{n(R- I(X;Y_2)  -C_{12} - K I(Y_1;Y_2) + H(Y_1 | X,Y_2) + \eps'' + 6\eps)}+\\
4493:            %            &  & \phantom{xxxxxx} 2^{-n (I(X;Y_1|Y_2) - 4\eps)}  + 2^{n(R - I(X;Y_1,Y_2)  + 7\eps)},
4494:            %            \end{eqnarray*}
4495:                        can be made arbitrarily small by taking $n$ large enough.
4496:                        %\footnote{Note that $K$ is
4497:                        %also a function of $n$ but we can approach the rate arbitrarily close by taking
4498:                        %$n$ large enough.}.
4499:                        Compared with proposition \ref{prop:achive_common_one_step} we note that for the same $C_{12}$
4500:                        the rate to $\Rbad$ is increased and the region of $C_{12}$ where cooperation is useful is
4501:                        also increased.
4502: 
4503: 
4504: 
4505:                    \subsubsection{Probability of $E_3 \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c$}
4506:                    \label{sec:prob-bound-E3}
4507:                        First note that $E_2^c \bigcap E_1^c \bigcap E_0^c$ implies correct decoding at $\Rbad$ and
4508:                        thus $w_i \in B_{k'(i)}$ for some index $k'(i)$. It also implies that
4509:                        $\left(\xvec^m(w_i) ,\yvec_1^m(i) \right) \in \typm(X,Y_1)$
4510:                        and $\left(\xvec(w_i) ,\yvec_1(i) \right) \in \typ(X,Y_1)$
4511:                        and therefore
4512:            %            $w_i \in \tmL_1(i)$. Thus
4513:                        $w_i \in B_{k'(i)} \bigcap \tmL_1(i)$ and
4514:                        $\Pr\left(E_3' \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c\right) =  \Pr\left(E_3'' \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c\right) = 0$.
4515:            %            Additionally, $E_0^c$ implies that
4516:                        %$\left(\xvec^m(w_i), \yvec_1^m(i), \yvec_2^m(i)\right) \in \typm(X,Y_1,Y_2)$ and thus
4517:            %            $\Pr\left(E_3'' \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c\right) = 0$.
4518:                        Consider now $\Pr\left(E_3''' \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c\right)$:
4519:                        {\setlength\arraycolsep{0mm}
4520:                        \begin{eqnarray}
4521:                            &  &  \Pr\left(E_3''' \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c\right)\nonumber\\
4522:            %                &  & \Pr\Big(\Big\{\exists w \ne w_i,  w \in \tmL_1(i),
4523:            %                        (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i))\nonumber\\
4524:            %                &  &   \phantom{xxxxx} \in \typm(X,Y_1,Y_2),w \in  B_{k'(i)}\Big\} \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c\Big)\nonumber\\
4525:            %%%                &  &  =  E_{\yvec_1}\bigg\{\Pr\Big(\Big\{\exists w \ne w_i,  w \in \tmL_1(i), w \in  B_{k'(i)},\nonumber\\
4526:            %%%                &  &   \phantom{xxxxx} (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i))\in \typm(X,Y_1,Y_2)\Big\} \nonumber\\
4527:            %%%                &  &   \phantom{xxxxxxxxxx}  \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c \Big| \yvec_1(i)\Big)\bigg\}\nonumber\\
4528:                            &  & \qquad \le E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i), \\ w \ne w_i}}
4529:                                    \Pr\Big(\Big\{   w \in  B_{k'(i)},
4530:                                (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i)) \in  \typm(X,Y_1,Y_2)\Big\}
4531:                            \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c \Big| \yvec_1(i)\Big)\Bigg\}\nonumber\\
4532:                            &  & \qquad \stackrel{(a)}{=} E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i), \\ w \ne w_i}}
4533:                                    \Pr\big(w \in  B_{k'(i)}\big)\times\nonumber\\
4534:                            &  &   \phantom{xxxxxxxxxxxxxxxxxxxx} \Pr\Big(\Big\{  (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i))\in \typm(X,Y_1,Y_2)\Big\}
4535:                                  \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c \Big| \yvec_1(i)\Big)\Bigg\}\nonumber\\
4536:            %%%                &  & = E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i), \\ w \ne w_i}} 2^{-nC_{21}^b}\times \nonumber\\
4537:            %%%                &  &   \phantom{xxxxx} \sum_{\yvec_2^m(i) \in \typm(Y_2|\yvec_1^m(i),\xvec^m(w))}
4538:            %%%                            \Pr(\yvec_2^m(i)|\yvec_1^m(i), \xvec^m(w))\Bigg\}\nonumber\\
4539:            %%%                &  & \stackrel{(b)}{=} 2^{-nC_{21}^b} E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i), \\ w \ne w_i}}
4540:            %%%               \sum_{\substack{\yvec_2^m(i) \in \\ \typm(Y_2|\yvec_1^m(i),\\\phantom{xxxxxx} \xvec^m(w))}}
4541:            %%%                            \Pr(\yvec_2^m(i)|\yvec_1^m(i))\Bigg\}\nonumber\\
4542:            %%                &  & \stackrel{(a)}{=}  E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i),  w \ne w_i}}
4543:            %%                \Pr\big(w \in  B_{k'(i)}\big)\nonumber\\
4544:            %%                &  & \phantom{xxxxxxxxx}\sum_{\substack{\yvec_2^m(i) \in  \typm(Y_2|\yvec_1^m(i), \xvec^m(w))}}
4545:            %%                            \Pr(\yvec_2^m(i)|\yvec_1^m(i))\Bigg\}\nonumber\\
4546:            %                &  &   \qquad   \le E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i), \\ w \ne w_i}}
4547:            %                          \Pr\Big(\Big\{   w \in  B_{k'(i)},
4548:            %                            (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i)) \in \typm(X,Y_1,Y_2)\Big\}
4549:            %                            \bigcap E_2^c \bigcap E_1^c \bigcap E_0^c \Big| \yvec_1(i)\Big)\Bigg\}\nonumber\\
4550:                            &  & \qquad    \stackrel{(b)}{=}  E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i),  w \ne w_i}}
4551:                            \Pr\big(w \in  B_{k'(i)}\big) \times
4552:                                    \sum_{\substack{\yvec_2^m \in  \typm(Y_2|\yvec_1^m(i), \xvec^m(w))}}
4553:                                        \Pr(\yvec_2^m|\yvec_1^m(i))\Bigg\}\nonumber\\
4554:            %%%                &  & \le 2^{-nC_{21}^b}  E_{\yvec_1}\Bigg\{\sum_{\substack{w \in \tmL_1(i), \\ w \ne w_i}}
4555:            %%%                         2^{m(H(Y_2|Y_1,X) + 2\delta)} 2^{-m(H(Y_2|Y_1) - 2\delta)}\Bigg\}\nonumber\\
4556:            %%%                &  & = 2^{-nC_{21}^b} 2^{-m(I(X;Y_2|Y_1) - 4\delta)} E_{\yvec_1}\Big\{||\tmL_1(i)||\Big\}\nonumber\\
4557:                            &  & \qquad \le 2^{-nC_{21}^b} 2^{-m(I(X;Y_2|Y_1) - 4\delta)} E_{\yvec_1}\Big\{||\tmL_1(i)||\Big\}\nonumber\\
4558:                            &  & \qquad \stackrel{(c)}{\le} 2^{-n(C_{21}^b + K I(X;Y_2|Y_1) -  4 K \delta)}\left(1 + 2^{n\left(R - I(X;Y_1) + 3\eps + 6 K \delta\right)} \right) \nonumber
4559:            %%%                &  & = 2^{-n(C_{21}^b + K I(X;Y_2|Y_1) -  4 K \delta)}\nonumber\\
4560:            %%%            \label{eqn:Pron_error_E3'''}
4561:            %%%                &  & \phantom{xxxxxx}  + 2^{n\left(R - I(X;Y_1) -C_{21}^b - K I(X;Y_2|Y_1) + 3\eps  +  10 K \delta\right)},
4562:                        \end{eqnarray}}
4563:                        \noindent
4564:                        where (a) is due to the independent mapping of the messages into the sets $B_{k'}$, (b) is
4565:                         because $\yvec_2^m(i)$ is generated by $\xvec^m(w_i)$ and hence
4566:                        is independent of any $\xvec^m(w)$ with $w \ne w_i$.
4567:                        To obtain the bound in (c) we recall that $\avec^m \in \typm(A)$ and
4568:                        $\avec^n \in \typ(A)$ imply that the elements $\avec_{m+1}^n \in A_{\eps'}^{(n-m)}(A)$.
4569:                        Therefore, the probability that an i.i.d.\ sequence $\xvec$, independent of $\yvec_1(i)$, belongs to
4570:                        $\tmL_1(i)$ is bounded~by
4571:                        \begin{eqnarray*}
4572:                            \Pr(\xvec \in \tmL_1(i)) & \le &\! \Pr\Big((\xvec^m,\yvec_1^m(i)) \in \typm(X,Y_1)\Big) \times
4573:                                \Pr\Big((\xvec_{m+1}^n, \yvec_{1,m+1}^n(i)) \in A_{\eps'}^{(n-m)}(X,Y_1)\Big)\\
4574:                              & \le & 2^{-m(I(X;Y_1) - 3 \delta)} 2^{-(n-m)(I(X;Y_1) - 3 \eps')}\\
4575:                              & = & 2^{-n (I(X;Y_1)  - 3 \eps - 6 K \delta)}.
4576:                        \end{eqnarray*}
4577:            %            The expression in (\ref{eqn:Pron_error_E3'''})
4578:                        Therefore $\Pr(E_3'''\bigcap E_2^c \bigcap E_1^c \bigcap E_0^c)$ can
4579:                        be made arbitrarily small by taking $n$ large enough, as long as
4580:                        \begin{equation}
4581:                        \label{eqn:three-steps-decode-second}
4582:                            R < I(X;Y_1) + C_{21}^b + K_{\eps} I(X;Y_2|Y_1) - 3\eps  -  10  \delta.
4583:                        \end{equation}
4584:                        Combining this with equation (\ref{eqn:three-steps-decode-first}) yields the rate expression
4585:                        $R_{212}$. Switching the roles of $\Rgood$ and $\Rbad$ we obtain $R_{121}$.
4586:                        The case where $\alpha = 0$ can be obtained from proposition \ref{prop:achive_common_one_step}.
4587: 
4588: 
4589:                \subsection{Rate Bounds for the Two-Step Conference}
4590:                \label{sec:two-step}
4591:                        Consider the following two-step conference:
4592:                        \begin{enumerate}
4593:                            \item $\Rgood$ sends information at rate $C_{12}$ to $\Rbad$.
4594:                            \item $\Rbad$ decodes and sends information at rate $C_{21}$ to $\Rgood$.
4595:                        \end{enumerate}
4596:                        Lastly $\Rgood$ decodes.
4597:                        For this setup we specialize the conference scheme of appendix
4598:                        \ref{sec:three-steps} (we state only the modifications).
4599:                        Let $m = \min \left\{ n , \left\lfloor \frac{n C_{12}}{H(Y_1) + \delta} \right\rfloor \right\}$,
4600:                        $K = \frac{m}{n}$ and $K_{\eps} = \left[\frac{C_{12}}{H(Y_1) + \delta}\right]^* - \eps$.
4601:                        Increase $n$ to obtain $K > K_{\eps}$. Note that  $K \le 1$.
4602:            %            \[
4603:            %                m = \min \left\{ n , \left\lfloor \frac{n C_{12}}{H(Y_1) + \delta} \right\rfloor \right\},
4604:            %                \qquad K = \frac{m}{n}.
4605:            %            \]
4606:                        \subsubsection{Codebooks Construction and Conference Steps}
4607:                        \paragraph{Relay Codebook Construction at $\Rgood$}
4608:                            $\Rgood$ enumerates all the sequences in $\typm(Y_1)$ with an index
4609:                            $l \in \left\{1,2,\ldots,2^{nC_{12}} \right\}$.
4610:                        \paragraph{Relay Codebook Construction at $\Rbad$}
4611:                            $\Rbad$
4612:                            %has only one codebook, generated by partitioning the message set
4613:                            partitions the set $\mW$ into $2^{nC_{21}}$
4614:                            sets in a uniform and independent manner. Denote these sets with $B_{k'}$,
4615:                            $k' \in \left\{1,2,\ldots,2^{nC_{21}}\right\}$.
4616:                        \paragraph{Encoding at $\Rgood$ at the First Conference Step (time $i+1$)}
4617:                            Upon reception of $\yvec_1(i)$, $\Rgood$ considers its first $m$ elements denoted $\yvec_1^m(i)$,
4618:                            and looks for the index $l$ of $\yvec_1^m(i)$ in
4619:                            $\typm(Y_1)$. $\Rgood$ then sends this index to $\Rbad$ through the conference link.
4620:                        \paragraph{Decoding at $\Rbad$ (time $i+1$) and Encoding for the Second Conference Step}
4621:                            $\Rbad$ generates the set $\tmL_2(i)$,
4622:                            defined in the same way as $\tmL_1(i)$ in appendix \ref{sec:three-steps-decoder-1},
4623:                            with $\yvec_2(i)$, $\yvec_2^m(i)$ and $Y_2$ replacing $\yvec_1(i)$, $\yvec_1^m(i)$ and
4624:                            $Y_1$ respectively. Then,~$\Rbad$ looks for a unique message $\hw \in \tmL_2(i)$
4625:                            such that $\left(\xvec^m(\hw),\yvec_1^m(i), \yvec_2^m(i)\right)\! \in\! \typm(X,Y_1,Y_2)$.
4626:                            After decoding $w_i$, $\Rbad$ finds the index $k'(i)$ of the partition $B_{k'(i)}$ to which
4627:                            the decoded $w_i$ belongs. At time $i+2$, $\Rbad$ sends $k'(i)$ to $\Rgood$ through the conference
4628:                            link.
4629:                        \paragraph{Decoding at $\Rgood$ (time $i+2$)}
4630:                        \label{sec:two-step-decode-rx2}
4631:                            $\Rgood$ uses the standard set-intersection relay decoding as in \cite[theorem 1]{CoverG:79}.
4632:                            Thus the achievable rate to $\Rgood$ is bounded by
4633:            %                \begin{equation}
4634:            %                \label{eqn:two-step-rgood-rate}
4635:                                $R \le I(X;Y_1) + C_{21}$.
4636:            %                \end{equation}
4637: 
4638:                        \subsubsection{Error Events and Error Probability Bounds}
4639:                        We present here only the error events for decoding at $\Rbad$. The rest of the error events follow
4640:                        easily from the analysis in appendix \ref{sec:three-steps-error-events}. Define
4641:                        \begin{itemize}
4642:                         \item    $E_2' \!  = \!\Big\{ w_i \notin \tmL_2(i) \Big\}$,
4643:                         \item    $E_2''  = \left\{\! (\xvec^m(w_i), \yvec_1^m(i), \yvec_2^m(i)) \notin \typm(X,Y_1,Y_2)\!\right\}$,
4644:                         \item     $E_2'''\!\! =\!\! \Big\{\! \exists w \! \in \! \tmL_2(i), w \! \ne \! w_i,
4645:                                   (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i))\! \in \!\typm(X,Y_1,Y_2)\!\Big\}$.
4646:                        \end{itemize}
4647:            %        \subsection{Bounding the Probability of Error Events}
4648:                    By similar considerations to those in appendix \ref{sec:three-steps-error-bounds}, we have that
4649:                    $\Pr\left(E_2' \bigcap E_0^c\right) = \Pr\left(E_2'' \bigcap E_0^c\right) = 0$, and~we need to
4650:                    bound $\Pr\left(E_2'''\bigcap E_0^c\right)$:
4651:            %        {\setlength\arraycolsep{0mm}
4652:            %        \begin{eqnarray*}
4653:            %        & & \Pr\Big(\Big\{ \exists w \in \tmL_2(i), w \ne w_i, (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i))\\
4654:            %        & &  \qquad \qquad   \in \typm(X,Y_1,Y_2)\Big\}\bigcap E_1^c \bigcap E_0^c\Big)\\
4655:            %%        & & = E_{\yvec_2}\bigg\{ \Pr\Big(\Big\{ \exists w \in \tmL_2(i), w \ne w_i, (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i))\\
4656:            %%        & &  \qquad \qquad   \in \typm(X,Y_1,Y_2)\Big\}\bigcap E_1^c \bigcap E_0^c \Big|\yvec_2(i)\Big)\bigg\}\\
4657:            %        & & \le E_{\yvec_2}\Bigg\{ \sum_{\substack{w \in \tmL_2(i), \\ w \ne w_i}}
4658:            %            \Pr\Big(\Big\{  (\xvec^m(w), \yvec_1^m(i), \yvec_2^m(i))\\
4659:            %        & &  \qquad \qquad   \in \typm(X,Y_1,Y_2)\Big\}\bigcap E_1^c \bigcap E_0^c \Big|\yvec_2(i)\Big)\Bigg\}\\
4660:            %%        & & \le E_{\yvec_2}\bigg\{ \sum_{\substack{w \in \tmL_2(i), \\ w \ne w_i}} 2^{-m(I(X;Y_1|Y_2) - 4 \delta)}
4661:            %%            \bigg\}\\
4662:            %%        & & = 2^{-m(I(X;Y_1|Y_2) - 4 \delta)} E_{\yvec_2}\Big\{||\tmL_2(i)||\Big\}\\
4663:            %        & & \le 2^{-m(I(X;Y_1|Y_2) - 4 \delta)} E_{\yvec_2}\Big\{||\tmL_2(i)||\Big\}\\
4664:            %        & & \le 2^{-nK(I(X;Y_1|Y_2) - 4 \delta)}\left(1 + 2^{n(R - I(X;Y_2) + 3 \eps + 6 K \delta)}\right),
4665:            %%       & & = 2^{-nK(I(X;Y_1|Y_2) - 4 \delta)} + 2^{n(R - I(X;Y_2) - KI(X;Y_1|Y_2) +10K\delta + 3 \eps)}
4666:            %        \end{eqnarray*}
4667:            %        }
4668:            %        which can be made arbitrarily small as long as
4669:                    using a derivation similar to that in %section \ref{sec:prob-bound-E3}
4670:                    theorem \ref{thm:main_thm} we conclude that
4671:                    $\Pr(E_2''' \bigcap E_0^c)$ can be made arbitrarily small as long as
4672:                    \[
4673:                        R \le I(X;Y_2) + K_{\eps} I(X;Y_1|Y_2) - 3\eps -10  \delta.
4674:                    \]
4675:            %        Combining this with equation (\ref{eqn:two-step-rgood-rate}) we get the rate
4676:                    Combining this with the rate constraint in appendix \ref{sec:two-step-decode-rx2}
4677:                    we get the rate expression for $R_{12}$ of equation (\ref{eqn:thm_3step_2steps_rate}).
4678:            %        {\setlength\arraycolsep{0pt}
4679:            %        \begin{eqnarray*}
4680:            %          &  &  R_{12} = \min\bigg(I(X;Y_1) + C_{21}, \\
4681:            %          &  &  \phantom{xxxxxxxxxxxxx} I(X;Y_2) + \left[\frac{C_{12}}{H(Y_1)}\right]^*I(X;Y_1|Y_2) \bigg).
4682:            %        \end{eqnarray*}}
4683:                    Switching the order of the conference we obtain the expression for $R_{21}$.
4684:                    Finally, combining this with the three-step conference rate expressions
4685:                    obtained in appendix \ref{sec:three-steps} yields the rates of theorem \ref{thm:achieve-three-steps}.
4686:            %        Note that since the minimum includes the two-step conference result which always (i.e. when the
4687:            %        channels are not degraded) gives a rate increase over the non-cooperative rate,
4688:            %        we do not need to state explicitly
4689:            %        the condition on the capacity of equation (\ref{eqn:three-steps-capacity-conditions}).
4690: 
4691: \end{comment}
4692: 
4693: 
4694: \begin{thebibliography}{10}
4695: 
4696: \bibitem{Meulen:71}
4697: E. C. van der Meulen.
4698: \newblock {``Three-Terminal Communication Channels"}.
4699: \newblock {\em Adv. Appl. Probab.}, vol.~3, pp.~120--154, 1971.
4700: 
4701: \bibitem{CoverG:79}
4702: T.~M. Cover and A.~A. {El Gamal}.
4703: \newblock {``Capacity Theorems for the Relay Channel"}.
4704: \newblock {\em IEEE Trans. Inform. Theory}, IT-25(5):572--584, 1979.
4705: 
4706: \bibitem{GuptaKumar:2003}
4707: P.~Gupta and P.~R.~Kumar.
4708: \newblock{``Towards an Information Theory of Large Networks: An Achievable Rate Region"}.
4709: \newblock{\em IEEE Trans. Inform. Theory}, 49(8):1877--1894, 2003.
4710: 
4711: \bibitem{XieKumar:2004}
4712: L.~-L.~Xie and P.~R.~Kumar.
4713: \newblock{``A Network Information Theory for Wireless Communication: Scaling Laws and Optimal Operation"}.
4714: \newblock{\em IEEE Trans. Inform. Theory}, 50(5):748--767, 2004.
4715: 
4716: \bibitem{XieKumar:2005}
4717: L.~-L.~Xie and P.~R.~Kumar.
4718: \newblock{``An Achievable Rate for the Multiple-Level Relay Channel"}.
4719: \newblock{\em IEEE Trans. Inform. Theory}, 51(4):1348--1358, 2005.
4720: 
4721: \bibitem{Kramer:2003}
4722: G.~Kramer, M.~Gastpar, and P.~Gupta.
4723: \newblock{``Capacity Theorems for Wireless Relay Channels"}.
4724: \newblock {\em Proc. 41st Allerton Conf. on Communication, Control, and Computing}, pp.~1074--1083, Monticello, IL, 2003.
4725: 
4726: \bibitem{Madsen:2005}
4727: B.~Wang, J.~Zhang and A.~Host-Madsen.
4728: \newblock{``On the Capacity of MIMO Relay Channels"}.
4729: \newblock{\em IEEE Trans. Inform. Theory}, 51(1):29--43, 2005.
4730: 
4731: \bibitem{Kramer:2005}
4732: G.~Kramer, M.~Gastpar, and P.~Gupta.
4733: \newblock{``Cooperative Strategies and Capacity Theorems for Relay Networks"}.
4734: \newblock{\em IEEE Trans. Inform. Theory}, 51(9):3037--3063, 2005.
4735: 
4736: \bibitem{Gastpar:2002}
4737: M. Gastpar, G. Kramer and P. Gupta.
4738: \newblock{``The Multiple-Relay Channel: Coding and Antenna-Clustering Capacity"}.
4739: \newblock{\em Proc. IEEE Int. Symp. Inform. Theory (ISIT)}, Lausanne, Switzerland, 2002, p.~136.
4740: 
4741: %\bibitem{SchienGallager:2000}
4742: %B.~Schein and R.~Gallager.
4743: %\newblock {``The Gaussian Parallel Relay Network"}.
4744: %\newblock {\em Proc. IEEE Int. Symp. Inform. Theory (ISIT)}, Sorrento, Italy, 2000, pg. 22.
4745: 
4746: \bibitem{ElGamalH:2006}
4747: L.~Lai, K.~Liu and H.~El~Gamal.
4748: \newblock{``The Three-Node Wireless Network: Achievable Rates and Cooperation Strategies"}.
4749: \newblock{\em IEEE Trans. Inform. Theory},  52(3):805--828,  2006.
4750: 
4751: \bibitem{Goldsmith:2006}
4752: C. T. K. Ng, I. Maric, A. J. Goldsmith, S. Shamai and R. D. Yates.
4753: \newblock{``Iterative and One-Shot Conferencing in Relay Channels"}.
4754: \newblock {\em Proc. IEEE Inform. Theory Workshop (ITW)}, Punta del Este, Uruguay, 2006.
4755: 
4756: %\bibitem{Motani:2005}
4757: %H.~F.~Chong, M.~Motani and  H.~K.~Garg.
4758: %\newblock{``New Coding Strategies for the Relay Channel"}.
4759: %\newblock{\em Proc. IEEE Int. Symp. Inform. Theory (ISIT)}, Adelaide,  Australia, 2005, pp. 1086--1090.
4760: 
4761: \bibitem{DraperFK:03}
4762: S.~C. Draper, B.~J. Frey, and F.~R. Kschischang.
4763: \newblock {``Interactive Decoding of a Broadcast Message"}.
4764: \newblock {\em Proc. 41st Allerton Conf.}, % on Communication, Control and Computing},
4765:  Urbana, IL, 2003.
4766: 
4767: \bibitem{RonSer:2005}
4768: R.~Dabora and S.~D. Servetto.
4769: \newblock {``Broadcast Channels with Cooperating Decoders"}.
4770: \newblock {{\em  IEEE Trans. Inform. Theory}}, to appear.
4771: 
4772: \bibitem{LiagV:2005}
4773: Y. Liang and V. V. Veeravalli.
4774: \newblock{``Cooperative Broadcast Relay Channels"}.
4775: \newblock{Submitted to the {\em IEEE Trans. Inform. Theory}}, July 2005.
4776: 
4777: \bibitem{ElGamal:06}
4778: A.~El-Gamal, M.~Mohseni and S.~Zahedi.
4779: \newblock{``Bounds on Capacity and Minimum Energy-per-Bit for AWGN Relay Channels"}.
4780: \newblock{\em IEEE Trans. Inform. Theory}, IT-52(4):1545--1561, 2006.
4781: 
4782: \bibitem{HostMadsen:05}
4783: A. Host-Madsen and J. Zhang.
4784: \newblock{``Capacity Bounds and Power Allocation for Wireless Relay Channels"}.
4785: \newblock{\em IEEE Trans. Inform. Theory}, IT-51(6):2020--2040, 2005.
4786: 
4787: \bibitem{Laneman:2000}
4788: J. N. Laneman and G. W. Wornell.
4789: \newblock{``Energy-Efficient Antenna Sharing and Relaying for Wireless Networks"}.
4790: \newblock{\em Proc. IEEE Wireless Communications and Networking Conference (WCNC)}, 2000, vol.~1, pp.~7--12.
4791: 
4792: \bibitem{Bao:2005}
4793: X. Bao and J. Li.
4794: \newblock{``Decode-Amplify-Forward (DAF): A New Class of Forwarding Strategy for Wireless Relay Channels"}.
4795: \newblock{\em Proc. 6th IEEE Workshop on Signal Proc. Adv. in Wireless Comm. (SPAWC)}, New York, 2005, pp.~816--820.
4796: 
4797: \bibitem{Kramer:Asi05}
4798: G. Kramer.
4799: \newblock{``Distributed and Layered Codes for Relaying"}.
4800: \newblock{\em Proc. 39th Asilomar Conf. on Signals, Systems and Computers}, 2005, pp. 1752--1756.
4801: 
4802: \bibitem{Stankovic:05}
4803: Z.~Liu, V.~Stankovi\'c and Z.~Xiong.
4804: \newblock{``Wyner-Ziv Coding for the Half-Duplex Relay Channel"}.
4805: \newblock{\em Proc. IEEE Int. Conf. on Acoustics, Speech, and Signal Processing (ICASSP)}, Philadelphia, 2005,
4806: vol. 5, pp. 1113--1116.
4807: 
4808: \bibitem{Marton:79}
4809: K.~Marton.
4810: \newblock {``A Coding Theorem for the Discrete Memoryless Broadcast Channel"}.
4811: \newblock {\em IEEE Trans. Inform. Theory}, IT-25(3):306--311, 1979.
4812: 
4813: \bibitem{Motani:06}
4814: M. Motani, H.-F. Chong and H. K. Garg.
4815: \newblock{``Backward Decoding Strategies for the Relay Channel"}.
4816: \newblock{\em MSRI Workshop: Mathematics of Relaying and Cooperation in Communication Networks}, Berkeley, 2006.
4817: 
4818: \bibitem{YeungBook}
4819: R.~W. Yeung.
4820: \newblock{\em A First Course in Information Theory}.
4821: \newblock Springer, 2002.
4822: 
4823: \bibitem{cover-thomas:it-book}
4824: T.~M. Cover and J.~Thomas.
4825: \newblock {\em {Elements of Information Theory}}.
4826: \newblock John Wiley and Sons Inc., 1991.
4827: 
4828: \bibitem{WZ:1976}
4829: A.~Wyner and J.~Ziv.
4830: \newblock{``The Rate-Distortion Function for Source Coding with Side Information at the Decoder"}.
4831: \newblock {\em IEEE Trans. Inform. Theory}, 22(1):1--10, 1976.
4832: 
4833: \bibitem{Willems:83}
4834: F.~M.~J. Willems.
4835: \newblock {``The Discrete Memoryless Multiple Access Channel with Partially
4836:   Cooperating Encoders"}.
4837: \newblock {\em IEEE Trans. Inform. Theory}, 29(3):441--445, 1983.
4838: 
4839: \bibitem{RonISIT05:05}
4840: R.~Dabora and S.~D. Servetto.
4841: \newblock{``On the Rates for the General Broadcast Channel with Partially Cooperating Receivers"}.
4842: \newblock{\em Proc. IEEE Int. Symp. Inform. Theory (ISIT)}, Adelaide,  Australia, 2005, pp. 2174--2178.
4843: 
4844: \bibitem{Cover:98}
4845: T.~M. Cover.
4846: \newblock {``Comments on Broadcast Channels"}.
4847: \newblock {\em IEEE Trans. Inform. Theory}, 44(6):2524--2530, 1998.
4848: 
4849: \bibitem{ElGamalM:81}
4850: A.~A. {El Gamal} and E.~C. van~der Meulen.
4851: \newblock {``A Proof of Marton's Coding Theorem for the Discrete Memoryless
4852:   Broadcast Channel"}.
4853: \newblock {\em IEEE Trans. Inform. Theory}, IT-27(1):120--122, 1981.
4854: 
4855: \bibitem{Kaspi:85}
4856: A.~H. Kaspi.
4857: \newblock{``Two-Way Source Coding with a Fidelity Criterion"}.
4858: \newblock {\em IEEE Trans. Inform. Theory}, IT-31(6):735--740, 1985.
4859: 
4860: \bibitem{Shlomo_BZ}
4861: A. Steiner, A. Sanderovich and S. Shamai.
4862: \newblock{``Broadcast Cooperation Strategies for Two Colocated Users"}.
4863: \newblock{Submitted to the {\em IEEE Trans. Inform. Theory}}, August 2007.
4864: 
4865: \bibitem{ron:ISIT06}
4866: R.~Dabora and S.~D. Servetto.
4867: \newblock{``A Multi-Step Conference for Cooperative Broadcast"}.
4868: \newblock{\em Proc. IEEE Int. Symp. Inform. Theory (ISIT)}, Seattle, WA, July 2006.
4869: 
4870: \end{thebibliography}
4871: \end{document}
4872: