cs0509064/p90r.tex
1: % Corrected version (including the comments at the top of p90.tex and after running ispell).
2: % This is the version that is also posted in the website.
3: % After p90.tex was forwarded to Yeung, but before he sent it for review, it
4: % has been replaced by this version on 18.5.04.
5: \documentclass[11pt]{article}
6: \usepackage{amsmath}
7: \usepackage{amsfonts}
8: \usepackage{amssymb}
9: \usepackage{graphicx}
10: \newcommand{\EE}{{\Bbb E}}
11: \newcommand{\eps}{\epsilon}
12: \newcommand{\al}{\alpha}
13: \newcommand{\dsum}{\displaystyle\sum}
14: \newcommand{\dint}{\displaystyle\int}
15: \newcommand{\dfr}{\displaystyle\frac}
16: \newcommand{\bign}{\mbox{\Large\rm n}}
17: 
18: 
19: %\def\thesection{\arabic{section}}
20: %\def\thesubsection {\thesection.\arabic{subsection}}
21: %\renewcommand{\thesubsubsection}{\thesubsection.\arabic{subsubsection}}
22: %\renewcommand{\theequation}{\thesection.\arabic{equation}}
23: %\newcommand{\Prob}{\operatorname{Prob}\, }
24: %\newcommand{\snr}{\operatorname{SNR}\, }
25: 
26: 
27: \newcommand{\req}[1]{(\ref{#1})}
28: \def\le{\leq}
29: \def\ge{\geq}
30: \def\lt{<}
31: \def\gt{>}
32: \newcommand{\ls}[1]
33:    {\dimen0=\fontdimen6\the\font \lineskip=#1\dimen0
34: \advance\lineskip.5\fontdimen5\the\font \advance\lineskip-\dimen0
35: \lineskiplimit=.9\lineskip \baselineskip=\lineskip
36: \advance\baselineskip\dimen0 \normallineskip\lineskip
37: \normallineskiplimit\lineskiplimit \normalbaselineskip\baselineskip
38: \ignorespaces }
39: 
40: %\ls{1} % single space
41: %\ls{1.5} % double space
42: %\ls{2}
43: %\ls{1.6}
44: %\ls{1.8}
45: 
46: 
47: %\documentstyle[11pt,epsf]{article}
48: %\topmargin      0.25truein
49: %\oddsidemargin  -0.1truein
50: %\evensidemargin -0.1truein
51: %\textheight     8.5truein
52: %\textwidth      6.5truein
53: %\footheight     0.15truein
54: %\footskip       0.6truein
55: %\headheight     0.0truein
56: %\headsep        0.0truein
57: %\parskip 4pt plus 1pt
58: 
59: \newenvironment{define}{\begin{trivlist}\item[]{\bf Definition:}\rm}{\end{trivlist}}
60: \newenvironment{corol}{\begin{trivlist}\item[]{\bf Corollary:}\rm}{\end{trivlist}}
61: \newenvironment{discus}{\begin{trivlist}\item[]{\bf Discussion:}\rm}{\end{trivlist}}
62: \newtheorem{theorem}{Theorem}
63: \newtheorem{lemma}{Lemma}
64: \newcommand {\dfn} {\stackrel{\Delta} {=}}
65: \newcommand {\exe} {\stackrel{\cdot} {=}}
66: \newcommand{\eqa}{\stackrel{\mbox{(a)}}{=}}
67: \newcommand{\eqb}{\stackrel{\mbox{(b)}}{=}}
68: \newcommand{\eqc}{\stackrel{\mbox{(c)}}{=}}
69: \newcommand{\eqd}{\stackrel{\mbox{(d)}}{=}}
70: \newcommand{\eqe}{\stackrel{\mbox{(e)}}{=}}
71: \newcommand{\eqf}{\stackrel{\mbox{(f)}}{=}}
72: \newcommand{\lea}{\stackrel{\mbox{(a)}}{\le}}
73: \newcommand{\leb}{\stackrel{\mbox{(b)}}{\le}}
74: \newcommand{\lec}{\stackrel{\mbox{(c)}}{\le}}
75: \newcommand{\led}{\stackrel{\mbox{(d)}}{\le}}
76: \newcommand{\lee}{\stackrel{\mbox{(e)}}{\le}}
77: \newcommand{\lef}{\stackrel{\mbox{(f)}}{\le}}
78: \newcommand{\gea}{\stackrel{\mbox{(a)}}{\ge}}
79: \newcommand{\geb}{\stackrel{\mbox{(b)}}{\ge}}
80: \newcommand{\gec}{\stackrel{\mbox{(c)}}{\ge}}
81: \newcommand{\ged}{\stackrel{\mbox{(d)}}{\ge}}
82: \newcommand{\gee}{\stackrel{\mbox{(e)}}{\ge}}
83: \newcommand{\gef}{\stackrel{\mbox{(f)}}{\ge}}
84: \newcommand {\reals} {{\rm I\!R}}
85: \newcommand {\ba} {\mbox{\boldmath $a$}}
86: \newcommand {\bb} {\mbox{\boldmath $b$}}
87: \newcommand {\bc} {\mbox{\boldmath $c$}}
88: \newcommand {\bd} {\mbox{\boldmath $d$}}
89: \newcommand {\be} {\mbox{\boldmath $e$}}
90: \newcommand {\Bf} {\mbox{\boldmath $f$}}
91: \newcommand {\bg} {\mbox{\boldmath $g$}}
92: \newcommand {\bh} {\mbox{\boldmath $h$}}
93: \newcommand {\bi} {\mbox{\boldmath $i$}}
94: \newcommand {\bj} {\mbox{\boldmath $j$}}
95: \newcommand {\bk} {\mbox{\boldmath $k$}}
96: \newcommand {\bl} {\mbox{\boldmath $l$}}
97: \newcommand {\bm} {\mbox{\boldmath $m$}}
98: \newcommand {\bn} {\mbox{\boldmath $n$}}
99: \newcommand {\bo} {\mbox{\boldmath $o$}}
100: \newcommand {\bp} {\mbox{\boldmath $p$}}
101: \newcommand {\bq} {\mbox{\boldmath $q$}}
102: \newcommand {\br} {\mbox{\boldmath $r$}}
103: \newcommand {\bs} {\mbox{\boldmath $s$}}
104: \newcommand {\bt} {\mbox{\boldmath $t$}}
105: \newcommand {\bu} {\mbox{\boldmath $u$}}
106: \newcommand {\bv} {\mbox{\boldmath $v$}}
107: \newcommand {\bw} {\mbox{\boldmath $w$}}
108: \newcommand {\bx} {\mbox{\boldmath $x$}}
109: \newcommand {\by} {\mbox{\boldmath $y$}}
110: \newcommand {\bz} {\mbox{\boldmath $z$}}
111: \newcommand {\bA} {\mbox{\boldmath $A$}}
112: \newcommand {\bB} {\mbox{\boldmath $B$}}
113: \newcommand {\bC} {\mbox{\boldmath $C$}}
114: \newcommand {\bD} {\mbox{\boldmath $D$}}
115: \newcommand {\bE} {\mbox{\boldmath $E$}}
116: \newcommand {\bF} {\mbox{\boldmath $F$}}
117: \newcommand {\bG} {\mbox{\boldmath $G$}}
118: \newcommand {\bH} {\mbox{\boldmath $H$}}
119: \newcommand {\bI} {\mbox{\boldmath $I$}}
120: \newcommand {\bJ} {\mbox{\boldmath $J$}}
121: \newcommand {\bK} {\mbox{\boldmath $K$}}
122: \newcommand {\bL} {\mbox{\boldmath $L$}}
123: \newcommand {\bM} {\mbox{\boldmath $M$}}
124: \newcommand {\bN} {\mbox{\boldmath $N$}}
125: \newcommand {\bO} {\mbox{\boldmath $O$}}
126: \newcommand {\bP} {\mbox{\boldmath $P$}}
127: \newcommand {\bQ} {\mbox{\boldmath $Q$}}
128: \newcommand {\bR} {\mbox{\boldmath $R$}}
129: \newcommand {\bS} {\mbox{\boldmath $S$}}
130: \newcommand {\bT} {\mbox{\boldmath $T$}}
131: \newcommand {\bU} {\mbox{\boldmath $U$}}
132: \newcommand {\hU} {\hat{U}}
133: \newcommand {\hu} {\hat{u}}
134: \newcommand {\bV} {\mbox{\boldmath $V$}}
135: \newcommand {\bW} {\mbox{\boldmath $W$}}
136: \newcommand {\bX} {\mbox{\boldmath $X$}}
137: \newcommand {\bY} {\mbox{\boldmath $Y$}}
138: \newcommand {\bZ} {\mbox{\boldmath $Z$}}
139: \newcommand{\calA}{{\cal A}}
140: \newcommand{\calB}{{\cal B}}
141: \newcommand{\calC}{{\cal C}}
142: \newcommand{\calD}{{\cal D}}
143: \newcommand{\calE}{{\cal E}}
144: \newcommand{\calF}{{\cal F}}
145: \newcommand{\calG}{{\cal G}}
146: \newcommand{\calH}{{\cal H}}
147: \newcommand{\calI}{{\cal I}}
148: \newcommand{\calJ}{{\cal J}}
149: \newcommand{\calK}{{\cal K}}
150: \newcommand{\calL}{{\cal L}}
151: \newcommand{\calM}{{\cal M}}
152: \newcommand{\calN}{{\cal N}}
153: \newcommand{\calO}{{\cal O}}
154: \newcommand{\calP}{{\cal P}}
155: \newcommand{\calQ}{{\cal Q}}
156: \newcommand{\calR}{{\cal R}}
157: \newcommand{\calS}{{\cal S}}
158: \newcommand{\calT}{{\cal T}}
159: \newcommand{\calU}{{\cal U}}
160: \newcommand{\calV}{{\cal V}}
161: \newcommand{\calW}{{\cal W}}
162: \newcommand{\calX}{{\cal X}}
163: \newcommand{\calY}{{\cal Y}}
164: \newcommand{\calZ}{{\cal Z}}
165: 
166: \setlength{\textwidth}{6in}
167: \setlength{\textheight}{9in}
168: \setlength{\topmargin}{-0.5in}
169: \setlength{\oddsidemargin}{.25in}
170: 
171: \begin{document}
172: \thispagestyle{empty}
173: \title{On Joint Coding for Watermarking and Encryption}
174: \author{Neri Merhav}
175: \date{}
176: \maketitle
177: 
178: \begin{center}
179: Department of Electrical Engineering \\
180: Technion - Israel Institute of Technology \\
181: Haifa 32000, ISRAEL \\
182: {\tt merhav@ee.technion.ac.il}
183: \end{center}
184: \vspace{1.5\baselineskip}
185: \setlength{\baselineskip}{1.5\baselineskip}
186: 
187: \begin{abstract}
188: In continuation to earlier works where the problem of joint
189: information embedding and lossless compression (of the composite signal)
190: was studied in the absence \cite{MM03} and in the presence \cite{MM04}
191: of attacks, here we consider the additional ingredient of
192: protecting the secrecy of the watermark against an unauthorized party, which
193: has no access to a secret key shared by the legitimate parties.
194: In other words, we study the problem of joint
195: coding for three objectives: information embedding, compression, and encryption.
196: Our main result is a coding theorem that provides a 
197: single--letter characterization of the best achievable tradeoffs among
198: the following parameters: the distortion between the composite signal and
199: the covertext, the distortion in reconstructing the watermark by the legitimate
200: receiver, the compressibility of the composite signal (with and without the key),
201: and the equivocation of the watermark, as well as its reconstructed
202: version, given the composite signal. In the attack--free case, if the key
203: is independent of the covertext, this coding 
204: theorem gives rise to a {\it threefold} separation 
205: principle that tells that asymptotically, for long block codes, no optimality
206: is lost by first applying a rate--distortion code to the watermark source,
207: then encrypting the compressed codeword, and finally, embedding it into the
208: covertext using the embedding scheme of \cite{MM03}. In the more general case,
209: however, this separation principle is no longer valid, as the key plays an
210: additional role of side information used by the embedding unit.
211:  
212: \vspace{1cm}
213: 
214: \noindent
215: {\bf Index Terms:} Information hiding, watermarking, encryption, data compression,
216: separation principle, side information, equivocation, rate--distortion.
217: \end{abstract}
218: 
219: \newpage
220: \section{Introduction}
221: 
222: It is common to say that encryption and watermarking (or information hiding)
223: are related but they are substantially 
224: different in the sense that in the former,
225: the goal is to protect the secrecy of 
226: the {\it contents} of information, whereas in
227: the latter, it is the very {\it existence} 
228: of this information that is to be kept
229: secret. 
230: 
231: In the last few years, however, we are witnessing increasing 
232: efforts around the {\it combination} of encryption and watermarking,
233: which is motivated by the desire to 
234: further enhance the security of sensitive information that is
235: being hidden in the host signal. This is to guarantee that even 
236: if the watermark is somehow detected by a hostile
237: party, its contents still remain secure due to the encryption.
238: This combination of watermarking and encryption 
239: can be seen both in recently reported research work (see, e.g., 
240: \cite{AKS02},\cite{CC02},\cite{JML00},\cite{KNSTN02},\cite{MW04},\cite{SIA99} 
241: and references therein)
242: and in actual technologies used in 
243: commercial products with a copyright protection 
244: framework, such as the CD and the DVD. Also, some commercial companies 
245: that provide Internet documents have on their
246: websites links to copyright warning messages, 
247: saying that their data are protected by
248: digitally encrypted watermarks (see, e.g., 
249: {\tt http://genealogy.lv/1864Lancaster/copyright.htm}).
250: 
251: This paper is devoted to the information--theoretic
252: aspects of joint watermarking and encryption together with 
253: lossless compression of the composite signal that 
254: contains the encrypted watermark. Specifically, we extend
255: the framework studied in \cite{MM03} and \cite{MM04}
256: of joint watermarking and compression, so as to include
257: encryption using a secret key. Before we describe the
258: setting of this paper concretely, we pause to give some
259: more detailed background on the work reported in \cite{MM03} and 
260: \cite{MM04}.
261: 
262: In \cite{MM03}, the following problem was studied: Given a 
263: covertext source vector $X^n=(X_1,\ldots,X_n)$, generated by
264: a discrete memoryless source (DMS), and a message $m$, 
265: uniformly distributed in $\{1,2,\ldots, 2^{nR_e}\}$, independently of $X^n$,
266: with $R_e$ designating the embedding rate, we wish to 
267: generate a composite (stegotext) vector
268: $Y^n=(Y_1,\ldots,Y_n)$ that satisfies the 
269: following requirements: (i) Similarity to the
270: covertext (for reasons of maintaining quality), in the sense that a distortion 
271: constraint, $Ed(X^n,Y^n)=\sum_{t=1}^nEd(X_t,Y_t)\le nD$,
272: holds, (ii) compressibility (for reasons of saving storage space and bandwidth), in the sense 
273: that the normalized entropy, $H(Y^n)/n$, does not exceed some
274: threshold $R_c$, and (iii) reliability in decoding the message $m$ 
275: from $Y^n$, in the sense that
276: the decoding error probability is arbitrarily 
277: small for large $n$. A single--letter characterization
278: of the best achievable tradeoffs among $R_c$, $R_e$, 
279: and $D$ was given in \cite{MM03}, and was
280: shown to be achievable by an extension 
281: of the ordinary lossy source coding theorem, giving rise to the
282: existence of $2^{nR_e}$ {\it disjoint} 
283: rate--distortion codebooks (one per each possible watermark
284: message) as long as $R_e$ does not exceed
285: a certain fundamental limit. In \cite{MM04}, this setup 
286: was extended to include 
287: a given memoryless attack channel, 
288: $P(Z^n|Y^n)$, where item (iii) above was redefined such that
289: the decoding was based on $Z^n$ rather than on $Y^n$, and where, in view of requirement (ii),
290: it is understood that the attacker has access to the compressed version of $Y^n$,
291: and so, the attacker decompresses $Y^n$ before the attack and re--compresses it after.
292: This extension from \cite{MM03} to \cite{MM04} involved
293: a different approach, which was in the 
294: spirit of the Gel'fand--Pinsker coding theorem for
295: a channel with non--causal side information 
296: (SI) at the transmitter \cite{GP80}. The role of SI, in this case, was
297: played by the covertext.
298: 
299: In this paper, we extend the settings of \cite{MM03} and \cite{MM04}
300: to include encryption. For the sake of clarity of the exposition,
301: we do that in several steps.
302: 
303: In the first step, we extend the attack--free setting of 
304: \cite{MM03}: In addition to including encryption,
305: we also extend the model of the watermark message
306: source to be an arbitrary DMS, $U_1,U_2,\ldots$, 
307: independent of the covertext, 
308: and not necessarily a binary symmetric source 
309: (BSS) as in \cite{MM03} and \cite{MM04}.
310: Specifically, we now assume that the encoder
311: has three inputs (see Fig.\ \ref{gen}): The covertext source vector, 
312: $X^n$, an independent (watermark) message source vector 
313: $U^N=(U_1,\ldots,U_N)$, where $N$ may differ from 
314: $n$ if the two sources operate
315: at different rates, and a secret key (shared also with 
316: the legitimate decoder) $K^n=(K_1,\ldots,K_n)$, which, for mathematical
317: convenience, is assumed to operate at the same 
318: rate as the covertext. It is assumed, 
319: at this stage, that $K^n$
320: is independent of $U^N$ and $X^n$.
321: %\footnote{In fact,
322: %the choice of the conditional distribution $P(K^n|X^n)$ is a degree of freedom
323: %that can be optimized subject to the given randomness resources.}
324: Now, in addition to requirements
325: (i)--(iii), we impose a requirement on the equivocation 
326: of the message source relative to an
327: eavesdropper that has access to $Y^n$, but not
328: to $K^n$. Specifically, we would like the normalized
329: conditional entropy, $H(U^N|Y^n)/N$, to exceed
330: a prescribed threshold, $h$ (e.g., $h=H(U)$ 
331: for perfect secrecy). Our first result is 
332: a coding theorem that gives a set of necessary
333: and sufficient conditions, in terms of single--letter inequalities,
334: such that a triple $(D,R_c,h)$ is achievable,
335: while maintaining 
336: reliable reconstruction of $U^N$ at the legitimate receiver.
337: 
338: In the second step, we relax the requirement of perfect (or almost
339: perfect) watermark reconstruction, and assume that we are willing to
340: tolerate a certain distortion between the watermark message $U^N$ and its
341: reconstructed version $\hat{U}^N$, that is,
342: $Ed'(U^N,\hat{U}^N)=\sum_{i=1}^NEd'(U_i,\hat{U}_i)\le ND'$. For example,
343: if $d'$ is the Hamming distortion measure then $D'$, of course, designates 
344: the maximum allowable bit error probability (as opposed to the block error
345: probability requirement of \cite{MM03} and \cite{MM04}). Also, in this case,
346: it makes sense to impose a requirement regarding the
347: equivocation of the {\it reconstructed} message,
348: $\hat{U}^N$, namely, $H(\hat{U}^N|Y^n)/N\ge h'$, for some prescribed
349: constant $h'$. The rationale is that it is $\hat{U}^N$, 
350: not $U^N$, that
351: is actually conveyed to the legitimate receiver, and hence
352: there is an incentive to protect the secrecy of $\hat{U}^N$.
353: We will take into account both
354: equivocation requirements, with the understanding that if one of them
355: is superfluous, then the corresponding threshold 
356: ($h$ or $h'$ accordingly) can always be set to zero.
357: Our second result then extends the above--mentioned
358: coding theorem to a single--letter characterization of achievable quintuples
359: $(D,D',R_c,h,h')$. As will be seen, 
360: this coding theorem gives rise
361: to a threefold separation theorem, that separates, without asymptotic loss
362: of optimality, between three stages: rate--distortion coding of $U^N$,
363: encryption of the compressed bitstream, and finally, embedding the
364: resulting encrypted version using the embedding scheme of \cite{MM03}.
365: The necessary and sufficient conditions related to the
366: encryption are completely decoupled from those of the 
367: embedding and the stegotext compression.
368: 
369: In the third and last step, we drop the assumption of an attack--free
370: system and we assume a given memoryless attack channel, in analogy to \cite{MM04}.
371: Again, referring to Fig.\ \ref{gen}, it should be understood that the stegotext $Y^n$ is
372: stored (or transmitted) in compressed form, and that the attacker decompresses $Y^n$
373: before the attack and re--compresses it after (the compression and decompression units
374: are omitted from the figure).
375: As it will turn out, in the case of a memoryless attack, there is an interaction between the
376: encryption and the embedding, even if the key is still
377: assumed independent of the covertext. In particular,
378: it will be interesting to see 
379: that the key, in addition to its original role in encryption,
380: serves as SI that is available
381: to both encoder and decoder (see Fig.\ \ref{dir}).
382: Also, because of the dependence between
383: the key and the composite signal, and the fact that the key is available
384: to the legitimate decoder as well,
385: it is reasonable 
386: to let the compressibility constraint correspond also to the
387: conditional entropy of $Y^n$ given $K^n$, that is, {\it private} compression as opposed
388: to the previously considered
389: {\it public} compression, without the key, which enables decompression but not decryption
390: (when these two operations are carried out by different, remote units).
391: Accordingly, we will consider both the conditional and the unconditional
392: entropies of $Y^n$, i.e.,
393: $H(Y^n)/n\le R_c$ and $H(Y^n|K^n)/n\le R_c'$.
394: Our final result then is a coding theorem that 
395: provides a single--letter characterization of the
396: region of achievable six--tuples $(D,D',R_c,R_c',h,h')$. 
397: Interestingly, this characterization remains essentially
398: unaltered even if there is dependence between the key and the covertext,
399: which is a reasonable thing 
400: to have once the key and the stegotext interact in the first place.\footnote{In fact,
401: the choice of the conditional distribution $P(K^n|X^n)$ is a degree of freedom
402: that can be optimized subject to the given randomness resources.}
403: In this context, the system designer confronts an interesting
404: dilemma regarding the desirable degree of statistical dependence
405: between the key and the covertext, which affects the dependence
406: between the key and the stegotext.
407: On the one hand, strong dependence can reduce
408: the entropy of $Y^n$ given $K^n$ (and thereby reduce 
409: $R_c'$), and can also help in the embedding 
410: process: For example, the extreme case of $K^n=X^n$ (which
411: corresponds to {\it private} watermarking since
412: the decoder actually has access to the covertext) is particularly
413: interesting because in this case, 
414: for the encryption key, there is no need for any external resources
415: of randomness, in addition to 
416: the randomness of the covertext that is already available. 
417: On the other hand, when there is strong dependence between $K^n$ and $Y^n$,
418: the secrecy of the watermark might be sacrificed since $H(K^n|Y^n)$
419: decreases as well. 
420: An interesting point, in this context, is that the
421: Slepian--Wolf encoder \cite{SW73} (see Fig.\ \ref{dir}) is used to 
422: generate, from $K^n$, random bits that are essentially 
423: independent of $Y^n$
424: (as $Y^n$ is generated only after the encryption).
425: These aspects will be seen in detail in Section 4, and even more so, in Section 6.
426: 
427: The remaining parts of this paper are organized as follows:
428: In Section 2, we set some notation conventions. 
429: Section 3 will be devoted to a formal problem description
430: and to the presentation of the main result for the attack--free case
431: with distortion--free watermark reconstruction (first step
432: described above). In Section 4, the setup and the results will
433: be extended along the lines of the second and the third steps,
434: detailed above,
435: i.e., a given distortion level in the watermark reconstruction and the
436: incorporation of an attack channel. Finally, Sections 5 and 6 
437: will be devoted to the proof of the last (and most general) version of the
438: coding theorem, with Section 5 focusing on the converse part,
439: and Section 6 -- on the direct part. 
440: 
441: \section{Notation Conventions} 
442: 
443: We begin by establishing some notation conventions.
444: Throughout this paper, scalar random 
445: variables (RV's) will be denoted by capital
446: letters, their sample values will be denoted by
447: the respective lower case letters, and their alphabets will be denoted
448: by the respective calligraphic letters.
449: A similar convention will apply to
450: random vectors and their sample values,
451: which will be denoted with the same symbols superscripted by the dimension.
452: Thus, for example, $A^\ell$ ($\ell$ -- positive integer)
453: will denote a random $\ell$-vector $(A_1,\ldots,A_\ell)$,
454: and $a^\ell=(a_1,\ldots,a_\ell)$ is a specific vector value in $\calA^\ell$,
455: the $\ell$-th Cartesian power of $\calA$. The 
456: notations $a_i^j$ and $A_i^j$, where $i$
457: and $j$ are integers and $i\le j$, will designate segments $(a_i,\ldots,a_j)$
458: and $(A_i,\ldots,A_j)$, respectively,
459: where for $i=1$, the subscript will be omitted (as above). 
460: For $i > j$, $a_i^j$ (or $A_i^j$) will be understood as the null string.
461: Sequences without specifying indices are denoted by $ \{\cdot\} $.
462: 
463: Sources and channels will be denoted generically by the letter $P$, or $Q$,
464: subscripted by the name of the RV and its conditioning,
465: if applicable, e.g., $P_U(u)$ is the probability function of
466: $U$ at the point $U=u$, $P_{K|X}(k|x)$
467: is the conditional probability of $K=k$ given $X=x$, and so on.
468: Whenever clear from the context, these subscripts will be omitted.
469: Information theoretic quantities like entropies and mutual
470: informations will be denoted following the usual conventions
471: of the information theory literature, e.g., $H(U^N)$, $I(X^n;Y^n)$,
472: and so on. For single--letter
473: information quantities (i.e., when $n=1$ or $N=1$), 
474: superscripts and subscripts will be omitted, e.g., $H(U^1)=H(U_1)$ will
475: be denoted by $H(U)$, 
476: similarly, $I(X^1;Y^1)=I(X_1;Y_1)$ will be denoted by $I(X;Y)$, and so on.
477: 
478: \section{Problem Definition and Main Result for Step 1}
479: 
480: We now turn to the formal description 
481: of the model and the problem setting for step 1,
482: as described in the Introduction.
483: A source $P_X$, henceforth referred to as the 
484: {\it covertext source} or the {\it host source}, generates a sequence of
485: independent copies, $\{X_t\}_{t=-\infty}^{\infty}$, of a finite--alphabet RV,
486: $X\in\calX$. At the same time and independently,
487: another source $P_U$, henceforth referred to as the {\it message source}, 
488: or the {\it watermark source}, generates a sequence of
489: independent copies, $\{U_i\}_{i=-\infty}^{\infty}$, of a finite--alphabet RV,
490: $U\in\calU$. The relative rate between the message source and the covertext
491: source is $\lambda$ message symbols per covertext symbol. This means that
492: while the covertext 
493: source generates a block of $n$ symbols, say, $X^n=(X_1,\ldots,X_n)$,
494: the message source generates a block 
495: of $N=\lambda n$ symbols, $U^N=(U_1,\ldots,U_N)$
496: (assuming, without essential loss of 
497: generality, that $\lambda n$ is a positive integer).
498: In addition to the covertext source and 
499: the message source, yet another source, $P_K$,
500: henceforth referred to as the {\it key source},
501: generates a sequence of independent copies, 
502: $\{K_t\}_{t=-\infty}^{\infty}$, of a finite--alphabet RV,
503: $K\in\calK$, independently\footnote{The assumption 
504: of independence between $\{K_t\}$
505: and $\{X_t\}$ is temporary and made now primarily for 
506: the sake of simplicity of the exposition. It will
507: be dropped later on.}
508: of both $\{X_t\}$ and $\{U_i\}$. 
509: The key source is assumed to operate at the same
510: rate as the covertext source, that is, while 
511: the covertext source generates the block $X^n$
512: of length $n$,
513: the key source generates a block of $n$
514: symbols as well, $K^n=(K_1,\ldots,K_n)$.
515: 
516: Given $n$ and $\lambda$, a block code 
517: for {\it joint watermarking, encryption, and compression}
518: is a mapping $f_n:\calU^N\times\calX^n\times\calK^n\to\calY^n$, 
519: $N=\lambda n$, whose output
520: $y^n=(y_1,\ldots,y_n)=f_n(u^N,x^n,k^n)\in\calY^n$ 
521: is referred to as the {\it stegotext} or the
522: {\it composite signal}, and accordingly, the finite alphabet $\calY$ is
523: referred to as the {\it stegotext alphabet}. 
524: Let $d:\calX\times\calY\to \reals^+$ denote
525: a single--letter distortion measure between 
526: covertext symbols and stegotext symbols,
527: and let the distortion between the vectors, $x^n\in\calX^n$ and $y^n\in\calY^n$,
528: be defined additively across the corresponding components, as usual. 
529: 
530: An $(n,\lambda,D,R_c,h,\delta)$ code is a block code 
531: for joint watermarking, encryption, and compression,
532: with parameters $n$ and $\lambda$, that satisfies the following requirements:
533: \begin{itemize}
534: \item [1.] The expected distortion 
535: between the covertext and the stegotext satisfies
536: \begin{equation}
537: \sum_{t=1}^n Ed(X_t,Y_t)\le nD.
538: \end{equation}
539: \item [2.] The entropy of the stegotext
540: satisfies
541: \begin{equation}
542: H(Y^n)\le nR_c.
543: \end{equation}
544: \item [3.] The equivocation of the message source satisfies
545: \begin{equation}
546: H(U^N|Y^n)\ge Nh.
547: \end{equation}
548: \item [4.] There exists a
549: decoder $g_n:\calY^n\times\calK^n\to\calU^N$ such that
550: \begin{equation}
551: P_e\dfn\mbox{Pr}\{g_n(Y^n,K^n)\ne U^N\}\le \delta.
552: \end{equation}
553: \end{itemize}
554: For a given $\lambda$, a triple $(D,R_c,h)$ is said to be {\it achievable}
555: if for every $\epsilon > 0$, there is a sufficiently large $n$
556: for which 
557: $(n,\lambda,D+\epsilon,R_c+\epsilon,h-\epsilon,\epsilon)$ codes exist.
558: The {\it achievable region} of triples 
559: $(D,R_c,h)$ is the set of all achievable
560: triples $(D,R_c,h)$. For simplicity, it is assumed\footnote{At the end of Section 4 (after Theorem 4),
561: we discuss the case where this limitation (or its analogue in lossy reconstruction of $U^N$) is dropped.}
562: that $H(K)
563: \le\lambda H(U)$ as this upper limit on $H(K)$ suffices
564: to achieve perfect secrecy.
565: 
566: Our first coding theorem is the following:
567: \begin{theorem}
568: A triple $(D,R_c,h)$ is achievable if and only if the following conditions
569: are both satisfied:
570: \begin{itemize}
571: \item [(a)] $h \le H(K)/\lambda$.
572: \item [(b)] There exists a channel $\{P_{Y|X}(y|x),~x\in\calX,~y\in\calY\}$
573: such that: (i) $H(Y|X)\ge\lambda H(U)$, (ii) $R_c\ge\lambda H(U)+I(X;Y)$, and
574: (iii) $D\ge Ed(X,Y)$.
575: \end{itemize}
576: \end{theorem}
577: 
578: As can be seen, the encryption, on the one hand, and the embedding and the
579: compression, on the other hand, do not interact at all in this theorem.
580: There is a complete decoupling between them: While
581: condition (a) refers solely to the key and the secrecy of the watermark,
582: condition (b) is only about the embedding--compression 
583: part, and it is a replica of the conditions of 
584: the coding theorem in \cite{MM03}, where the role of the embedding rate, $R_e$
585: (see Introduction above),
586: is played by the product $\lambda H(U)$. This suggests a very simple separation 
587: principle, telling that in order to 
588: attain a given achievable triple $(D,R_c,h)$,
589: first compress the watermark $U^N$ to its entropy, then encrypt $Nh$ bits (out
590: of the $NH(U)$) of the compressed 
591: bit--string (by bit--by--bit XORing with the same number of
592: compressed key bits), and finally, embed this 
593: partially encrypted compressed bit--string into
594: the covertext, using the coding theorem of \cite{MM03} (again, 
595: see the Introduction
596: above for a brief description of this).
597: 
598: \section{Extensions to Steps 2 and 3}
599: 
600: Moving on to Step 2, we now relax requirement no.\ 4 in
601: the above definition of 
602: an $(n,\lambda,D,R_c,h,\delta)$ code, and allow a certain
603: distortion between $U^N$ and its reconstruction $\hat{U}^N$
604: at the legitimate decoder.
605: More precisely, let $\hat{\calU}$ denote a finite alphabet,
606: henceforth referred to as the {\it message reconstruction
607: alphabet}. Let $d':\calU\times\hat{\calU}\to\reals^+$ denote
608: a single--letter distortion measure between message symbols
609: and message reconstruction symbols, and let the distortion
610: between vectors $u^N\in\calU^N$ 
611: and $\hat{u}^N\in\hat{\calU}^N$ again be defined additively
612: across the corresponding components. Finally, let $R_U(D')$
613: denote the rate--distortion function of the source $P_U$
614: w.r.t.\ $d'$, i.e.,
615: \begin{equation}
616: R_U(D')=\min\{I(U;\hU):~Ed'(U,\hU)\le D'\}.
617: \end{equation}
618: It will now be assumed that $H(K)\le\lambda R_U(D')$, for the
619: same reasoning as before.
620: 
621: Requirement no.\ 4 is now replaced by the
622: following requirement: 
623: There exists a
624: decoder $g_n:\calY^n\times\calK^n\to\hat{\calU}^N$ such that
625: $\hU^N=(\hU_1,\ldots,\hU_N)=g_n(Y^n,K^n)$ satisfies:
626: \begin{equation}
627: \label{distp}
628: \sum_{i=1}^NEd'(U_i,\hat{U}_i)\le ND'.
629: \end{equation}
630: In addition to this modification of requirement no.\ 4, we
631: add, to requirement no.\ 3, a specification regarding the
632: minimum allowed equivocation w.r.t.\ the reconstructed message:
633: \begin{equation}
634: H(\hU^N|Y^n)\ge Nh',
635: \end{equation}
636: in order to guarantee that the reconstructed message 
637: is also secure enough. Accordingly, we modify the above definition of a block
638: code as follows:
639: An $(n,\lambda,D,D',R_c,h,h')$ code is a block code 
640: for joint watermarking, encryption, and compression
641: with parameters $n$ and $\lambda$ that satisfies requirements 1--4,
642: with the above modifications of requirements 3 and 4.
643: For a given $\lambda$, a quintuple $(D,D',R_c,h,h')$ 
644: is said to be {\it achievable}
645: if for every $\epsilon > 0$, there is a sufficiently large $n$
646: for which 
647: $(n,\lambda,D+\epsilon,D'+\epsilon,R_c+\epsilon,h-\epsilon,h'-\epsilon)$ 
648: codes exist.
649: 
650: Our second theorem extends Theorem 1 to this setting:
651: 
652: \begin{theorem}
653: A quintuple $(D,D',R_c,h,h')$ is achievable 
654: if and only if the following conditions
655: are all satisfied:
656: \begin{itemize}
657: \item [(a)] $h\le H(K)/\lambda+H(U)-R_U(D')$.
658: \item [(b)] $h'\le H(K)/\lambda$.
659: \item [(c)] There exists a channel $\{P_{Y|X}(y|x),~x\in\calX,~y\in\calY\}$
660: such that: (i) $\lambda R_U(D')\le H(Y|X)$, 
661: (ii) $R_c\ge\lambda R_U(D')+I(X;Y)$, and
662: (iii) $D\ge Ed(X,Y)$.
663: \end{itemize}
664: \end{theorem}
665: 
666: As can be seen, the passage from Theorem 1 to Theorem 2 includes
667: the following modifications:
668: In condition (c), $H(U)$ is simply replaced by $R_U(D')$ as expected. 
669: This means that the lossless compression code of $U^N$, 
670: in the achievability of Theorem 1, is
671: now replaced by a rate--distortion code for distortion level $D'$.
672: Conditions (a) and (b) now tell us that the key rate (in terms of entropy)
673: should be sufficiently large to satisfy both equivocation requirements.
674: Note that the condition regarding the 
675: equivocation w.r.t.\ the clean message source is softer than
676: in Theorem 1 as $H(U)-R_U(D')\ge 0$. This is because the rate--distortion code
677: for $U^N$ already introduces an uncertainty of $H(U)-R_U(D')$ bits per
678: symbol, and so, the encryption 
679: should only complete it to the desired level of $h$
680: bits per symbol. This point is discussed in depth in \cite{Yamamoto97}.
681: Of course, by setting $D'=0$ (and hence also $h'=h$), we are back
682: to Theorem 1.
683: 
684: We also observe that the encryption and the embedding are still decoupled
685: in Theorem 2, and that an achievable quintuple can still be attained
686: by separation: First, apply a rate--distortion code to $U^N$, as mentioned
687: earlier, then encrypt $N\cdot\max\{h+R_U(D')-H(U),h'\}$ bits
688: of the compressed codeword (to satisfy both equivocation requirements),
689: and finally, embed the (partially) encrypted
690: codeword into $X^n$, again, by using the scheme of \cite{MM03}.
691: Note that without the encryption and without
692: requirement no.\ 2 of the compressibility of $Y^n$, this
693: separation principle is a special case of the one in
694: \cite{MS03}, where a separation theorem was established
695: for the Wyner--Ziv source (with SI correlated to the source at the decoder)
696: and the Gel'fand--Pinsker channel (with channel SI at the encoder).
697: Here, there is no SI correlated to the source
698: and the role of channel SI is fulfilled by the covertext.
699: Thus, the new observation here is that the separation theorem continues
700: to hold in the presence of encryption and requirement no.\ 2.
701: 
702: Finally, we turn to step 3, of including an attack channel (see Fig.\ 
703: \ref{gen}).
704: Let $\calZ$ be a finite alphabet, henceforth referred to as
705: the {\it forgery alphabet}, and 
706: let $\{P_{Z|Y}(z|y),~y\in\calY,~z\in\calZ\}$
707: denote a set of conditional PMF's from the stegotext alphabet to
708: the forgery alphabet. We now assume that the stegotext vector
709: is subjected to an attack modelled by the memoryless channel,
710: \begin{equation}
711: P_{Z^n|Y^n}(z^n|y^n)=\prod_{t=1}^n P_{Z|Y}(z_t|y_t).
712: \end{equation}
713: The output $Z^n$ of the attack channel will henceforth be referred to as
714: the {\it forgery}. 
715: 
716: It is now assumed 
717: that the legitimate decoder has access to $Z^n$, 
718: rather than $Y^n$ (in addition, of course, to $K^n$).
719: Thus, in requirement no.\ 4, the
720: decoder is redefined again, this time,
721: as a mapping $g_n:\calZ^n\times\calK^n\to\hat{\calU}^N$ 
722: such that $\hat{U}^N=g_n(Z^n,K^n)$ satisfies
723: the distortion constraint (\ref{distp}). As for the equivocation requirements,
724: the conditioning will now be on both $Y^n$ and $Z^n$, i.e.,
725: \begin{equation}
726: H(U^N|Y^n,Z^n)\ge Nh~~~\mbox{and}~~~
727: H(\hU^N|Y^n,Z^n)\ge Nh',
728: \end{equation}
729: as if the
730: attacker and the eavesdropper are the same party (or if they cooperate),
731: then s/he may access both. In fact, 
732: for the equivocation of $U^N$, the conditioning on $Z^n$ is immaterial
733: since $U^N\to Y^n\to Z^n$ is always a Markov chain, but it is not clear
734: that $Z^n$ is superfluous for the equivocation w.r.t.\ $\hU^N$
735: since $Z^n$ is one of the inputs to the decoder whose output is $\hU^N$.
736: Nonetheless, for the sake of uniformity and convenience (in the proof),
737: we keep the conditioning on $Z^n$ in both equivocation criteria.
738: 
739: Redefining block codes and achievable quintuples $(D,D',R_c,h,h')$ 
740: according to the modified requirements in the same spirit, we now
741: have the following coding theorem, which is substantially
742: different from Theorems 1 and 2:
743: 
744: \begin{theorem}
745: A quintuple $(D,D',R_c,h,h')$ is achievable 
746: if and only if there exist RV's $V$ and $Y$ such that
747: $P_{KXVYZ}(k,x,v,y,z)=P_X(x)P_K(k)P_{VY|KX}(v,y|k,x)P_{Z|Y}(z|y)$,
748: where the alphabet size of $V$ is bounded by $|\calV|\le
749: |\calK|\cdot|\calX|\cdot|\calY|+1$, and such that the following
750: conditions are all satisfied:
751: \begin{itemize}
752: \item [(a)] $h\le H(K|Y)/\lambda+H(U)-R_U(D')$.
753: \item [(b)] $h'\le H(K|Y)/\lambda$.
754: \item [(c)] $\lambda R_U(D')\le I(V;Z|K)-I(V;X|K)$.
755: \item [(d)] $R_c\ge \lambda R_U(D')+I(X;Y,V|K)+I(K;Y)$.
756: \item [(e)] $D \ge Ed(X,Y)$.
757: \end{itemize}
758: \end{theorem}
759: 
760: First, observe that here, unlike in Theorems 1 and 2, it is no longer
761: true that the encryption and the embedding (along with stegotext compression) 
762: are decoupled, yet
763: the rate--distortion compression of $U^N$ is still separate and decoupled from both.
764: In other words, the separation principle applies here in a partial manner only.
765: Note that now, although $K$ is still assumed independent of $X$,
766: it may, in general, depend on $Y$. On the negative side, 
767: this dependence causes a reduction
768: in the equivocation of both the message source and its reconstruction,
769: and therefore $H(K|Y)$ replaces $H(K)$ in conditions (a) and (b).
770: On the positive side, however,
771: this dependence introduces new degrees of freedom in
772: enhancing the tradeoffs between the embedding performance
773: (condition (c)) and the compressibility (condition (d)). 
774: 
775: The achievability of Theorem 3 involves essentially
776: the same stages as before (rate--distortion coding of $U^N$, followed by
777: encryption, followed in turn by embedding), but this time,
778: the embedding scheme is a conditional version of the one proposed in
779: \cite{MM04}, where all codebooks depend on $K^n$, the SI given at
780: both ends (see Fig.\ \ref{dir}). 
781: An interesting point regarding the encryption is that
782: one needs to generate, from $K^n$, essentially $nH(K|Y)$ random bits that
783: are {\it independent} of $Y^n$ (and $Z^n$), in order to protect the
784: secrecy against an eavesdropper that observes $Y^n$ and $Z^n$.
785: Clearly, if $Y^n$ was given in advance to the encrypting unit, then
786: the compressed bitstring of an optimal lossless
787: source code that compresses $K^n$, given $Y^n$ as SI, would have
788: this property (as if there was any dependence, then this bitstring could have
789: been further compressed, which is a contradiction). 
790: However, such a source code 
791: cannot be implemented
792: since $Y^n$ itself is generated 
793: from the encrypted message, i.e., {\it after}
794: the encryption. In other words, this would 
795: have required a circular mechanism, which may not be feasible.
796: A simple remedy is then to use a 
797: {\it Slepian--Wolf encoder} \cite{SW73}, that generates
798: $nH(K|Y)$ bits that are essentially 
799: independent of $Y^n$ (due to the same consideration), 
800: without the need to access the
801: vector $Y^n$ to be generated.
802: For more details, the reader is referred to the 
803: proof of the direct part (Section 6).
804: 
805: Observe that in the absence of attack (i.e., $Z=Y$),
806: Theorem 2 is obtained as a special case 
807: of Theorem 3 by choosing $V=Y$ and letting
808: both be independent of $K$, a choice which is simultaneously the best for
809: conditions (a)--(d) of Theorem 3. To see this, note the following simple
810: inequalities:
811: In conditions (a) and (b), $H(K|Y)\le H(K)$. In condition (c), 
812: by setting $Z=Y$, we have
813: \begin{eqnarray}
814: I(V;Y|K)-I(V;X|K)&\le&I(V;X,Y|K)-I(V;X|K)\nonumber\\
815: &=&I(V;Y|X,K)\nonumber\\
816: &\le&H(Y|X,K)\nonumber\\
817: &\le&H(Y|X).
818: \end{eqnarray}
819: Finally in condition (d), clearly, $I(K;Y)\ge 0$ and 
820: since $X$ is independent of $K$, then
821: $I(X;Y,V|K)=I(X;Y,V,K)\ge I(X;Y)$. Thus, for $Z=Y$, the achievable region
822: of Theorem 3 is a subset of the one given in Theorem 2. However, since
823: all these inequalities become equalities
824: at the same time by choosing $V=Y$ and letting both be independent of $K$,
825: the two regions are identical in the attack--free case.
826: 
827: Returning now to Theorem 3, as we observed,
828: $K^n$ is now involved not only in the role 
829: of a cipher key, but also as SI available at both encoder and decoder.
830: Two important points are now in order, in view of this fact.
831: 
832: First, one may argue that, actually,
833: there is no real reason to assume that $K^n$ is necessarily independent
834: of $X^n$ (see also \cite{MO03}).
835: If the user has control of the mechanism of generating the key,
836: then s/he might implement, in general, a channel 
837: $P_{K^n|X^n}(k^n|x^n)$ using the
838: available randomness resources, and taking (partial) advantage of the
839: randomness of the covertext. Let us 
840: assume that this channel is stationary and memoryless, i.e.,
841: \begin{equation}
842: P_{K^n|X^n}(k^n|x^n)=\prod_{t=1}^n P_{K|X}(k_t|x_t)
843: \end{equation}
844: with the single--letter transition probabilities
845: $\{P_{K|X}(k|x),~x\in\calX,~k\in\calK\}$ left as a degree of freedom
846: for design. While so far, we assumed that $K$ was independent of $X$,
847: the other extreme is, of course, $K=X$ (corresponding to private
848: watermarking). Note, however, that in the attack--free case, in the absence of
849: the compressibility requirement no.\ 2 (say, $R_c=\infty$), no optimality
850: is lost by assuming that $K$ is independent of $X$, since the only
851: inequality where we have used the independence assumption, in the previous
852: paragraph, corresponds to condition (d).
853: 
854: The second point is that in Theorems 1--3, so far, we have defined
855: the compressibility of the stegotext in terms of $H(Y^n)$, which is
856: suitable when the decompression of $Y^n$ is {\it public}, i.e., without
857: access to $K^n$. The legitimate decoder in our model, on the other hand,
858: has access to the SI $K^n$, which may depend on $Y^n$. In this context,
859: it then makes sense
860: to measure the compressibility of the stegotext 
861: also in a {\it private} regime,
862: i.e., in terms of the {\it conditional} entropy, $H(Y^n|K^n)$.
863: 
864: Our last (and most general) version of the 
865: coding theorem below takes these two points into account.
866: Specifically, let us impose, in requirement no.\ 2, an additional inequality,
867: \begin{equation}
868: H(Y^n|K^n)\le nR_c', 
869: \end{equation}
870: where $R_c'$ is a prescribed constant, and let us redefine accordingly
871: the block codes and the achievable region in terms of six--tuples 
872: $(D,D',R_c,R_c',h,h')$. We now have the following result:
873: 
874: \begin{theorem}
875: A six--tuple $(D,D',R_c,R_c',h,h')$ is achievable 
876: if and only if there exist RV's $V$ and $Y$ such that
877: $P_{KXVYZ}(k,x,v,y,z)=P_{XK}(x,k)P_{VY|KX}(v,y|k,x)P_{Z|Y}(z|y)$,
878: where the alphabet size of $V$ is bounded by $|\calV|\le
879: |\calK|\cdot|\calX|\cdot|\calY|+1$, and such that the following
880: conditions are all satisfied:
881: \begin{itemize}
882: \item [(a)] $h\le H(K|Y)/\lambda+H(U)-R_U(D')$.
883: \item [(b)] $h'\le H(K|Y)/\lambda$.
884: \item [(c)] $\lambda R_U(D')\le I(V;Z|K)-I(V;X|K)$.
885: \item [(d)] $R_c\ge \lambda R_U(D')+I(X;Y,V|K)+I(K;Y)$.
886: \item [(e)] $R_c'\ge \lambda R_U(D')+I(X;Y,V|K)$.
887: \item [(f)] $D \ge Ed(X,Y)$.
888: \end{itemize}
889: \end{theorem}
890: Note that
891: the additional condition, (e), is similar to condition (d) except for
892: the term $I(K;Y)$. Also, in the joint PMF of $(K,X,V,Y,Z)$
893: we are no longer assuming that $K$ and $X$ are independent.
894: It should be pointed out that
895: in the presence of the new requirement regarding $H(Y^n|K^n)$,
896: it is more clear now that introducing dependence of $(V,Y)$ upon $K$
897: is reasonable, in general.
898: In the case $K=X$,
899: that was mentioned earlier,
900: the term $I(V;X|K)$, in condition (c),
901: and the term $I(X;Y,V|K)$, in conditions (d) and (e), both vanish.
902: Thus, both embedding performance and compression 
903: performance improve, like in private watermarking.
904: 
905: Finally, a comment is in order regarding the assumption $H(K)\le\lambda R_U(D')$,
906: which implies that $H(K|Y)$ cannot exceed $\lambda R_U(D')$ either.
907: If this assumption is removed, and even $H(K|Y)$ is allowed to exceed $\lambda R_U(D')$,
908: then Theorem 4 can be somewhat further extended. While $h$ cannot be further improved
909: if $H(K|Y)$ is allowed to exceed $\lambda R_U(D')$ (as it already reaches the maximum possible
910: value, $h=H(U)$, for $H(K|Y)=\lambda R_U(D')$), it turns out that there is still room for
911: improvement in $h'$. Suppose that instead of one rate--distortion codebook for $U^N$, we have
912: many {\it disjoint} codebooks. In fact, it has been shown in \cite{MM03} that there are exponentially
913: many, about $2^{NH(\hU|U)}$, disjoint codebooks, 
914: each covering the set of typical source sequences by jointly typical
915: codewords. Now, if $H(K|Y) > \lambda R_U(D')$, we can use the $T=nH(K|Y)-NR_U(D')$ excess bits
916: of the compressed key
917: (beyond the $NR_U(D')$ bits that are used to encrypt the 
918: binary representation of $\hU^N$), so as to select one of
919: $2^T$ codebooks (as long as $T < NH(\hU|U)$), and thus reach a total equivocation of $nH(K|Y)$ as long
920: as $nH(K|Y)\le NH(\hU)$, or equivalently, $H(K|Y)\le\lambda H(\hU)$. The equivocation level 
921: $h'=H(\hU)$ is now the ``saturation value'' that cannot be further improved (in analogy to $h=H(U)$
922: for the original source). This means that
923: condition (b) of Theorem 4 would now be replaced by the condition
924: \begin{equation}
925: \label{13}
926: h'\le \min\{H(\hU),H(K|Y)/\lambda\}.
927: \end{equation}
928: But with this condition, it is no longer clear that the best test channel for lossy compression of
929: $U^N$ is the one that achieves $R_U(D')$, because for the above modified version of condition (b),
930: it would be best to have $H(\hU)$ as large as possible (as long as it is below $H(K|Y)/\lambda$),
931: which is in partial conflict with the minimization 
932: of $I(U;\hU)$ that leads to $R_U(D')$. Therefore, a restatement of Theorem 4 would
933: require the existence of a channel $\{P_{\hU|U}(\hu|u),~u\in\calU,~\hu\in\hat{\calU}\}$ (in addition to
934: the existing requirement of a channel $P_{VY|KX}$), such that
935: the random variable $\hU$ now takes part in the 
936: compromise among {\it all} criteria of the problem. This means
937: that in conditions (a),(c),(d), and (e) of Theorem 4, 
938: $R_U(D')$ should be replaced by $I(U;\hU)$, and there would be an
939: additional condition (g): $Ed'(U,\hU)\le D'$. Condition (a), in view of the earlier discussion above,
940: would now be of the form:
941: \begin{equation}
942: \label{14}
943: h\le \min\{H(U),H(K|Y)/\lambda+H(U)-I(U;\hU)\}\equiv H(U)-[I(U;\hU)-H(K|Y)/\lambda]_+,
944: \end{equation}
945: where $[z]_+\dfn\max\{0,z\}$. 
946: Of course, under the assumption $H(K)\le\lambda R_U(D')$, that we have used thus far, 
947: \begin{equation}
948: H(\hU)\ge I(U;\hU)\ge R_U(D')\ge H(K)/\lambda\ge H(K|Y)/\lambda,
949: \end{equation}
950: in other words, $\min\{H(\hU),H(K|Y)/\lambda\}$ is always
951: attained by $H(K|Y)/\lambda$, and so, the dependence on $H(\hU)$ disappears, which means that the best
952: choice of $\hU$ (for all other conditions) is back to be the one that minimizes $I(U;\hU)$,
953: which gives us Theorem 4 as is. 
954: 
955: It is interesting to point out that 
956: this additional extension gives rise to yet 
957: another step in the direction of invalidating
958: the separation principle: While in Theorem 4 only the encryption and the
959: embedding interacted, and the rate--distortion coding of $U^N$ was still
960: independent of all other ingredients
961: of the system, here even this is no longer true, as the choice
962: of the test channel $P_{\hU|U}$ takes into account also compromises that
963: are associated with the encryption and the embedding. 
964: 
965: Note that this discussion
966: applies also to the {\it classical} joint source--channel coding, where there is no
967: embedding at all: In this case, $X$ is a degenerate RV (say, $X\equiv 0$, if $0\in\calX$), and so,
968: the mutual information terms depending on $X$ in conditions (c), (d) and (e), all
969: vanish, the best choice of $V$ is $V=Y$ (thus, the r.h.s in condition (c) becomes the capacity
970: of the channel $P_{Z|Y}$ with $K$ as SI at both ends), 
971: and condition (f) may be interpreted as a (generalized) power
972: constraint (with power function $\phi(y)=d(0,y)$). Nonetheless, the new versions of conditions (a) and (b)
973: remain the same as in eqs.\ (\ref{13}) 
974: and (\ref{14}). This is to say that the violation of the separation principle
975: occurs even in the classical model of a communication system, 
976: once security becomes an issue
977: and one is interested in the security of the reconstructed source. 
978: 
979: \section{Proof of the Converse Part of Theorem 4}
980: 
981: Let an $(n,\lambda,D+\epsilon,D'+\epsilon,R_c+\epsilon,
982: R_c'+\epsilon,h-\epsilon,h'-\epsilon)$ code be given.
983: First, from the requirement $H(Y^n|K^n)\le n (R_c'+\epsilon)$, we have:
984: \begin{eqnarray}
985: n(R_c'+\epsilon) &\ge& H(Y^n|K^n)\label{1st}\\
986: &=&H(Y^n|U^N,K^n)+I(U^N;Y^n|K^n)\nonumber\\
987: &\ge&H(Y^n|U^N,K^n)+I(U^N;Z^n|K^n)\nonumber\\
988: &=&H(Y^n|U^N,K^n)+I(U^N;Z^n,K^n) \label{2nd}
989: \end{eqnarray}
990: where the second inequality comes from the data processing theorem
991: ($U^N\to Y^n\to Z^n$ is a Markov chain given $K^n$)
992: and the last equality comes from the 
993: chain rule and the fact that $U^N$ and $K^n$
994: are independent. Define
995: $\tilde{V}_t=(X_{t+1}^n,U^N,K^{t-1},Z^{t-1})$, 
996: $J$ -- as a uniform RV over $\{1,\ldots,n\}$, $X=X_J$, $K=K_J$, $Y=Y_J$, $Z=Z_J$,
997: $V'=\tilde{V}_J$, and $V=(\tilde{V}_J,J)=(V',J)$.
998: Now, the first term on the 
999: right--most side of eq.\ (\ref{2nd}) is further lower bounded 
1000: in the following manner. 
1001: \begin{eqnarray}
1002: H(Y^n|U^N,K^n)&\ge&I(X^n;Y^n|U^N,K^n)\nonumber\\
1003: &=&I(X^n;Y^n,U^N,K^n)-I(X^n;U^N,K^n)\nonumber\\
1004: &=&\sum_{t=1}^n I(X_t;Y^n,U^N,K^n|X_{t+1}^n)-I(X^n;K^n)\label{xp1}\\
1005: &=&\sum_{t=1}^n I(X_t;Y^n,U^N,K^n,X_{t+1}^n)-nI(X;K)\label{xp2}\\
1006: &\ge&\sum_{t=1}^n 
1007: I(X_t;K_t,Y_t,U^N,K^{t-1},Z^{t-1},X_{t+1}^n)-nI(X;K)\label{xp3}\\
1008: &=&\sum_{t=1}^n I(X_t;K_t,Y_t,\tilde{V}_t)-nI(X;K)\nonumber\\
1009: &=&n[I(X;K,Y,V'|J)-I(X;K)]\nonumber\\
1010: &=&n[I(X;K,Y,V',J)-I(X;K)]\label{xp4}\\
1011: &=&nI(X;Y,V|K)\label{4th}
1012: \end{eqnarray}
1013: where (\ref{xp1}) is due to the chain 
1014: rule and the fact that $(X^n,K^n)$ is independent of
1015: $U^N$ (hence $U^N\to K^n\to X^n$ is trivially a Markov chain),
1016: (\ref{xp2}) is due to the memorylessness of $\{(X_t,K_t)\}$,
1017: (\ref{xp3}) is due to the data processing theorem,
1018: and (\ref{xp4}) follows from the fact that $\{X_t\}$ is stationary
1019: and so, $X=X_J$ is independent of $J$.
1020: The second term on the right--most 
1021: side of eq.\ (\ref{2nd}) is in turn lower bounded following
1022: essentially the same ideas as in the proof of the converse
1023: to the rate--distortion coding theorem (see, e.g., \cite{CT91}):
1024: \begin{eqnarray}
1025: I(U^N;Z^n,K^n)&=&H(U^N)-H(U^N|Z^n,K^n)\nonumber\\
1026: &=&\sum_{i=1}^N[H(U_i)-H(U_i|U^{i-1},Z^n,K^n)]\nonumber\\
1027: &=&\sum_{i=1}^N I(U_i;U^{i-1},Z^n,K^n)\nonumber\\
1028: &\ge&\sum_{i=1}^N I(U_i;[g_n(Z^n,K^n)]_i)\nonumber\\
1029: &\ge&\sum_{i=1}^N R_U(Ed'(U_i,[g_n(Z^n,K^n)]_i))\nonumber\\
1030: &\ge&NR_U\left(\frac{1}{N}\sum_{i=1}^N
1031: Ed'(U_i,[g_n(Z^n,K^n)]_i)\right)\nonumber\\
1032: &\ge&NR_U(D'+\epsilon),\label{5th}
1033: \end{eqnarray}
1034: where $[g_n(Z^n,K^n)]_i$ denotes the $i$-th component projection
1035: of $g_n(Z^n,K^n)$, i.e., $\hU_i$ as a function of $(Z^n,K^n)$.
1036: Combining eqs.\ (\ref{2nd}), (\ref{4th}), and (\ref{5th}), we get
1037: \begin{equation}
1038: n(R_c'+\epsilon)\ge NR_U(D'+\epsilon)+nI(X;Y,V|K).
1039: \end{equation}
1040: Dividing by $n$, we get 
1041: \begin{equation}
1042: \label{6th}
1043: R_c'+\epsilon\ge \lambda R_U(D'+\epsilon)+I(X;Y,V|K).
1044: \end{equation}
1045: Using the arbitrariness of $\epsilon$ together
1046: with the continuity of 
1047: $R_U(\cdot)$, we get condition (e) of Theorem 4.
1048: 
1049: Condition (d) is derived in the very same manner except that
1050: the starting point is the inequality $n(R_c+\epsilon)\ge H(Y^n)$,
1051: and when $H(Y^n)$ is further bounded from below, in analogy
1052: to the chain of inequalities (\ref{2nd}), there is 
1053: an additional term, $I(K^n;Y^n)$,
1054: that is in turn
1055: lower bounded in the following manner:
1056: \begin{eqnarray}
1057: I(K^n;Y^n)&\ge&\sum_{t=1}^n I(K_t;Y_t)\nonumber\\
1058: &=&nI(K;Y|J)\nonumber\\
1059: &=&n[H(K|J)-H(K|J,Y)]\nonumber\\
1060: &\ge&n[H(K)-H(K|Y)]\nonumber\\
1061: &=&nI(K;Y),
1062: \end{eqnarray}
1063: where the first inequality is because of the memorylessness of $\{K_t\}$,
1064: and the second inequality comes from the facts that 
1065: conditioning reduces entropy (in the second term) 
1066: and that $K$ is independent of
1067: $J$ (again, due to the stationarity of $\{K_t\}$).
1068: This gives the additional term, $I(K;Y)$, in condition (d).
1069: 
1070: Condition (c) is obtained as follows:
1071: \begin{eqnarray}
1072: NR_U(D'+\epsilon)&\le&I(U^N;K^n,Z^n)\nonumber\\
1073: &=&I(U^N;K^n,Z^n)-I(U^N;K^n,X^n)\nonumber\\
1074: &\le&\sum_{t=1}^n[I(\tilde{V}_t;K_t,Z_t)-I(\tilde{V}_t;K_t,X_t)]\\
1075: &=&n[I(V';K,Z|J)-I(V';K,X|J)]\nonumber\\
1076: &\le&n[I(V',J;K,Z)-I(V',J;K,X)]\\
1077: &=&n[I(V;K,Z)-I(V;K,X)]\nonumber\\
1078: &=&n[I(V;Z|K)-I(V;X|K)],
1079: \end{eqnarray}
1080: where 
1081: the first inequality is (\ref{5th}),
1082: the first equality is due to the independence between $U^N$ and $(K^n,X^n)$,
1083: the second inequality is 
1084: an application of \cite[Lemma 4]{GP80},
1085: the third inequality is due to the fact 
1086: that $I(K,Z;J)\ge 0$ and $I(K,X;J)=0$ (due to the
1087: stationarity of $\{(K_t,X_t)\}$), and 
1088: the last equality is obtained by adding and subtracting
1089: $I(V;K)$. Again, since this is true for every $\epsilon > 0$,
1090: it holds also for $\epsilon=0$, due to continuity. 
1091: 
1092: As for condition (f), we have:
1093: \begin{equation}
1094: D+\epsilon\ge\frac{1}{n}\sum_{t=1}^nEd(X_t,Y_t)=Ed(X,Y),
1095: \end{equation}
1096: and we use once again the arbitrariness of $\epsilon$.
1097: Regarding condition (b), we have:
1098: \begin{eqnarray}
1099: nH(K|Y)&\ge&nH(K|Y,J)\nonumber\\
1100: &=&\sum_{t=1}^nH(K_t|Y_t)\nonumber\\
1101: &\ge&\sum_{t=1}^nH(K_t|K^{t-1},Y^n)\nonumber\\
1102: &=& H(K^n|Y^n)\nonumber\\
1103: &=& H(K^n|Y^n,Z^n)\nonumber\\
1104: &\ge&I(K^n;\hat{U}^N|Y^n,Z^n)\nonumber\\
1105: &=&H(\hat{U}^N|Y^n,Z^n)-H(\hat{U}^N|Y^n,Z^n,K^n)\nonumber\\
1106: &=&H(\hat{U}^N|Y^n,Z^n)\nonumber\\
1107: &\ge&N(h'-\epsilon),
1108: \end{eqnarray}
1109: where the equality $H(K^n|Y^n)=H(K^n|Y^n,Z^n)$ holds since $K^n\to Y^n\to Z^n$ is a Markov chain, the last equality is due to the fact that $\hat{U}^N$
1110: is, by definition, a function of $(Z^n,K^n)$, and the last
1111: inequality is by the hypothesis that the code achieves an equivocation of
1112: at least $N(h'-\epsilon)$. Dividing by $N$ 
1113: and taking the limit $\epsilon\to 0$,
1114: leads to  
1115: $h'\le H(K|Y)/\lambda$, which is condition (b).
1116: Finally, to prove condition (a), consider the inequality 
1117: $nH(K|Y)\ge H(\hat{U}^N|Y^n,Z^n)$, that we have just proved,
1118: and proceed as follows (see also \cite{Yamamoto97}):
1119: \begin{eqnarray}
1120: \label{8th}
1121: nH(K|Y)&\ge&H(\hat{U}^N|Y^n,Z^n)\nonumber\\
1122: &\ge&H(\hat{U}^N|Y^n,Z^n)+N(h-\epsilon)-H(U^N|Y^n,Z^n)\nonumber\\
1123: &=&N(h-\epsilon)-H(U^N)+I(U^N;Y^n,Z^n)-\nonumber\\
1124: & &I(\hat{U}^N;Y^n,Z^n)+I(\hat{U}^N;U^N)+H(\hat{U}^N|U^N)\nonumber\\
1125: &\ge&N[h-\epsilon-H(U)+R_U(D'+\epsilon)]+\nonumber\\
1126: & &[I(U^N;Y^n,Z^n)-I(\hat{U}^N;Y^n,Z^n)+H(\hat{U}^N|U^N)],
1127: \end{eqnarray}
1128: where the second inequality follows from the hypothesis that
1129: the code satisfies $H(U^N|Y^n,Z^n)\ge N(h-\epsilon)$,
1130: and the third inequality is due to the memorylessness of $\{U_i\}$, the
1131: hypothesis that $\sum_{i=1}^NEd'(U_i,\hU_i)\le N(D'+\epsilon)$,
1132: and the converse to the rate--distortion coding theorem.
1133: Now, to see that the second bracketed term is non--negative, we have the
1134: following chain of inequalities:
1135: \begin{eqnarray}
1136: \label{bra}
1137: && I(U^N;Y^n,Z^n)-I(\hat{U}^N;Y^n,Z^n)+H(\hat{U}^N|U^N)\nonumber\\
1138: &=&I(U^N;Y^n,Z^n)-H(Y^n,Z^n)+H(Y^n,Z^n|\hat{U}^N)
1139: +H(\hat{U}^N|U^N)\nonumber\\
1140: &\ge& I(U^N;Y^n,Z^n)-H(Y^n,Z^n)+H(Y^n,Z^n|U^N,\hat{U}^N)
1141: +H(\hat{U}^N|U^N)\nonumber\\
1142: &=& I(U^N;Y^n,Z^n)-H(Y^n,Z^n)+H(Y^n,Z^n,\hat{U}^N|U^N)\nonumber\\
1143: &\ge& I(U^N;Y^n,Z^n)-H(Y^n,Z^n)+H(Y^n,Z^n|U^N)\nonumber\\
1144: &=& 0.
1145: \end{eqnarray}
1146: Combining this with eq.\ (\ref{8th}), we have
1147: \begin{equation}
1148: nH(K|Y)\ge
1149: N[h-\epsilon-H(U)+R_U(D'+\epsilon)].
1150: \end{equation}
1151: Dividing again by $N$, and letting $\epsilon$ vanish, we obtain 
1152: $h\le H(K|Y)/\lambda+H(U)-R_U(D')$, which 
1153: completes the proof of condition (a).
1154: 
1155: To complete the proof of the converse part, it remains to show that the
1156: alphabet size of $V$ can be reduced to $|\calK|\cdot|\calX|\cdot|\calY|+1$.
1157: To this end, we extend the proof of the parallel argument in \cite{MM04}
1158: by using the support lemma (cf.\ \cite{CK81}), which is based on
1159: Carath\'{e}odory's theorem. According to this lemma, given $J$ real
1160: valued continuous functionals $f_{j}$, $j=1,...,J$ on the set
1161: $\calP(\calX)$ of probability distributions over the alphabet
1162: $\calX$, and given any probability measure $\mu$ on the Borel
1163: $\sigma$-algebra of $\calP(\calX)$, there exist $J$ elements
1164: $Q_{1},...,Q_{J}$ of $\calP(\calX)$ and $J$ non-negative reals,
1165: $\alpha_{1},...,\alpha_{J}$, such that
1166: $\sum_{j=1}^{J}\alpha_{j}=1$ and for every $j=1,...,J$
1167: \begin {eqnarray}
1168:     \int_{\calP(\calX)}f_{j}(Q)\mu(dQ) =
1169:     \sum_{i=1}^{J}\alpha_{i}f_{j}(Q_{i}).
1170: \end {eqnarray}
1171: Before we actually apply the support lemma, we first rewrite the
1172: relevant mutual informations of Theorem 4 in a more convenient
1173: form for the use of this lemma. First, observe that
1174: \begin {eqnarray}
1175:     I(V;Z|K)-I(V;X|K)   & = & H(Z|K)-H(Z|V,K) - H(X|K) + H(X|V,K)\nonumber\\
1176:                         & = & H(Z|K)-H(X|K) + H(K,X|V)-H(K,Z|V),
1177: \end {eqnarray}
1178: and 
1179: \begin {eqnarray}
1180: I(X;Y,V|K)  & = & I(X;V|K) + I(X;Y|V,K) \\
1181:                         & = & H(X|K) - H(X|V,K) +
1182:                         H(X|V,K)-H(X|V,Y,K) \nonumber \\
1183:                         & = & H(X|K)-H(X|V,Y,K) \nonumber \\
1184:                         & = & H(X|K)-H(K,X,Y|V)+H(K,Y|V).
1185: \end {eqnarray}
1186: For a given joint distribution of 
1187: $(K,X,Y)$, and given $P_{Z|Y}$, $H(Z|K)$ and $H(X|K)$ are
1188: both given and unaffected by $V$. Therefore, in order to preserve
1189: prescribed values of $I(V;Z|K)-I(V;X|K)$ and $I(X;V,Y|K)$, it is
1190: sufficient to preserve the associated values $H(K,X|V) - H(K,Z|V)$
1191: and $H(K,X,Y|V) - H(K,Y|V)$.
1192: Let us define then the following functionals of a generic
1193: distribution $Q$ over $\calK\times\calX\times\calY$, 
1194: where $\calK\times\calX \times
1195: \calY$ is assumed, without loss of generality, to be
1196: $\{1,2,...,m\}$, $m = |\calK|\cdot|\calX|\cdot|\calY|$:
1197: \begin{eqnarray}
1198:     &&f_{i}(Q) = Q(k,x,y), ~~~i\dfn (k,x,y)=1,...,m-1\\
1199:     &&f_{m}(Q) =
1200:     \sum_{k,x,y}Q(k,x,y)\sum_{z}P_{Z|Y}(z|y)
1201: \log\frac{\sum_{x',y'}Q(k,x',y')P_{Z|Y}(z|y')}{Q(k,x)}.
1202: \end{eqnarray}
1203: Next define
1204: \begin{eqnarray}
1205:     &&f_{m+1}(Q) =
1206:     \sum_{k,x,y}Q(k,x,y)\log\frac{Q(k,y)}{Q(k,x,y)}.
1207: \end{eqnarray}
1208: Applying now the support lemma, we find that there exists a random
1209: variable $V$ (jointly distributed with $(K,X,Y)$), whose alphabet
1210: size is $|\calV| = m+1 = |\calK|\cdot|\calX|\cdot|\calY|+1$ and it satisfies
1211: simultaneously:
1212: \begin{eqnarray}
1213:     \sum_{v}\Pr\{V = v\}f_{i}(P(\cdot|v)) = P_{KXY}(k,x,y), \textrm{ }
1214:     i=1,...,m-1,
1215: \end{eqnarray}
1216: \begin{eqnarray}
1217:     \sum_{v}\Pr\{V = v\}f_{m}(P(\cdot|v)) = H(K,X|V) - H(K,Z|V),
1218: \end{eqnarray}
1219: and
1220: \begin{eqnarray}
1221:     \sum_{v}\Pr\{V = v\}f_{m+1}(P(\cdot|v)) = H(K,X,Y|V) -
1222:     H(K,Y|V).
1223: \end{eqnarray}
1224: It should be pointed out that this random variable maintains the
1225: prescribed distortion level $Ed(X,Y)$ since 
1226: $P_{XY}$ is preserved. By the same token, $H(K|Y)$ and $I(K;Y)$, which depend
1227: only on $P_{KY}$, are preserved as well.
1228: This completes the proof of the
1229: converse part of Theorem 4.
1230: 
1231: \section{Proof of the Direct Part of Theorem 4}
1232: 
1233: In this section, we show that if there exist RV's $(V,Y)$ that satisfy the
1234: conditions of Theorem 4, then for every $\epsilon > 0$,
1235: there is a sufficiently large $n$ for which 
1236: $(n,\lambda,D+\epsilon,D'+\epsilon,R_c+\epsilon,
1237: R_c'+\epsilon,h-\epsilon,h'-\epsilon)$ codes exist. 
1238: One part of the proof is strongly based
1239: on a straightforward extension of the proof of the direct part of 
1240: \cite{MM04} to the case 
1241: of additional SI present at both encoder and decoder. Nonetheless,
1242: for the sake of completeness, the full details are provided here.
1243: It should be pointed out that for the attack--free case, an analogous
1244: extension can easily be offered to the direct part of \cite{MM03}.
1245: 
1246: We first digress to establish some additional notation
1247: conventions associated with the method of types \cite{CK81}. For a
1248: given generic 
1249: finite--alphabet random variable (RV) $A \in \calA$ (or a vector of
1250: RV's taking on values in $\calA$), and a vector $a^\ell \in
1251: \calA^{\ell}$ ($\ell$ -- positive integer), 
1252: the empirical probability mass function (EPMF) is a
1253: vector $P_{a^\ell}=\{P_{a^\ell}(a'),~a' \in \calA\}$, where $P_{a^\ell}(a')$ is
1254: the relative frequency of the letter $a' \in \calA$ in the vector
1255: $a^\ell$. Given $\delta > 0$, let us denote the set of all
1256: $\delta$-typical sequences of length $\ell$ by
1257: $T_{P_A}^\delta$, or by $T_A^\delta$ 
1258: (if there is no ambiguity regarding the 
1259: PMF that governs $A$), i.e., $T_A^\delta$ 
1260: is the set of the sequences $a^\ell \in
1261: \calA^\ell$ such that
1262: \begin{equation}
1263: \label{Px}
1264:     (1-\delta)P_{A}(a') \leq P_{a^\ell}(a') \leq (1+\delta)P_{A}(a')
1265: \end{equation}
1266: for every $a' \in \calA$. For sufficiently large $\ell$,
1267: the size of $T_A^\delta$ is well--known \cite{CK81} to be bounded by
1268: \begin {equation}
1269: \label{TgxSize}
1270:     2^{\ell[(1-\delta)H(A)-\delta]} \leq
1271:     |T_A^\delta| \leq 2^{\ell(1+\delta)H(A)}.
1272: \end{equation}
1273: It is also well--known (by the weak law of large numbers)
1274: that:
1275: \begin{equation}
1276: \label{PrTgx}
1277:     \Pr \big\{ A^\ell \notin T_A^\delta \big\} \leq \delta
1278: \end{equation}
1279: for all $\ell$ sufficiently large.
1280: For a given generic channel $P_{B|A}(b|a)$ and for each $a^\ell \in
1281: T_A^\delta$, the set 
1282: of all sequences $b^\ell$ that are jointly
1283: $\delta$-typical with $a^\ell$, will be denoted by
1284: $T_{P_{B|A}}^\delta(a^\ell)$, 
1285: or by $T_{B|A}^\delta(a^\ell)$ if there is no ambiguity, i.e.,
1286: $T_{B|A}^\delta(a^\ell)$ is the set of all $b^\ell$ such that:
1287: \begin{equation}
1288: \label{Pygx}
1289:     (1-\delta)P_{a^\ell}(a')P_{B|A}(b'|a') \leq P_{a^\ell b^\ell}(a',b') \leq
1290:     (1+\delta)P_{a^\ell}(a')P_{B|A}(b'|a'),
1291: \end{equation}
1292: for all $a'\in \calA, b'\in \calB$, where $P_{a^\ell b^\ell}(a',b')$
1293: denotes the fraction of occurrences of the pair $(a',b')$ in
1294: $(a^\ell,b^\ell)$. Similarly as in eq.\ (\ref{TgxSize}), for all sufficiently large $\ell$ and
1295: $a^\ell \in T_A^\delta$, the size of $T_{B|A}^\delta(a^\ell)$ is bounded as follows:
1296: \begin {equation}
1297: \label{TgyxSize}
1298:     2^{\ell[(1-\delta)H(B|A)-\delta]} \leq
1299:     |T_{B|A}^\delta(a^\ell)| \leq 2^{\ell(1+\delta)H(B|A)}.
1300: \end{equation}
1301: Finally, observe that for all $a^\ell \in T_A^\delta$ and $b^\ell
1302: \in T_{B|A}^\delta(a^\ell)$, the distortion 
1303: $d(a^\ell,b^\ell)=\sum_{j=1}^\ell d(a_j,b_j)$ is upper bounded by:
1304: \begin{equation}
1305: \label{d_theoretic}
1306:     d(a^\ell,b^\ell) \leq \ell(1+\delta)^{2}\sum_{a',b'}P_{A}(a')P_{B|A}(b'|a')d(a',b') 
1307: \dfn \ell(1+\delta)^{2}Ed(A,B).
1308: \end{equation}
1309: 
1310: Let $(K,X,V,Y,Z)$ be a given random vector that 
1311: satisfies the conditions of Theorem 4.
1312: We now describe the mechanisms of random code selection
1313: and the encoding and decoding operations. For a given $\epsilon > 0$, fix $\delta$ such that
1314: $2\delta+\max\{2\cdot\exp\{-2^{n\delta}\}+2^{-n\delta},\delta^{2}\}
1315: \leq \epsilon$. Define also
1316: \begin{equation}
1317: \epsilon_1\dfn \delta[1+H(V|K)+H(V|K,X)],
1318: \end{equation}
1319: \begin{equation}
1320: \epsilon_2\dfn \delta[1+H(Y|K,V)+H(Y|K,X,V)],
1321: \end{equation}
1322: and 
1323: \begin{equation}
1324: \epsilon_3\dfn\delta[1+H(V|K)+H(V|Z,K)].
1325: \end{equation}
1326: \\
1327: \\
1328: \noindent \textsl{Generation of a rate--distortion code}: \\
1329: Apply the type--covering lemma \cite{CK81} and
1330: construct a rate--distortion codebook that covers $T_U^\delta$
1331: within distortion 
1332: $N(D'+\epsilon)$ w.r.t.\ $d'$, using $2^{NR_U(D')}$ codewords.
1333: \\
1334: \\
1335: \noindent \textsl{Generation of the encrypting bitstream}: \\
1336: For every $k^n\in T_K^\delta$, randomly select an index in the
1337: set $\{0,1,\ldots,2^{n[H(K|Y)+\delta]}-1\}$ with a uniform
1338: distribution. Denote by
1339: $s^J(k^n)=(s_1(k^n),\ldots,s_J(k^n))$, $s_j(k^n)\in\{0,1\}$, $j=1,\ldots,J$, 
1340: the binary string of length $J=n[H(K|Y)+\delta]$ that represents
1341: this index. (Note that $s^J(k^n)$ can be interpreted as the output
1342: of the Slepian--Wolf encoder for $K^n$, where $Y^n$ plays the role
1343: of SI at the decoder \cite{SW73}.)
1344: \\
1345: \\
1346: \noindent \textsl{Generation of an auxiliary embedding code}: \\
1347: We first construct an auxiliary code capable of
1348: embedding $2^{NR_U(D')}$ watermarks by a random selection technique.
1349: First, $M_1=2^{nR_1}$, $R_1 = I(V;Z|K)-\epsilon_3-\delta$,
1350: sequences $\{V^n(i,k^n)\}$, $i\in\{1,\ldots,M_1\}$, are drawn
1351: independently from $T_{V|K}^\delta(k^n)$ for every $k^n\in T_{K}^\delta$. 
1352: For every such $k^n$, let us denote the set of these
1353: sequences by $\calC(k^n)$. The elements of $\calC(k^n)$ are evenly
1354: distributed among $M_U \dfn 2^{NR_U(D')}$ bins, each of size $M_2
1355: = 2^{nR_2}$, $R_2 = I(X;V|K)+\epsilon_1 + \delta$ (this is possible
1356: thanks to condition (c) of Theorem 4, provided 
1357: that the inequality therein is strict). A different
1358: (encrypted) message of length $L=NR_U(D')=n\lambda R_U(D')$ bits
1359: is attached to each bin, identifying a sub-code
1360: that represents this message. We denote the codewords in bin number $m$ ($m
1361: \in \{1,2,\ldots,M_U\}$), by $\{V^n(m,j,k^n)\}$, $j \in \{1,2,\ldots,M_2\}$.
1362: \\
1363: \\
1364: \noindent \textsl{Stegotext sequence generation}: \\
1365: \noindent For each auxiliary sequence (in the above auxiliary codebook
1366: of each $\delta$--typical $k^n$),
1367: $V^n(m,j,k^n)=v^n$, a set of $M_3 \dfn 2^{nR_3}$, $R_3 =
1368: I(X;Y|V,K)+\epsilon_2 + \delta$, stegotext sequences
1369: $\{Y^n(j',v^n,k^n)\}$, $j' \in \{1,\ldots,M_3\}$, are independently drawn from
1370: $T_{Y|VK}^\delta(v^n,k^n)$. We denote this set by $\calC(v^n,k^n)$.
1371: \\
1372: \\
1373: \noindent \textsl{Encoding}:\\
1374: \noindent Upon receiving a triple $(u^N,x^n,k^n)$, the encoder acts as
1375: follows:
1376: \begin{enumerate}
1377: \item If $u^N\in T_U^\delta$, 
1378: let $w^L=(w_1,\ldots,w_L)$, $w_i\in\{0,1\}$, $i=1,\ldots,L$
1379: be the binary representation of the index of the rate--distortion
1380: codeword for the message source. 
1381: For $k^n\in T_K^\delta$, let $s^J(k^n)=(s_1(k^n),\ldots,s_J(k^n))$
1382: denote the binary representation string of the index of $k^n$.
1383: Let $\tilde{w}^L=(\tilde{w}_1,\ldots,\tilde{w}_L)$, where
1384: $\tilde{w}_j=w_j\oplus s_j(k^n)$, 
1385: $j=1,\ldots,J$, and $\tilde{w}_j=w_j$, $j=J+1,\ldots,L$,
1386: and where $\oplus$ denotes modulo 2
1387: addition, i.e., the XOR operation.\footnote{Note that since $H(K)$ is 
1388: assumed smaller than $\lambda R_U(D')$,
1389: then so is $H(K|Y)$,
1390: and therefore $J\le L$.} 
1391: The binary vector $\tilde{w}^L$ is the (partially) encrypted message to be
1392: embedded. Let $m=\sum_{l=1}^L\tilde{w}_l2^{l-1}+1$ 
1393: denote the index of this message.
1394: If $u^N\notin T_{U}^\delta$ or $k^n\notin T_{K}^\delta$, an arbitrary
1395: (error) message $\tilde{w}^L$ is generated (say, the all--zero message).
1396: \item If $(k^n,x^n) \in T_{KX}^\delta$ 
1397: find, in bin number $m$, the first $j$ such that
1398: $V^n(m,j,k^n)=v^n$ is jointly typical, i.e.,
1399: $(k^n,x^n,v^n) \in T_{KXV}^\delta$, and then find the 
1400: first $j'$ such that $Y^n(j',v^n,k^n)=y^n 
1401: \in \calC(v^n,k^n)$ is jointly typical, i.e.,
1402: $(k^n,x^n,v^n,y^n) \in T_{KXVY}^\delta$. 
1403: This vector $y^n$ is chosen for transmission.
1404: If $(k^n,x^n) \notin T_{KX}^\delta$, 
1405: or if there is no $V^n(m,j,k^n)=v^n$ and $Y^n(j',v^n,k^n)=y^n$ such that
1406: $(k^n,x^n,v^n,y^n) \in T_{KXVY}^\delta$, 
1407: an arbitrary vector $y^n\in\calY^n$ is transmitted.
1408: \end {enumerate}
1409: 
1410: \noindent \textsl{Decoding}:\\
1411: \noindent Upon receiving $Z^n = z^n$ 
1412: and $K^n=k^n$, the decoder finds all
1413: sequences $\{v^n\}$ in $\calC(k^n)$ such that $(k^n,v^n,z^n) \in
1414: T_{KVZ}^\delta$. If all $\{v^n\}$ 
1415: found belong to the same bin, say, $\hat{m}$,
1416: then $\hat{m}$ is decoded as the 
1417: embedded message, and then the binary representation
1418: vector $\hat{w}^L=(\hat{w}_1,\ldots,\hat{w}_L)$ corresponding to $\hat{m}$ is 
1419: decrypted, again, by modulo 2 addition
1420: of its first $J$ bits with $s^J(k^n)$. 
1421: This decrypted binary $L$--vector is
1422: then mapped to the corresponding reproduction 
1423: vector $\tilde{u}^N$ of the rate--distortion codebook
1424: for the message source.
1425: If there is no $v^n\in\calC(k^n)$ 
1426: such that $(k^n,v^n,z^n) \in T_{KVZ}^\delta$ or if
1427: there exist two or more bins that contain such a sequence, an
1428: error is declared.
1429: \\
1430: \\
1431: \noindent We now turn to the performance 
1432: analysis of this code in all relevant aspects.
1433: For each triple $(k^n,x^n,u^N)$ and particular choices of
1434: the codes, the possible causes for incorrect
1435: watermark decoding are the following:
1436: 
1437: \begin{enumerate}
1438:   \item $(k^n,x^n,u^N) \notin T_{KX}^\delta\times T_{U}^\delta$.
1439:    Let the probability of this event be defined as $P_{e_{1}}$.
1440:   \item $(k^n,x^n,u^N) 
1441:     \in T_{KX}^\delta\times T_{U}^\delta$, but in bin no.\ $m$ 
1442:     there is no $v^n$ s.t. $(k^n,x^n,v^n) \in T_{KXV}^\delta$.
1443:     Let the probability of this event be defined as $P_{e_{2}}$.
1444:   \item $(k^n,x^n,u^N) 
1445:     \in T_{KX}^\delta\times T_{U}^\delta$ and in bin no.\ $m$ 
1446:     there is $v^n$ s.t. $(k^n,x^n,v^n) \in T_{KXV}^\delta$, but there is no
1447:     $y^n \in \calC(v^n,k^n)$ s.t. $(k^n,x^n,v^n,y^n) \in T_{KXVY}^\delta$.
1448:     Let the probability of this event be defined as $P_{e_{3}}$.
1449:   \item $(k^n,x^n,u^N) 
1450:     \in T_{KX}^\delta\times T_{U}^\delta$ and in bin no.\ $m$ 
1451:     there is $v^n$ and $y^n
1452:     \in \calC(v^n,k^n)$ such that $(k^n,x^n,v^n,y^n) \in T_{KXVY}^\delta$,
1453:     but $(k^n,v^n,z^n) \notin T_{KVZ}^\delta$.
1454:     Let the probability of this event be defined as $P_{e_{4}}$.
1455:   \item $(k^n,x^n,u^N) 
1456:     \in T_{KX}^\delta\times T_{U}^\delta$ and in bin no.\ $m$ 
1457:     there is $v^n$ and $y^n
1458:     \in \calC(v^n,k^n)$ such that $(k^n,x^n,v^n,y^n) \in T_{KXVY}^\delta$,
1459:     and $(k^n,v^n,z^n) \in T_{KVZ}^\delta$, but
1460:     there exists another bin, say, no.\ $\tilde{m}$,
1461:     that contains $\tilde{v}^n$ s.t. $(k^n,\tilde{v}^n,z^n)\in T_{KVZ}^\delta$.
1462:     Let the probability of this event be defined as $P_{e_{5}}$.
1463: \end{enumerate}
1464: If none of these events occur, the message 
1465: $\tilde{w}^L$ (or, equivalently, $m$) is decoded
1466: correctly from $z^n$, the distortion between $x^n$
1467: and $y^n$ is within $n(D+\epsilon)$ (as follows from (\ref{d_theoretic})), and
1468: the distortion between $u^N$ and its rate--distortion codeword,
1469: $\tilde{u}^N=\hat{u}^N$, does not exceed $N(D'+\epsilon)$. 
1470: Thus, requirements 1 and 4 (modified according
1471: to eq.\ (\ref{distp}), with $D'+\epsilon$ replacing $D'$) are both satisfied.
1472: Therefore, we first prove that the probability for none of the
1473: events 1--5 to occur, tends to unity as $n\to\infty$.
1474: 
1475: The average probability of error $P_{e}$ in decoding $m$ is bounded by
1476: \begin {equation}
1477: \label{Pe}
1478:     P_{e} \leq \sum_{i=1}^5 P_{e_i}.
1479: \end {equation}
1480: The fact that $P_{e_{1}}\rightarrow 0$ follows immediately from
1481: (\ref{PrTgx}). As for $P_{e_{2}}$, we have:
1482: \begin {equation}
1483: \label{P_e2a}
1484: P_{e_{2}} \dfn \prod_{j=1}^{M_2}
1485: \Pr\{(k^n,x^n,V^n(m,j,k^n)) \notin T_{KXV}^\delta\}.
1486: \end {equation}
1487: Now, by (\ref{TgyxSize}), for every $j$ and every 
1488: $(k^n,x^n)\in T_{KX}^\delta$:
1489: \begin {eqnarray}
1490: \label{P_e2i}
1491:     \Pr\{V^n(m,j,k^n) \notin T_{V|KX}^\delta(k^n,x^n)\} & = & 1 -
1492: \Pr\{V^n(m,j,k^n) \in T_{V|KX}^\delta(k^n,x^n)\} \nonumber\\
1493: & = & 1 - \frac{|T_{V|KX}^\delta(k^n,x^n)|}{|T_{V|K}^\delta(k^n)|} \nonumber\\
1494:     & \leq & 1 -
1495: \frac{2^{n[(1-\delta)H(V|K,X)-\delta]}}
1496: {2^{n(1+\delta)H(V|K)}} \nonumber\\
1497:     & = &1 - 2^{-n[I(X;V|K)+\epsilon_1]}.
1498: \end {eqnarray}
1499: Substitution of
1500: (\ref{P_e2i}) into (\ref{P_e2a}) provides us with the following
1501: upper bound:
1502: \begin {equation}
1503: P_{e_{2}} \leq \Big[1 - 2^{-n[I(X;V|K)+\epsilon_1]}\Big]^{M_2} \leq
1504: \exp\bigg\{-2^{nR_2}\cdot2^{-n[I(X;V|K)+\epsilon_1]}\bigg\} \rightarrow 0,
1505: \end {equation}
1506: double--exponentially rapidly since $R_2 = I(X;V|K)+\epsilon_1 +
1507: \delta$.
1508: To estimate $P_{e_{3}}$, we repeat the same technique:
1509: \begin {equation}
1510: \label{P_e3a}
1511: P_{e_{3}} \dfn \prod_{j'=1}^{M_3}\Pr\{(k^n,x^n,v^n,Y^n(j',v^n,k^n))
1512: \notin T_{KXVY}^\delta\}.
1513: \end {equation}
1514: Again, by the property of the typical sequences, for every $j'$ and 
1515: $(k^n,x^n,v^n)\in T_{KXV}^\delta$:
1516: \begin {eqnarray}
1517: \label{P_e3Cu}
1518:     \Pr\{Y^n(j',v^n,k^n) \notin T_{Y|KXV}^\delta(k^n,x^n,v^n)\} 
1519: \leq 1 - 2^{-n[I(X;Y|V,K)+\epsilon_2]},
1520: \end {eqnarray}
1521: and therefore,
1522: substitution of (\ref{P_e3Cu}) into (\ref{P_e3a}) gives
1523: \begin {equation}
1524: P_{e_{3}} \leq \Big[1 - 2^{-n[I(X;Y|V,K)+\epsilon_2]}\Big]^{M_3} \leq
1525: \exp\bigg\{-2^{nR_3}\cdot2^{-n[I(X;Y|V,K)+
1526: \epsilon_2]}\bigg\} \rightarrow 0,
1527: \end {equation}
1528: double--exponentially rapidly since $R_3 =
1529: I(X;Y|V,K)+\epsilon_2 + \delta$.
1530: The estimation of $P_{e_{4}}$ is again based on properties
1531: of typical sequences. Since $Z^n$ is the output of
1532: a memoryless channel $P_{Z|Y}$ with input
1533: $y^n=Y^n(j',v^n,k^n)$ and by the assumption of this step
1534: $(k^n,x^n,v^n,y^n) \in T_{KXVY}^\delta$, from (\ref{PrTgx}) 
1535: and the Markov lemma \cite[Lemma 14.8.1]{CT91}, we obtain
1536: \begin {equation}
1537: \label{Pe4}
1538:     P_{e_{4}} = \Pr\{(k^n,x^n,v^n,y^n,Z^n)
1539:                 \notin T_{KXVYZ}^\delta\} \leq \delta,
1540: \end {equation}
1541: and similarly to $P_{e_{1}}$, $P_{e_4}$ can be made as small as desired by an
1542: appropriate choice of $\delta$.
1543: 
1544: Finally, we estimate $P_{e_{5}}$ as follows:
1545: \begin {eqnarray}
1546:     P_{e_{5}} & = & \Pr\{\exists \tilde{m} \neq m:
1547:         (k^n,V^n(\tilde{m},j,k^n),z^n) \in T_{KVZ}^\delta\} \nonumber \\
1548:     & \leq & \sum_{\tilde{m} \neq m,~j\in\{1,2,\ldots,M_2\}}
1549: \Pr\{(k^n,V^n(\tilde{m},j,k^n),z^n) \in T_{KVZ}^\delta\}
1550:         \nonumber \\
1551: &=& (2^{NR_U(D')}-1)2^{nR_2}\Pr\{(k^n,V^n(\tilde{m},j,k^n),z^n) 
1552: \in T_{KVZ}^\delta\} 
1553: \nonumber \\
1554:     & \leq & 2^{nR_1}2^{-n[I(V;Z|K)-\epsilon_3]}.
1555: \end {eqnarray}
1556: Now, since $R_1
1557: = I(V;Z|K)-\epsilon_3-\delta$, $P_{e_{5}} \rightarrow 0$.
1558: Since $P_{e_{i}}\rightarrow 0$ for $i=1,\ldots,5$,
1559: their sum tends to zero as well, implying that there exists at
1560: least one choice of an auxiliary code and related stegotext codes
1561: that give rise to the reliable 
1562: decoding of $\tilde{W}^L$.
1563:  
1564: Now, let us denote by $N_{c}$ the total number of composite
1565: sequences in a codebook that corresponds 
1566: to a $\delta$--typical $k^n$. Then,
1567: \begin {eqnarray}
1568: N_c&=&M_U\cdot M_2\cdot M_3\nonumber\\
1569: &=&2^{n[\lambda R_U(D')+I(X;V|K)+I(X;Y|V,K)
1570: +\epsilon_1+\epsilon_2+2\delta]}\nonumber\\
1571: &=&2^{n[\lambda R_U(D')+I(X;Y,V|K)+\epsilon_1+\epsilon_2+2\delta]}.
1572: \end {eqnarray}
1573: Thus, 
1574: \begin{eqnarray}
1575: H(Y^n|K^n)&\leq&\log N_c\nonumber\\
1576: &=&n[\lambda R_U(D')+I(X;Y,V|K)+\epsilon_1+\epsilon_2+2\delta]\nonumber\\
1577: &\le& n(R_c'+\epsilon_1+\epsilon_2+2\delta),
1578: \end{eqnarray}
1579: where in the last inequality we have used condition (e).
1580: For sufficiently small values of $\delta$ (and hence of $\epsilon_1$ and $\epsilon_2$)
1581: $\epsilon_1+\epsilon_2+2\delta\le \epsilon$ and so, the compressibility
1582: requirement in the presence of $K^n$ is satisfied.
1583: 
1584: We next prove the achievability of $R_c$. Let us consider the set of 
1585: $\delta$--typical key sequences $T_K^\delta$, and view it as the
1586: union of $0$--typical sets
1587: (i.e., $\delta$--typical sets
1588: with $\delta=0$),
1589: $\{T_{Q_K}^0\}$, where $Q_K$ exhausts the set of all rational PMF's
1590: with denominator $n$, and with the property
1591: \begin{equation}
1592: (1-\delta)P_K(k)\le Q_K(k)\le (1+\delta)P_K(k),~~~\forall k\in\calK .
1593: \end{equation}
1594: Suppose that we have already randomly selected a codebook for
1595: one {\it representative} member $\hat{k}^n$ of each
1596: type class $T_{Q_K}^0\subset
1597: T_K^\delta$ 
1598: using the mechanism described above.
1599: Now, consider the
1600: set of all permutations from $\hat{k}^n$ to every other member of 
1601: $T_{Q_K}^0$.
1602: The auxiliary codebook and the stegotext 
1603: codebooks for every other key sequence, $k^n\in T_{Q_K}^0$
1604: will be obtained by permuting all (auxiliary and stegotext) codewords of those
1605: corresponding to $\hat{k}^n$ according
1606: to the same permutation that leads from $\hat{k}^n$ to $k^n$ (thus preserving
1607: all the necessary joint typicality properties).
1608: Now, in the {\it union} of all stegotext codebooks, 
1609: corresponding to all typical key
1610: sequences, each codeword will appear 
1611: at least $(n+1)^{-|\calK|\cdot|\calY|}
1612: \cdot 2^{n[(1-\delta)H(K|Y)-\delta]}$ times,
1613: which is a lower bound to the number of permutations of 
1614: $\hat{k}^n$ which leave a
1615: given stegotext codeword $y^n$ unaltered. 
1616: The total number of stegotext codewords, $N_Y$,
1617: in all codebooks of all $\delta$--typical key sequences (including
1618: repetitions) is upper bounded by
1619: \begin{eqnarray}
1620: \label{Nyu}
1621: N_Y&=&|T_K^\delta|\cdot N_c\nonumber\\
1622: &\le&2^{n[(1+\delta)H(K)+\delta]}\cdot 
1623: 2^{n[\lambda R_U(D')+I(X;Y,V|K)+\epsilon_1+\epsilon_2+2\delta]}\nonumber\\
1624: &=&2^{n[H(K)+
1625: \lambda R_U(D')+I(X;Y,V|K)+\epsilon_1+\epsilon_2+\delta(H(K)+3)]}.
1626: \end{eqnarray}
1627: Let $\calC$ denote the 
1628: union of all stegotext codebooks, namely, the set of all
1629: {\it distinct} stegotext vectors
1630: across all codebooks corresponding to all $k^n\in T_K^\delta$, and
1631: let $N(y^n)$ denote the number of occurrences of a given vector 
1632: $y^n\in\calY^n$ in all stegotext codebooks. Then,
1633: in view of the above combinatorial consideration, we have
1634: \begin{equation}
1635: \label{Nyl}
1636: N_Y=\sum_{y^n\in\calC} N(y^n)\ge |\calC|\cdot
1637: (n+1)^{-|\calK|\cdot|\calY|}\cdot
1638: 2^{n[(1-\delta)H(K|Y)-\delta]}.
1639: \end{equation}
1640: Combining eqs.\ (\ref{Nyu}) and (\ref{Nyl}), we have
1641: \begin{equation}
1642: \label{66}
1643: \log|\calC|\le n[\lambda R_U(D')+I(X;Y,V|K)+I(K;Y)+\delta'],
1644: \end{equation}
1645: where
1646: \begin{equation}
1647: \delta'=\epsilon_1+\epsilon_2+\delta(H(K)+H(K|Y)+4)+
1648: |\calK|\cdot|\calY|\cdot\frac{\log(n+1)}{n},
1649: \end{equation}
1650: which is arbitrarily small provided that
1651: $\delta$ is sufficiently small and $n$ is sufficiently large.
1652: Thus, the rate required for 
1653: public compression of $Y^n$ (without the key),
1654: which is $(\log|\calC|)/n$,
1655: is arbitrarily close to
1656: $[\lambda R_U(D')+I(X;Y,V|K)+I(K;Y)]$, 
1657: which in turn is upper bounded by $R_c$,
1658: by condition (d) of Theorem 4.
1659: 
1660: Before we proceed to evaluate 
1661: the equivocation levels,
1662: an important comment is in order in the 
1663: context of public compression (and a similar
1664: comment will apply to private compression): 
1665: Note that a straightforward 
1666: (and not necessarily optimal) method for public compression
1667: of $Y^n$ is simply according to its 
1668: index within $T_Y^\delta$, which requires about
1669: $nH(Y)$ bits. On the other hand, 
1670: the converse theorem tells us that the compressed
1671: representation of $Y^n$ cannot be much shorter 
1672: than $n[\lambda R_U(D')+I(X;Y,V|K)+I(K;Y)]$ bits
1673: (cf.\ the necessity of condition 
1674: (d) of Theorem 4). Thus, a contradiction 
1675: between these two facts is avoided
1676: only if 
1677: \begin{equation}
1678: \label{inherent1}
1679: \lambda R_U(D')+I(X;Y,V|K)+I(K;Y)\le H(Y),
1680: \end{equation}
1681: or, equivalently,
1682: \begin{equation}
1683: \label{inherent2}
1684: \lambda R_U(D')+I(X;Y,V|K)\le H(Y|K).
1685: \end{equation}
1686: This means that any achievable point $(D,D',R_c,R_c',h,h')$
1687: corresponds to a choice of random variables $(K,X,Y,V)$ that must 
1688: inherently satisfy eq.\ (\ref{inherent2}).
1689: This observation will now help us also in estimating the equivocation levels.
1690: 
1691: Consider first the equivocation w.r.t.\ the reproduction, 
1692: for which we have the
1693: following chain of inequalities:
1694: \begin{eqnarray}
1695: Nh'&\le&nH(K|Y)\label{cc}\\
1696: &=&nH(K)-nI(K;Y)\nonumber\\
1697: &=&H(K^n)-nI(K;Y)\label{dd}\\
1698: &=&H(K^n|Y^n,Z^n)+I(K^n;Y^n,Z^n)-nI(K;Y)\nonumber\\
1699: &=&H(K^n|Y^n,Z^n)+I(K^n;Y^n)-nI(K;Y)\label{aa}\\
1700: &=&H(K^n|Y^n,Z^n)+H(Y^n)-H(Y^n|K^n)-nI(K;Y)\nonumber\\
1701: &\le&H(K^n|Y^n,Z^n)+n[\lambda R_U(D')+I(X;Y,V|K)+I(K;Y)+\epsilon]-\nonumber\\
1702: & &-n[\lambda R_U(D'+\epsilon)+I(X;Y,V|K)-\epsilon]-nI(K;Y)\label{bb}\\
1703: &=&H(K^n|Y^n,Z^n)+n\lambda[R_U(D')-R_U(D'+\epsilon)]+n\epsilon\nonumber\\
1704: &\dfn&H(K^n|Y^n,Z^n)+n\epsilon'\nonumber\\
1705: &=&I(K^n;\hU^N|Y^n,Z^n)+H(K^n|Y^n,Z^n,\hU^N)+n\epsilon'\nonumber\\
1706: &\le&H(\hU^N|Y^n,Z^n)+H(K^n|Y^n,Z^n,\hU^N)+n\epsilon'\label{last}
1707: \end{eqnarray}
1708: where (\ref{cc}) is based on condition (b),
1709: (\ref{dd}) is due to the memorylessness of $K^n$,
1710: (\ref{aa}) follows from the fact that $K^n\to Y^n\to Z^n$ is a Markov
1711: chain, (\ref{bb}) is due to the 
1712: sufficiency of condition (d) (that we have just proved)
1713: and the necessity of condition (e), 
1714: and $\epsilon'$ vanishes as $\epsilon\to 0$ due to the
1715: continuity of $R_U(\cdot)$. 
1716: Comparing the left--most side and the right--most side of the
1717: above chain of inequalities, we see that 
1718: to prove that $H(\hU^N|Y^n,Z^n)$ is essentially
1719: at least as large as $Nh'$, it remains to show 
1720: that $H(K^n|Y^n,Z^n,\hU^N)$ is small, say, 
1721: \begin{equation}
1722: \label{small}
1723: H(K^n|Y^n,Z^n,\hU^N)\le
1724: n\epsilon' 
1725: \end{equation}
1726: for large $n$. We next focus then on the proof of eq.\ (\ref{small}).
1727: 
1728: First, consider the following chain of inequalities:
1729: \begin{eqnarray}
1730: \label{equivbound}
1731: H(K^n|Y^n,Z^n,\hat{U}^N)&\le& 
1732: H(K^n,S^J(K^n)|Y^n,Z^n,\hat{U}^N)\nonumber\\
1733: &=&H(S^J(K^n)|Y^n,Z^n,\hat{U}^N)+
1734: H(K^n|S^J(K^n),Y^n,Z^n,\hat{U}^N)\nonumber\\
1735: &\le&H(S^J(K^n)|Y^n,\hat{U}^N,W^L)+
1736: H(K^n|S^J(K^n),Y^n),
1737: \end{eqnarray}
1738: where the second inequality follows from the fact that $W^L$ is a function of $\hat{U}^N$
1739: and the fact that conditioning reduces entropy.
1740: As for the second term of the right--most side, we have by Fano's inequality
1741: \begin{equation}
1742: H(K^n|S^J(K^n),Y^n)\le 1+P_{\mbox{err}}\cdot n\log|\calK|\le 
1743: n\epsilon'/2 ~~~\mbox{for large enough $n$},
1744: \end{equation}
1745: as $P_{\mbox{err}}\to 0$ is the probability of error associated with the
1746: Slepian--Wolf decoder that estimates $K^n$ from its compressed version, $S^J(K^n)$, and the
1747: ``side information,'' $Y^n$. 
1748: As for the first term of the right--most side of (\ref{equivbound}), we have
1749: \begin{eqnarray}
1750: H(S^J(K^n)|Y^n,\hat{U}^N,W^L)&=&H(W^L\oplus\tilde{W}^L|Y^n,\hat{U}^N,W^L)\nonumber\\
1751: &\le& H(\tilde{W}^L|Y^n).
1752: \end{eqnarray}
1753: It remains to show that $H(\tilde{W}^L|Y^n)\le n\epsilon'/2$ as well. In order to show this, we
1754: have to demonstrate that for a good code, once $Y^n$ is given, there is very little uncertainty
1755: with regard to $\tilde{W}^L$, which is the index of the bin.
1756: 
1757: To this end, let us
1758: suppose that the inequality in 
1759: (\ref{inherent2}) is strict (otherwise, we can slightly increase
1760: the allowable distortion level $D'$ and thus reduce $R_U(D')$).
1761: As we prove in the Appendix, for any given (arbitrarily small) $\gamma > 0$, 
1762: \begin{equation}
1763: \label{doubleexp}
1764: \mbox{Pr}\{\exists~y^n~\mbox{in the code of $\hat{k}^n$ that appears 
1765: in more than $2^{n\gamma}$ bins}\}
1766: \le |\calY|^n2^{-(n\gamma-\log e)2^{n\gamma}},
1767: \end{equation}
1768: that is, a double--exponential decay. The probability of the union of these events across all
1769: representatives $\{\hat{k}^n\}$ of all $T_{Q_K}^0\subset T_K^\delta$ will just be multiplied by
1770: the number of $\{T_{Q_K}^0\}$ in $T_K^\delta$, which is polynomial, and hence will continue to
1771: decay double--exponentially.
1772: Let us define then the event 
1773: $$\{\exists~y^n~\mbox{in the stego--codebook of some $\hat{k}^n$ that appears in 
1774: more than $2^{n\gamma}$ bins}\}$$
1775: as yet another
1776: error event (like the error events 1--5) that occurs with very small probability. Assume then, that
1777: the randomly selected codebook is ``good'' in the sense that 
1778: no stegovector appears in more than $2^{n\gamma}$ bins, for
1779: any of the representatives $\{\hat{k}^n\}$. 
1780: Now, given $y^n$, how many candidate bins
1781: (corresponding to encrypted messages $\{\tilde{w}^L\}$) can be expected at most?
1782: For a given $y^n$, let us confine attention to 
1783: the $\delta$--conditional type class $T_{K|Y}^\delta(y^n)$ (key sequences outside this set
1784: cannot have $y^n$ in their codebooks, as they are not jointly $\delta$--typical with $y^n$).
1785: The conditional $\delta$--type class $T_{K|Y}^\delta(y^n)$ can be partitioned into  
1786: conditional $0$--type classes $\{T_{Q_{K|Y}}^0(y^n)\}$, where $Q_{K|Y}$ exhausts the allowed
1787: $\delta$--tolerance in the conditional distribution around $P_{K|Y}$, in the same spirit 
1788: as before. Now, take an arbitrary representative $\tilde{k}^n$ from a given $T_{Q_{K|Y}}^0(y^n)$,
1789: and consider the set of all permutations that lead from $\tilde{k}^n$ to all other
1790: members $\{k^n\}$ of $T_{Q_{K|Y}}^0(y^n)$. Obviously, the stego--codebooks of all those
1791: $\{k^n\}$ have exactly the same configuration 
1792: of occurrences of $y^n$ as that of $\tilde{k}^n$ (since these permutations leave $y^n$ unaltered),
1793: therefore they belong to exactly the same bins as in the codebook of $\tilde{k}^n$, the
1794: number of which is at most $2^{\gamma n}$, by the hypothesis that we are using a good code.
1795: In other words, as $k^n$ scans $T_{Q_{K|Y}}^0(y^n)$, there will be no new bins that
1796: contain $y^n$ relative to those that are already in the codebook of $\tilde{k}^n$.
1797: New bins that contain $y^n$ can be seen then only by scanning 
1798: the other conditional $0$--types $\{T_{Q_{K|Y}}^0(y^n)\}$
1799: within $T_{K|Y}^\delta(y^n)$, but the number of such conditional $0$--types does not exceed
1800: the total number of conditional $0$--types, which is upper bounded, in turn, by
1801: $(n+1)^{|\calK|\cdot|\calY|}$ \cite{CK81}. Thus, the totality of stego--codebooks, for all
1802: relevant $\{k^n\}$ cannot give more than $(n+1)^{|\calK|\cdot|\calY|}\cdot 2^{n\gamma}$ 
1803: distinct bins
1804: altogether. In other words, for a good codebook:
1805: \begin{equation}
1806: \label{zzz}
1807: H(\tilde{W}^L|Y^n)\le \log[(n+1)^{|\calK|\cdot|\calY|}\cdot 2^{n\gamma}]=
1808: n\left[\gamma+|\calK|\cdot|\calY|\cdot\frac{\log(n+1)}{n}\right]
1809: \end{equation}
1810: which is less than $n\epsilon'/2$ for an appropriate choice of $\gamma$ and for large enough $n$.
1811: 
1812: Finally, for the equivocation w.r.t.\ 
1813: the original message source, we have 
1814: the following:
1815: \begin{eqnarray}
1816: H(U^N|Y^n,Z^n)&=& 
1817: H(\hat{U}^N|Y^n,Z^n)+H(U^N|Y^n,Z^n)-H(\hat{U}^N|Y^n,Z^n)\nonumber\\
1818: &\ge&nH(K|Y)-2n\epsilon'+H(U^N|Y^n,Z^n)-H(\hat{U}^N|Y^n,Z^n)\nonumber\\
1819: &=&nH(K|Y)+H(U^N)-I(U^N;\hat{U}^N)-I(U^N;Y^n,Z^n)-\nonumber\\
1820: & &H(\hat{U}^N|U^N)+I(\hat{U}^N;Y^n,Z^n)-2n\epsilon'\nonumber\\
1821: &\ge&nH(K|Y)+H(U^N)-H(\hat{U}^N)-I(U^N;Y^n,Z^n)-\nonumber\\
1822: & &H(\hat{U}^N|U^N)+I(\hat{U}^N;Y^n,Z^n)-2n\epsilon'\nonumber\\
1823: &\ge&[nH(K|Y)+NH(U)-NR_U(D')-2n\epsilon']-\nonumber\\
1824: &&[I(U^N;Y^n,Z^n)+
1825: H(\hat{U}^N|U^N)-I(\hat{U}^N;Y^n,Z^n)],
1826: \end{eqnarray}
1827: where the first inequality is due to the fact that
1828: $H(\hU^N|Y^n,Z^n)\ge n[H(K|Y)-2\epsilon']$, that we have just shown, 
1829: and the third
1830: is due to the memorylessness of $\{U_i\}$ and the fact
1831: that the rate--distortion codebook 
1832: size is $2^{NR_U(D')}$ and so, $H(\hat{U}^N)\le NR_U(D')$.
1833: Now, the second bracketed expression on the right--most side is the
1834: same as in eq.\ (\ref{bra}), where in the case of this specific scheme,
1835: both inequalities in (\ref{bra}) become equalities, i.e., this expression
1836: vanishes. This is because in our scheme, $U^N\to\hat{U}^N\to (Y^n,Z^n)$ 
1837: is a Markov chain (and so, the first inequality of (\ref{bra}) is tight) and
1838: because $H(\hat{U}^N|U^N,Y^n,Z^n)\le 
1839: H(\hat{U}^N|U^N)=0$ (as $\hat{U}^N$ is a deterministic
1840: function of $U^N$), which makes the second inequality of (\ref{bra}) tight.
1841: As a result, we have
1842: \begin{eqnarray}
1843: H(U^N|Y^n,Z^n)&\ge&N[H(K|Y)/\lambda+H(U)-R_U(D')-2\epsilon'/\lambda]\nonumber\\
1844: &\ge&N[h+R_U(D')-H(U)+H(U)-R_U(D')-2\epsilon'/\lambda]\nonumber\\
1845: &=&N(h-2\epsilon'/\lambda),
1846: \end{eqnarray}
1847: where we have used condition (a).
1848: This completes the proof of the direct part.
1849: 
1850: \section*{Acknowledgements}
1851: The author would like to thank Dr.\ Yossi
1852: Steinberg for interesting discussions.
1853: Useful comments made by the anonymous referees
1854: are acknowledged with thanks.
1855: 
1856: \section*{Appendix}
1857: \renewcommand{\theequation}{A.\arabic{equation}}
1858:     \setcounter{equation}{0}
1859: 
1860: \noindent
1861: {\it Proof of eq.\ (\ref{doubleexp})}.
1862: The probability of obtaining $y^n$ in a single 
1863: random selection within the codebook of $\hat{k}^n$ is given by
1864: \begin{eqnarray}
1865: \label{ff}
1866: \mbox{Pr}\{Y^n(j',V^n(m,j,\hat{k}^n),\hat{k}^n)=y^n\}&=&
1867: \frac{|T_{V|KY}^\delta(\hat{k}^n,y^n)|}{|T_{V|K}^\delta(\hat{k}^n)|}\cdot
1868: \frac{1}{|T_{Y|KV}^\delta(\hat{k}^n,v^n)|}\label{tt}\\
1869: &\le&\frac{2^{n(1+\delta)H(V|K,Y)}}{2^{n[(1-\delta)H(V|K)-\delta]}}
1870: \cdot\frac{1}{2^{n[(1-\delta)H(Y|K,V)-\delta]}}\nonumber\\
1871: &=& 2^{-n[H(Y|K)-\delta'']},
1872: \end{eqnarray}
1873: where the first factor in the right--hand side of (\ref{tt}) 
1874: is the probability
1875: of having a $V^n(m,j,\hat{k}^n)=v^n$ that is typical with 
1876: $y^n$ and $\hat{k}^n$ (a necessary condition for this $v^n$
1877: to generate the given $y^n$), the second factor
1878: is the probability of
1879: selecting a given $y^n$ in the random 
1880: selection of the stegotext code, and where
1881: \begin{equation}
1882: \delta''=\delta[H(V|K,Y)+H(V|K)+H(Y|K,V)+2].
1883: \end{equation}
1884: It now follows that the probability $q$ for at least one
1885: occurrence of $y^n$ among the stegowords corresponding to
1886: a certain bin, in the codebook of $\hat{k}^n$,
1887: is upper bounded (using the union bound) by
1888: \begin{eqnarray}
1889: q&\le&M_2\cdot M_3\cdot
1890: 2^{-n[H(Y|K)-\delta'']}\nonumber\\
1891: &=&2^{-n[H(Y|K)-I(X;V|K)-I(X;Y|V,K)-
1892: \delta''-2\delta-\epsilon_1-\epsilon_2]}\nonumber\\
1893: &=&2^{-n[H(Y|K)-I(X;V,Y|K)-
1894: \delta''-2\delta-\epsilon_1-\epsilon_2]}\nonumber\\
1895: &\dfn&2^{-n[H(Y|K)-I(X;Y,V|K)-\delta_1]}.
1896: \end{eqnarray}
1897: We are interested to upper bound the probability that a given $y^n$
1898: appears as a stegoword in more than $2^{n\gamma}$ bins 
1899: in the codebook of $\hat{k}^n$, for a given
1900: $\gamma > 0$. For $i=1,\ldots,M_U$, let $A_i\in\{0,1\}$ be the
1901: indicator function of the event 
1902: $$\{y^n~\mbox{appears as a stegoword in bin no.}~i~\mbox{at least once}\}.$$
1903: Then, clearly $\{A_i\}$ are i.i.d.\ with $\mbox{Pr}\{A_i=1\}=q$.
1904: Therefore,
1905: \begin{eqnarray}
1906: \mbox{Pr}\left\{\sum_{i=1}^{M_U}A_i \ge 2^{n\gamma}\right\}
1907: &\le& \exp_2\left\{-M_UD\left(\frac{2^{n\gamma}}{M_U}\|q\right)\right\}\nonumber\\
1908: &=& \exp_2\left\{-M_UD\left(2^{-n[\lambda R_U(D')-\gamma]}\|q\right)\right\},
1909: \end{eqnarray}
1910: where for $\alpha,\beta\in[0,1]$, the function $D(\alpha\|\beta)$
1911: designates the binary divergence
1912: \begin{equation}
1913: D(\alpha\|\beta)=\alpha\log\frac{\alpha}{\beta}+
1914: (1-\alpha)\log\frac{1-\alpha}{1-\beta}.
1915: \end{equation}
1916: Now, referring to eq.\ (\ref{inherent2}), suppose that
1917: \begin{equation}
1918: H(Y|K)\ge\lambda R_U(D')+I(X;V,Y|K)+\delta_1+2\gamma.
1919: \end{equation}
1920: Then, clearly,
1921: \begin{equation}
1922: 2^{-n[\lambda R_U(D')-\gamma]} > 2^{-n[H(Y|K)-I(X;Y,V|K)-\delta_1]} \ge q
1923: \end{equation}
1924: and so, $\mbox{Pr}\{\sum_{i=1}^{M_U}A_i \ge 2^{n\gamma}\}$ is further upper bounded by
1925: \begin{equation}
1926: \mbox{Pr}\left\{\sum_{i=1}^{M_U}A_i \ge 2^{n\gamma}\right\}\le 
1927: \exp_2\left\{-M_UD\left(2^{-n[\lambda R_U(D')-\gamma]}\|2^{-n[H(Y|K)-I(X;Y,V|K)-\delta_1]}\right)\right\}.
1928: \end{equation}
1929: To further bound this expression from above, we have to get a lower bound to
1930: an expression of the form $D(2^{-na}\|2^{-nb})$ for $0< a < b$. Applying the
1931: inequality $\log(1+x)=-\log(1-\frac{x}{1+x})\ge \frac{x\log e}{1+x}$,  for $x > -1$, we have:
1932: \begin{eqnarray}
1933: D(2^{-na}\|2^{-nb})&=& 2^{-na}\log\frac{2^{-na}}{2^{-nb}}+
1934: (1- 2^{-na})\log\frac{1-2^{-na}}{1-2^{-nb}}\nonumber\\
1935: &=&n(b-a)2^{-na}+(1- 2^{-na})\log\left(1+\frac{2^{-nb}-2^{-na}}{1-2^{-nb}}\right)\nonumber\\
1936: &\ge&n(b-a)2^{-na}+(2^{-nb}-2^{-na})\log e\nonumber\\
1937: &\ge&[n(b-a)-\log e]2^{-na}.
1938: \end{eqnarray}
1939: Applying this inequality with $a=\lambda R_U(D')-\gamma$ and $b=H(Y|K)-I(X;Y,V|K)-\delta_1$,
1940: we get
1941: \begin{equation}
1942: D\left(2^{-n[\lambda R_U(D')-\gamma]}\|2^{-n[H(Y|K)-I(X;Y,V|K)-\delta_1]}\right)\ge
1943: (n\gamma-\log e)2^{-n[\lambda R_U(D')-\gamma]}
1944: \end{equation}
1945: and so,
1946: \begin{equation}
1947: \mbox{Pr}\left\{\sum_{i=1}^{M_U}A_i \ge 2^{n\gamma}\right\}\le 2^{-(n\gamma-\log e)2^{n\gamma}},
1948: \end{equation}
1949: which decays double--exponentially rapidly with $n$. While this inequality holds for
1950: a {\it given} $y^n$, the probability that $\sum_{i=1}^{M_U}A_i \ge 2^{n\gamma}$ for {\it some}
1951: $y^n\in\calY^n$ would be upper bounded, using the union bound, by
1952: $|\calY|^n\cdot 2^{-(n\gamma-\log e)2^{n\gamma}}$, which still decays double--exponentially.
1953: Thus, with very high probability the random selection of stegovectors, for $\hat{k}^n$,
1954: is such that no stego codevector $y^n$ appears in more than $2^{n\gamma}$ bins.
1955: 
1956: 
1957: \begin{thebibliography}{AA}
1958: \bibitem{AKS02}
1959: A.~Adelsbach, S.~Katzenbeisser, 
1960: and A.-R.~Sadeghi, ``Cryptography meets watermarking:
1961: detecting watermarks with minimal or 
1962: zero knowledge disclosure,'' preprint 2002. 
1963: Available on--line at
1964: [www-krypt.cs.uni-sb.de/download/papers]
1965: \bibitem{CC02}
1966: S.~C.~Cheung and D.~K.~W.~Chiu, ``A watermark infrastructure for enterprise
1967: document management,'' {\it Proc.\ 36th Hawaii International Conference
1968: on System Sciences (HICSS'03)}, Hawaii, 2003.
1969: \bibitem{CT91}
1970: T.~M.~Cover and J.~A.~Thomas,
1971: {\it Elements of Information Theory}, Wiley, New York, 1991.
1972: \bibitem{CK81}
1973: I.~Csisz\'ar and J.~K\"orner, {\it Information Theory: Coding Theorems
1974: for Discrete Memoryless Systems}, Academic Press, 1981.
1975: \bibitem{GP80}
1976: S.~I.~Gel'fand and M.~S.~Pinsker, ``Coding for channel with random % CC-034
1977: parameters,'' {\it Problems of Control and Information Theory}, vol.\ 9, no.\ 1, pp.\ 19--31, 1980.
1978: \bibitem{JML00}
1979: A.~Jayawardena, B.~Murison, and P.~Lenders, ``Embedding multiresolution binary
1980: images into multiresolution watermark channels in wavelet domain,'' preprint
1981: 2000. Available on--line at
1982: [www.tsi.enst.fr/$\sim$maitre/tatouage/icassp00/articles].
1983: \bibitem{KNSTN02}
1984: K.~Kuroda, M.~Nishigaki, M.~Soga, A.~Takubo, and I.~Nakamura, ``A digital
1985: watermark using public--key cryptography for open algorithm,''
1986: {\it Proc.\ ICITA 2002}. Also, available on--line at
1987: [http://charybdis.mit.csu.edu.au/$\sim$mantolov/CD/ICITA2002/papers/131-21.pdf].
1988: \bibitem{MM03}
1989: A.~Maor and N.~Merhav,
1990: ``On joint information embedding and lossy compression,''
1991: submitted to {\it IEEE Trans.\ Inform.\ Theory}, July 2003.
1992: Available on--line at [www.ee.technion.ac.il/people/merhav].
1993: \bibitem{MM04}
1994: A.~Maor and N.~Merhav, 
1995: ``On joint information embedding and lossy compression
1996: in the presence of a stationary memoryless attack channel,''
1997: submitted to {\it IEEE Trans.\ Inform.\ Theory}, January 2004.
1998: Available on--line at [www.ee.technion.ac.il/people/merhav].
1999: \bibitem{MS03}
2000: N.~Merhav and S.~Shamai (Shitz), ``On joint source--channel
2001: coding for the Wyner--Ziv source and the Gel'fand--Pinsker channel,''
2002: {\it IEEE Trans.\ Inform.\ Theory}, vol.\ 49, no.\ 11, pp.\ 2844--2855, November 2003.
2003: \bibitem{MO03}
2004: P.~Moulin and J.~A.~O'Sullivan, ``Information--theoretic analysis of information hiding,''
2005: {\it IEEE Trans.\ Inform.\ Theory}, vol.\ 49, no.\ 3, pp.\ 563--593, March 2003.
2006: \bibitem{MW04}
2007: P.~Moulin and Y.~Wang, ``New results on steganographic capacity,'' %WM-088
2008: {\it Proc.\ CISS 2004},
2009: pp.\ 813--818, Princeton University, March 2004.
2010: \bibitem{SW73}
2011: D.~Slepian and J.~K.~Wolf, ``Noiseless coding of correlated information sources,''
2012: {\it IEEE Trans.\ Inform.\ Theory}, vol.\ IT--19, pp.\ 471--480, 1973.
2013: \bibitem{SIA99}
2014: M.~Steinder, S.~Iren, and P.~D.~Amer, 
2015: ``Progressively authenticated image transmission,''
2016: preprint 1999. Available on--line at 
2017: [www.cis.udel.edu/$\sim$amer/PEL/poc/pdf/milcom99-steiner.pdf].
2018: \bibitem{Yamamoto97}
2019: H.~Yamamoto, ``Rate--distortion theory for the Shannon
2020: cipher system,''
2021: {\it IEEE Trans.\ Inform.\ Theory}, vol.\ 43, no.\ 3, pp.\ 827--835, May 1997.
2022: \end{thebibliography}
2023: \newpage
2024: \begin{figure}[h]
2025: \hspace*{-2cm}\input{p90fig1.pstex_t}
2026: \caption{A generic watermarking/encryption system.}
2027: \label{gen}
2028: \end{figure}
2029: 
2030: \begin{figure}[h]
2031: \hspace*{-2cm}\input{p90fig2.pstex_t}
2032: \caption{The proposed watermarking/encryption scheme (general case).}
2033: \label{dir}
2034: \end{figure}
2035: \end{document}
2036: