0509:cs0509064/p90r.tex

1: % Corrected version (including the comments at the top of p90.tex and after running ispell).

2: % This is the version that is also posted in the website.

3: % After p90.tex was forwarded to Yeung, but before he sent it for review, it

4: % has been replaced by this version on 18.5.04.

5: \documentclass[11pt]{article}

6: \usepackage{amsmath}

7: \usepackage{amsfonts}

8: \usepackage{amssymb}

9: \usepackage{graphicx}

% Shorthand math macros.
% \EE: blackboard-bold E. Uses \mathbb (amsfonts/amssymb are loaded above)
% instead of the obsolete AMS-TeX name \Bbb.
10: \newcommand{\EE}{\mathbb{E}}

% Short aliases for Greek letters.
11: \newcommand{\eps}{\epsilon}

12: \newcommand{\al}{\alpha}

% Display-style sum, integral and fraction, usable inside inline math.
13: \newcommand{\dsum}{\displaystyle\sum}

14: \newcommand{\dint}{\displaystyle\int}

15: \newcommand{\dfr}{\displaystyle\frac}

% A \Large upright roman "n" set in a text box; \normalfont\rmfamily is what the
% old text-mode switch \rm expands to in the standard classes.
16: \newcommand{\bign}{\mbox{\Large\normalfont\rmfamily n}}

17:

18:

19: %\def\thesection{\arabic{section}}

20: %\def\thesubsection {\thesection.\arabic{subsection}}

21: %\renewcommand{\thesubsubsection}{\thesubsection.\arabic{subsubsection}}

22: %\renewcommand{\theequation}{\thesection.\arabic{equation}}

23: %\newcommand{\Prob}{\operatorname{Prob}\, }

24: %\newcommand{\snr}{\operatorname{SNR}\, }

25:

26:

% \req{label}: parenthesized reference to a numbered equation. Delegates to
% amsmath's \eqref so the parentheses and number stay upright even inside
% italic theorem text.
27: \newcommand{\req}[1]{\eqref{#1}}

% Relation aliases. \le and \ge already exist in the LaTeX kernel, so they are
% redefined explicitly with \renewcommand* rather than silently overwritten by \def.
28: \renewcommand*{\le}{\leq}

29: \renewcommand*{\ge}{\geq}

% \lt and \gt are new names; \newcommand* reports a clash instead of clobbering
% an existing definition.
30: \newcommand*{\lt}{<}

31: \newcommand*{\gt}{>}

% \ls{<factor>}: set the interline spacing relative to the current font.
%   \dimen0           <- \fontdimen6 of the current font (TeX's "quad" parameter)
%   \lineskip         <- <factor>*\dimen0 + 0.5*\fontdimen5 - \dimen0
%                        (\fontdimen5 is the font's x-height parameter)
%   \lineskiplimit    <- 0.9*\lineskip
%   \baselineskip     <- \lineskip + \dimen0
% The three results are then copied into \normallineskip, \normallineskiplimit
% and \normalbaselineskip, and \ignorespaces swallows blanks after the call.
% The assignments are not wrapped in a group, so they act on the scope in which
% \ls is called. Every call to \ls visible in this part of the file is commented out.
32: \newcommand{\ls}[1]

33:    {\dimen0=\fontdimen6\the\font \lineskip=#1\dimen0

34: \advance\lineskip.5\fontdimen5\the\font \advance\lineskip-\dimen0

35: \lineskiplimit=.9\lineskip \baselineskip=\lineskip

36: \advance\baselineskip\dimen0 \normallineskip\lineskip

37: \normallineskiplimit\lineskiplimit \normalbaselineskip\baselineskip

38: \ignorespaces }

39:

40: %\ls{1} % single space

41: %\ls{1.5} % double space

42: %\ls{2}

43: %\ls{1.6}

44: %\ls{1.8}

45:

46:

47: %\documentstyle[11pt,epsf]{article}

48: %\topmargin      0.25truein

49: %\oddsidemargin  -0.1truein

50: %\evensidemargin -0.1truein

51: %\textheight     8.5truein

52: %\textwidth      6.5truein

53: %\footheight     0.15truein

54: %\footskip       0.6truein

55: %\headheight     0.0truein

56: %\headsep        0.0truein

57: %\parskip 4pt plus 1pt

58:

% Unnumbered "Definition:", "Corollary:" and "Discussion:" paragraphs: a trivlist
% item with an empty label that opens with an upright bold heading and then
% switches to the upright roman text font for the body.
59: \newenvironment{define}{\begin{trivlist}\item[]{\normalfont\bfseries Definition:}\normalfont\rmfamily}{\end{trivlist}}

60: \newenvironment{corol}{\begin{trivlist}\item[]{\normalfont\bfseries Corollary:}\normalfont\rmfamily}{\end{trivlist}}

61: \newenvironment{discus}{\begin{trivlist}\item[]{\normalfont\bfseries Discussion:}\normalfont\rmfamily}{\end{trivlist}}

% Numbered statements; theorems and lemmas each use their own counter.
62: \newtheorem{theorem}{Theorem}

63: \newtheorem{lemma}{Lemma}

% Helper: relation #2 with the parenthesized step label (#1) stacked above it.
\newcommand{\steprel}[2]{\stackrel{\mbox{(#1)}}{#2}}

% Equality signs carrying a Delta or a centered dot on top.
64: \newcommand{\dfn}{\stackrel{\Delta}{=}}

65: \newcommand{\exe}{\stackrel{\cdot}{=}}

% Labeled equalities (a)-(f), for annotating the steps of a derivation.
66: \newcommand{\eqa}{\steprel{a}{=}}

67: \newcommand{\eqb}{\steprel{b}{=}}

68: \newcommand{\eqc}{\steprel{c}{=}}

69: \newcommand{\eqd}{\steprel{d}{=}}

70: \newcommand{\eqe}{\steprel{e}{=}}

71: \newcommand{\eqf}{\steprel{f}{=}}

% Labeled "less than or equal" relations (a)-(f).
72: \newcommand{\lea}{\steprel{a}{\le}}

73: \newcommand{\leb}{\steprel{b}{\le}}

74: \newcommand{\lec}{\steprel{c}{\le}}

75: \newcommand{\led}{\steprel{d}{\le}}

76: \newcommand{\lee}{\steprel{e}{\le}}

77: \newcommand{\lef}{\steprel{f}{\le}}

% Labeled "greater than or equal" relations (a)-(f).
78: \newcommand{\gea}{\steprel{a}{\ge}}

79: \newcommand{\geb}{\steprel{b}{\ge}}

80: \newcommand{\gec}{\steprel{c}{\ge}}

81: \newcommand{\ged}{\steprel{d}{\ge}}

82: \newcommand{\gee}{\steprel{e}{\ge}}

83: \newcommand{\gef}{\steprel{f}{\ge}}

% \reals: the set of real numbers. Uses the blackboard-bold R from amssymb
% (loaded above) instead of faking the glyph by kerning a roman I into a roman R.
84: \newcommand {\reals} {\mathbb{R}}

% Bold math letters \ba ... \bz and \bA ... \bZ (bold f is \Bf, since \bf is taken).
% \boldsymbol (amsmath) makes the letter follow the current math style, so it
% shrinks correctly in sub- and superscripts; a fixed-size \mbox{\boldmath $x$}
% does not. \ensuremath keeps the macros usable in running text as well.
85: \newcommand {\ba} {\ensuremath{\boldsymbol{a}}}

86: \newcommand {\bb} {\ensuremath{\boldsymbol{b}}}

87: \newcommand {\bc} {\ensuremath{\boldsymbol{c}}}

88: \newcommand {\bd} {\ensuremath{\boldsymbol{d}}}

89: \newcommand {\be} {\ensuremath{\boldsymbol{e}}}

90: \newcommand {\Bf} {\ensuremath{\boldsymbol{f}}}

91: \newcommand {\bg} {\ensuremath{\boldsymbol{g}}}

92: \newcommand {\bh} {\ensuremath{\boldsymbol{h}}}

93: \newcommand {\bi} {\ensuremath{\boldsymbol{i}}}

94: \newcommand {\bj} {\ensuremath{\boldsymbol{j}}}

95: \newcommand {\bk} {\ensuremath{\boldsymbol{k}}}

96: \newcommand {\bl} {\ensuremath{\boldsymbol{l}}}

97: \newcommand {\bm} {\ensuremath{\boldsymbol{m}}}

98: \newcommand {\bn} {\ensuremath{\boldsymbol{n}}}

99: \newcommand {\bo} {\ensuremath{\boldsymbol{o}}}

100: \newcommand {\bp} {\ensuremath{\boldsymbol{p}}}

101: \newcommand {\bq} {\ensuremath{\boldsymbol{q}}}

102: \newcommand {\br} {\ensuremath{\boldsymbol{r}}}

103: \newcommand {\bs} {\ensuremath{\boldsymbol{s}}}

104: \newcommand {\bt} {\ensuremath{\boldsymbol{t}}}

105: \newcommand {\bu} {\ensuremath{\boldsymbol{u}}}

106: \newcommand {\bv} {\ensuremath{\boldsymbol{v}}}

107: \newcommand {\bw} {\ensuremath{\boldsymbol{w}}}

108: \newcommand {\bx} {\ensuremath{\boldsymbol{x}}}

109: \newcommand {\by} {\ensuremath{\boldsymbol{y}}}

110: \newcommand {\bz} {\ensuremath{\boldsymbol{z}}}

111: \newcommand {\bA} {\ensuremath{\boldsymbol{A}}}

112: \newcommand {\bB} {\ensuremath{\boldsymbol{B}}}

113: \newcommand {\bC} {\ensuremath{\boldsymbol{C}}}

114: \newcommand {\bD} {\ensuremath{\boldsymbol{D}}}

115: \newcommand {\bE} {\ensuremath{\boldsymbol{E}}}

116: \newcommand {\bF} {\ensuremath{\boldsymbol{F}}}

117: \newcommand {\bG} {\ensuremath{\boldsymbol{G}}}

118: \newcommand {\bH} {\ensuremath{\boldsymbol{H}}}

119: \newcommand {\bI} {\ensuremath{\boldsymbol{I}}}

120: \newcommand {\bJ} {\ensuremath{\boldsymbol{J}}}

121: \newcommand {\bK} {\ensuremath{\boldsymbol{K}}}

122: \newcommand {\bL} {\ensuremath{\boldsymbol{L}}}

123: \newcommand {\bM} {\ensuremath{\boldsymbol{M}}}

124: \newcommand {\bN} {\ensuremath{\boldsymbol{N}}}

125: \newcommand {\bO} {\ensuremath{\boldsymbol{O}}}

126: \newcommand {\bP} {\ensuremath{\boldsymbol{P}}}

127: \newcommand {\bQ} {\ensuremath{\boldsymbol{Q}}}

128: \newcommand {\bR} {\ensuremath{\boldsymbol{R}}}

129: \newcommand {\bS} {\ensuremath{\boldsymbol{S}}}

130: \newcommand {\bT} {\ensuremath{\boldsymbol{T}}}

131: \newcommand {\bU} {\ensuremath{\boldsymbol{U}}}

% Hatted U and u (the text uses \hU for the reconstructed message symbol).
132: \newcommand {\hU} {\hat{U}}

133: \newcommand {\hu} {\hat{u}}

134: \newcommand {\bV} {\ensuremath{\boldsymbol{V}}}

135: \newcommand {\bW} {\ensuremath{\boldsymbol{W}}}

136: \newcommand {\bX} {\ensuremath{\boldsymbol{X}}}

137: \newcommand {\bY} {\ensuremath{\boldsymbol{Y}}}

138: \newcommand {\bZ} {\ensuremath{\boldsymbol{Z}}}

% Calligraphic capitals \calA ... \calZ; the text uses them for alphabets
% (e.g. \calX, \calY, \calU, \calK).
% \mathcal{X} replaces the obsolete font switch {\cal X}, which only exists as a
% compatibility command in the standard classes.
139: \newcommand{\calA}{\mathcal{A}}

140: \newcommand{\calB}{\mathcal{B}}

141: \newcommand{\calC}{\mathcal{C}}

142: \newcommand{\calD}{\mathcal{D}}

143: \newcommand{\calE}{\mathcal{E}}

144: \newcommand{\calF}{\mathcal{F}}

145: \newcommand{\calG}{\mathcal{G}}

146: \newcommand{\calH}{\mathcal{H}}

147: \newcommand{\calI}{\mathcal{I}}

148: \newcommand{\calJ}{\mathcal{J}}

149: \newcommand{\calK}{\mathcal{K}}

150: \newcommand{\calL}{\mathcal{L}}

151: \newcommand{\calM}{\mathcal{M}}

152: \newcommand{\calN}{\mathcal{N}}

153: \newcommand{\calO}{\mathcal{O}}

154: \newcommand{\calP}{\mathcal{P}}

155: \newcommand{\calQ}{\mathcal{Q}}

156: \newcommand{\calR}{\mathcal{R}}

157: \newcommand{\calS}{\mathcal{S}}

158: \newcommand{\calT}{\mathcal{T}}

159: \newcommand{\calU}{\mathcal{U}}

160: \newcommand{\calV}{\mathcal{V}}

161: \newcommand{\calW}{\mathcal{W}}

162: \newcommand{\calX}{\mathcal{X}}

163: \newcommand{\calY}{\mathcal{Y}}

164: \newcommand{\calZ}{\mathcal{Z}}

165:

% Page layout: size of the text block, then its vertical and horizontal offsets.
166: \setlength\textwidth{6in}

167: \setlength\textheight{9in}

168: \setlength\topmargin{-0.5in}

169: \setlength\oddsidemargin{.25in}

170:

171: \begin{document}

172: \title{On Joint Coding for Watermarking and Encryption}

173: \author{Neri Merhav}

174: \date{}

175: \maketitle

% \maketitle itself selects the plain page style, so empty must be requested after it.
176: \thispagestyle{empty}

177:

178: \begin{center}

179: Department of Electrical Engineering \\

180: Technion - Israel Institute of Technology \\

181: Haifa 32000, ISRAEL \\

182: {\tt merhav@ee.technion.ac.il}

183: \end{center}

184: \vspace{1.5\baselineskip}

185: \setlength{\baselineskip}{1.5\baselineskip}

186:

187: \begin{abstract}

188: In continuation to earlier works where the problem of joint

189: information embedding and lossless compression (of the composite signal)

190: was studied in the absence \cite{MM03} and in the presence \cite{MM04}

191: of attacks, here we consider the additional ingredient of

192: protecting the secrecy of the watermark against an unauthorized party, which

193: has no access to a secret key shared by the legitimate parties.

194: In other words, we study the problem of joint

195: coding for three objectives: information embedding, compression, and encryption.

196: Our main result is a coding theorem that provides a

197: single--letter characterization of the best achievable tradeoffs among

198: the following parameters: the distortion between the composite signal and

199: the covertext, the distortion in reconstructing the watermark by the legitimate

200: receiver, the compressibility of the composite signal (with and without the key),

201: and the equivocation of the watermark, as well as its reconstructed

202: version, given the composite signal. In the attack--free case, if the key

203: is independent of the covertext, this coding

204: theorem gives rise to a {\it threefold} separation

205: principle that tells that asymptotically, for long block codes, no optimality

206: is lost by first applying a rate--distortion code to the watermark source,

207: then encrypting the compressed codeword, and finally, embedding it into the

208: covertext using the embedding scheme of \cite{MM03}. In the more general case,

209: however, this separation principle is no longer valid, as the key plays an

210: additional role of side information used by the embedding unit.

211:

212: \vspace{1cm}

213:

214: \noindent

215: {\bf Index Terms:} Information hiding, watermarking, encryption, data compression,

216: separation principle, side information, equivocation, rate--distortion.

217: \end{abstract}

218:

219: \newpage

220: \section{Introduction}

221:

222: It is common to say that encryption and watermarking (or information hiding)

223: are related but they are substantially

224: different in the sense that in the former,

225: the goal is to protect the secrecy of

226: the {\it contents} of information, whereas in

227: the latter, it is the very {\it existence}

228: of this information that is to be kept

229: secret.

230:

231: In the last few years, however, we have been witnessing increasing

232: efforts around the {\it combination} of encryption and watermarking,

233: which is motivated by the desire to

234: further enhance the security of sensitive information that is

235: being hidden in the host signal. This is to guarantee that even

236: if the watermark is somehow detected by a hostile

237: party, its contents still remain secure due to the encryption.

238: This combination of watermarking and encryption

239: can be seen both in recently reported research work (see, e.g.,

240: \cite{AKS02,CC02,JML00,KNSTN02,MW04,SIA99}

241: and references therein)

242: and in actual technologies used in

243: commercial products with a copyright protection

244: framework, such as the CD and the DVD. Also, some commercial companies

245: that provide Internet documents, have in their

246: websites links to copyright warning messages,

247: saying that their data are protected by

248: digitally encrypted watermarks (see, e.g.,

249: {\tt http://genealogy.lv/1864Lancaster/copyright.htm}).

250:

251: This paper is devoted to the information--theoretic

252: aspects of joint watermarking and encryption together with

253: lossless compression of the composite signal that

254: contains the encrypted watermark. Specifically, we extend

255: the framework studied in \cite{MM03} and \cite{MM04}

256: of joint watermarking and compression, so as to include

257: encryption using a secret key. Before we describe the

258: setting of this paper concretely, we pause to give some

259: more detailed background on the work reported in \cite{MM03} and

260: \cite{MM04}.

261:

262: In \cite{MM03}, the following problem was studied: Given a

263: covertext source vector $X^n=(X_1,\ldots,X_n)$, generated by

264: a discrete memoryless source (DMS), and a message $m$,

265: uniformly distributed in $\{1,2,\ldots, 2^{nR_e}\}$, independently of $X^n$,

266: with $R_e$ designating the embedding rate, we wish to

267: generate a composite (stegotext) vector

268: $Y^n=(Y_1,\ldots,Y_n)$ that satisfies the

269: following requirements: (i) Similarity to the

270: covertext (for reasons of maintaining quality), in the sense that a distortion

271: constraint, $Ed(X^n,Y^n)=\sum_{t=1}^nEd(X_t,Y_t)\le nD$,

272: holds, (ii) compressibility (for reasons of saving storage space and bandwidth), in the sense

273: that the normalized entropy, $H(Y^n)/n$, does not exceed some

274: threshold $R_c$, and (iii) reliability in decoding the message $m$

275: from $Y^n$, in the sense that

276: the decoding error probability is arbitrarily

277: small for large $n$. A single--letter characterization

278: of the best achievable tradeoffs among $R_c$, $R_e$,

279: and $D$ was given in \cite{MM03}, and was

280: shown to be achievable by an extension

281: of the ordinary lossy source coding theorem, giving rise to the

282: existence of $2^{nR_e}$ {\it disjoint}

283: rate--distortion codebooks (one per each possible watermark

284: message) as long as $R_e$ does not exceed

285: a certain fundamental limit. In \cite{MM04}, this setup

286: was extended to include

287: a given memoryless attack channel,

288: $P(Z^n|Y^n)$, where item (iii) above was redefined such that

289: the decoding was based on $Z^n$ rather than on $Y^n$, and where, in view of requirement (ii),

290: it is understood that the attacker has access to the compressed version of $Y^n$,

291: and so, the attacker decompresses $Y^n$ before the attack and re--compresses it after.

292: This extension from \cite{MM03} to \cite{MM04} involved

293: a different approach, which was in the

294: spirit of the Gel'fand--Pinsker coding theorem for

295: a channel with non--causal side information

296: (SI) at the transmitter \cite{GP80}. The role of SI, in this case, was

297: played by the covertext.

298:

299: In this paper, we extend the settings of \cite{MM03} and \cite{MM04}

300: to include encryption. For the sake of clarity of the exposition,

301: we do that in several steps.

302:

303: In the first step, we extend the attack--free setting of

304: \cite{MM03}: In addition to including encryption,

305: we also extend the model of the watermark message

306: source to be an arbitrary DMS, $U_1,U_2,\ldots$,

307: independent of the covertext,

308: and not necessarily a binary symmetric source

309: (BSS) as in \cite{MM03} and \cite{MM04}.

310: Specifically, we now assume that the encoder

311: has three inputs (see Fig.\ \ref{gen}): The covertext source vector,

312: $X^n$, an independent (watermark) message source vector

313: $U^N=(U_1,\ldots,U_N)$, where $N$ may differ from

314: $n$ if the two sources operate

315: in different rates, and a secret key (shared also with

316: the legitimate decoder) $K^n=(K_1,\ldots,K_n)$, which, for mathematical

317: convenience, is assumed to operate at the same

318: rate as the covertext. It is assumed,

319: at this stage, that $K^n$

320: is independent of $U^N$ and $X^n$.

321: %\footnote{In fact,

322: %the choice of the conditional distribution $P(K^n|X^n)$ is a degree of freedom

323: %that can be optimized subject to the given randomness resources.}

324: Now, in addition to requirements

325: (i)--(iii), we impose a requirement on the equivocation

326: of the message source relative to an

327: eavesdropper that has access to $Y^n$, but not

328: to $K^n$. Specifically, we would like the normalized

329: conditional entropy, $H(U^N|Y^n)/N$, to exceed

330: a prescribed threshold, $h$ (e.g., $h=H(U)$

331: for perfect secrecy). Our first result is

332: a coding theorem that gives a set of necessary

333: and sufficient conditions, in terms of single--letter inequalities,

334: such that a triple $(D,R_c,h)$ is achievable,

335: while maintaining

336: reliable reconstruction of $U^N$ at the legitimate receiver.

337:

338: In the second step, we relax the requirement of perfect (or almost

339: perfect) watermark reconstruction, and assume that we are willing to

340: tolerate a certain distortion between the watermark message $U^N$ and its

341: reconstructed version $\hat{U}^N$, that is,

342: $Ed'(U^N,\hat{U}^N)=\sum_{i=1}^NEd'(U_i,\hat{U}_i)\le ND'$. For example,

343: if $d'$ is the Hamming distortion measure then $D'$, of course, designates

344: the maximum allowable bit error probability (as opposed to the block error

345: probability requirement of \cite{MM03} and \cite{MM04}). Also, in this case,

346: it makes sense to impose a requirement regarding the

347: equivocation of the {\it reconstructed} message,

348: $\hat{U}^N$, namely, $H(\hat{U}^N|Y^n)/N\ge h'$, for some prescribed

349: constant $h'$. The rationale is that it is $\hat{U}^N$,

350: not $U^N$, that

351: is actually conveyed to the legitimate receiver, and hence

352: there is an incentive to protect the secrecy of $\hat{U}^N$.

353: We will take into account both

354: equivocation requirements, with the understanding that if one of them

355: is superfluous, then the corresponding threshold

356: ($h$ or $h'$ accordingly) can always be set to zero.

357: Our second result then extends the above--mentioned

358: coding theorem to a single--letter characterization of achievable quintuples

359: $(D,D',R_c,h,h')$. As will be seen,

360: this coding theorem gives rise

361: to a threefold separation theorem, that separates, without asymptotic loss

362: of optimality, between three stages: rate--distortion coding of $U^N$,

363: encryption of the compressed bitstream, and finally, embedding the

364: resulting encrypted version using the embedding scheme of \cite{MM03}.

365: The necessary and sufficient conditions related to the

366: encryption are completely decoupled from those of the

367: embedding and the stegotext compression.

368:

369: In the third and last step, we drop the assumption of an attack--free

370: system and we assume a given memoryless attack channel, in analogy to \cite{MM04}.

371: Again, referring to Fig.\ \ref{gen}, it should be understood that the stegotext $Y^n$ is

372: stored (or transmitted) in compressed form, and that the attacker decompresses $Y^n$

373: before the attack and re--compresses it after (the compression and decompression units

374: are omitted from the figure).

375: As it will turn out, in the case of a memoryless attack, there is an interaction between the

376: encryption and the embedding, even if the key is still

377: assumed independent of the covertext. In particular,

378: it will be interesting to see

379: that the key, in addition to its original role in encryption,

380: serves as SI that is available

381: to both encoder and decoder (see Fig.\ \ref{dir}).

382: Also, because of the dependence between

383: the key and the composite signal, and the fact that the key is available

384: to the legitimate decoder as well,

385: it is reasonable

386: to let the compressibility constraint correspond also to the

387: conditional entropy of $Y^n$ given $K^n$, that is, {\it private} compression as opposed

388: to the previously considered

389: {\it public} compression, without the key, which enables decompression but not decryption

390: (when these two operations are carried out by different, remote units).

391: Accordingly, we will consider both the conditional and the unconditional

392: entropies of $Y^n$, i.e.,

393: $H(Y^n)/n\le R_c$ and $H(Y^n|K^n)/n\le R_c'$.

394: Our final result then is a coding theorem that

395: provides a single--letter characterization of the

396: region of achievable six--tuples $(D,D',R_c,R_c',h,h')$.

397: Interestingly, this characterization remains essentially

398: unaltered even if there is dependence between the key and the covertext,

399: which is a reasonable thing

400: to have once the key and the stegotext interact in the first place.\footnote{In fact,

401: the choice of the conditional distribution $P(K^n|X^n)$ is a degree of freedom

402: that can be optimized subject to the given randomness resources.}

403: In this context, the system designer confronts an interesting

404: dilemma regarding the desirable degree of statistical dependence

405: between the key and the covertext, which affects the dependence

406: between the key and the stegotext.

407: On the one hand, strong dependence can reduce

408: the entropy of $Y^n$ given $K^n$ (and thereby reduce

409: $R_c'$), and can also help in the embedding

410: process: For example, the extreme case of $K^n=X^n$ (which

411: corresponds to {\it private} watermarking since

412: the decoder actually has access to the covertext) is particularly

413: interesting because in this case,

414: for the encryption key, there is no need for any external resources

415: of randomness, in addition to

416: the randomness of the covertext that is already available.

417: On the other hand, when there is strong dependence between $K^n$ and $Y^n$,

418: the secrecy of the watermark might be sacrificed since $H(K^n|Y^n)$

419: decreases as well.

420: An interesting point, in this context, is that the

421: Slepian--Wolf encoder \cite{SW73} (see Fig.\ \ref{dir}) is used to

422: generate, from $K^n$, random bits that are essentially

423: independent of $Y^n$

424: (as $Y^n$ is generated only after the encryption).

425: These aspects will be seen in detail in Section 4, and even more so, in Section 6.

426:

427: The remaining parts of this paper are organized as follows:

428: In Section 2, we set some notation conventions.

429: Section 3 will be devoted to a formal problem description

430: and to the presentation of the main result for the attack--free case

431: with distortion--free watermark reconstruction (first step

432: described above). In Section 4, the setup and the results will

433: be extended along the lines of the second and the third steps,

434: detailed above,

435: i.e., a given distortion level in the watermark reconstruction and the

436: incorporation of an attack channel. Finally, Sections 5 and 6

437: will be devoted to the proof of the last (and most general) version of the

438: coding theorem, with Section 5 focusing on the converse part,

439: and Section 6 -- on the direct part.

440:

441: \section{Notation Conventions}

442:

443: We begin by establishing some notation conventions.

444: Throughout this paper, scalar random

445: variables (RV's) will be denoted by capital

446: letters, their sample values will be denoted by

447: the respective lower case letters, and their alphabets will be denoted

448: by the respective calligraphic letters.

449: A similar convention will apply to

450: random vectors and their sample values,

451: which will be denoted with the same symbols superscripted by the dimension.

452: Thus, for example, $A^\ell$ ($\ell$ -- positive integer)

453: will denote a random $\ell$-vector $(A_1,\ldots,A_\ell)$,

454: and $a^\ell=(a_1,\ldots,a_\ell)$ is a specific vector value in $\calA^\ell$,

455: the $\ell$-th Cartesian power of $\calA$. The

456: notations $a_i^j$ and $A_i^j$, where $i$

457: and $j$ are integers and $i\le j$, will designate segments $(a_i,\ldots,a_j)$

458: and $(A_i,\ldots,A_j)$, respectively,

459: where for $i=1$, the subscript will be omitted (as above).

460: For $i > j$, $a_i^j$ (or $A_i^j$) will be understood as the null string.

461: Sequences without specifying indices are denoted by $ \{\cdot\} $.

462:

463: Sources and channels will be denoted generically by the letter $P$, or $Q$,

464: subscripted by the name of the RV and its conditioning,

465: if applicable, e.g., $P_U(u)$ is the probability function of

466: $U$ at the point $U=u$, $P_{K|X}(k|x)$

467: is the conditional probability of $K=k$ given $X=x$, and so on.

468: Whenever clear from the context, these subscripts will be omitted.

469: Information theoretic quantities like entropies and mutual

470: informations will be denoted following the usual conventions

471: of the information theory literature, e.g., $H(U^N)$, $I(X^n;Y^n)$,

472: and so on. For single--letter

473: information quantities (i.e., when $n=1$ or $N=1$),

474: subscripts will be omitted, e.g., $H(U^1)=H(U_1)$ will

475: be denoted by $H(U)$,

476: similarly, $I(X^1;Y^1)=I(X_1;Y_1)$ will be denoted by $I(X;Y)$, and so on.

477:

478: \section{Problem Definition and Main Result for Step 1}

479:

480: We now turn to the formal description

481: of the model and the problem setting for step 1,

482: as described in the Introduction.

483: A source $P_X$, henceforth referred to as the

484: {\it covertext source} or the {\it host source}, generates a sequence of

485: independent copies, $\{X_t\}_{t=-\infty}^{\infty}$, of a finite--alphabet RV,

486: $X\in\calX$. At the same time and independently,

487: another source $P_U$, henceforth referred to as the {\it message source},

488: or the {\it watermark source}, generates a sequence of

489: independent copies, $\{U_i\}_{i=-\infty}^{\infty}$, of a finite--alphabet RV,

490: $U\in\calU$. The relative rate between the message source and the covertext

491: source is $\lambda$ message symbols per covertext symbol. This means that

492: while the covertext

493: source generates a block of $n$ symbols, say, $X^n=(X_1,\ldots,X_n)$,

494: the message source generates a block

495: of $N=\lambda n$ symbols, $U^N=(U_1,\ldots,U_N)$

496: (assuming, without essential loss of

497: generality, that $\lambda n$ is a positive integer).

498: In addition to the covertext source and

499: the message source, yet another source, $P_K$,

500: henceforth referred to as the {\it key source},

501: generates a sequence of independent copies,

502: $\{K_t\}_{t=-\infty}^{\infty}$, of a finite--alphabet RV,

503: $K\in\calK$, independently\footnote{The assumption

504: of independence between $\{K_t\}$

505: and $\{X_t\}$ is temporary and made now primarily for

506: the sake of simplicity of the exposition. It will

507: be dropped later on.}

508: of both $\{X_t\}$ and $\{U_i\}$.

509: The key source is assumed to operate at the same

510: rate as the covertext source, that is, while

511: the covertext source generates the block $X^n$

512: of length $n$,

513: the key source generates a block of $n$

514: symbols as well, $K^n=(K_1,\ldots,K_n)$.

515:

516: Given $n$ and $\lambda$, a block code

517: for {\it joint watermarking, encryption, and compression}

518: is a mapping $f_n:\calU^N\times\calX^n\times\calK^n\to\calY^n$,

519: $N=\lambda n$, whose output

520: $y^n=(y_1,\ldots,y_n)=f_n(u^N,x^n,k^n)\in\calY^n$

521: is referred to as the {\it stegotext} or the

522: {\it composite signal}, and accordingly, the finite alphabet $\calY$ is

523: referred to as the {\it stegotext alphabet}.

524: Let $d:\calX\times\calY\to \reals^+$ denote

525: a single--letter distortion measure between

526: covertext symbols and stegotext symbols,

527: and let the distortion between the vectors, $x^n\in\calX^n$ and $y^n\in\calY^n$,

528: be defined additively across the corresponding components, as usual.

529:

530: An $(n,\lambda,D,R_c,h,\delta)$ code is a block code

531: for joint watermarking, encryption, and compression,

532: with parameters $n$ and $\lambda$, that satisfies the following requirements:

533: \begin{itemize}

534: \item [1.] The expected distortion

535: between the covertext and the stegotext satisfies

536: \begin{equation}

537: \sum_{t=1}^n Ed(X_t,Y_t)\le nD.

538: \end{equation}

539: \item [2.] The entropy of the stegotext

540: satisfies

541: \begin{equation}

542: H(Y^n)\le nR_c.

543: \end{equation}

544: \item [3.] The equivocation of the message source satisfies

545: \begin{equation}

546: H(U^N|Y^n)\ge Nh.

547: \end{equation}

548: \item [4.] There exists a

549: decoder $g_n:\calY^n\times\calK^n\to\calU^N$ such that

550: \begin{equation}

551: P_e\dfn\mbox{Pr}\{g_n(Y^n,K^n)\ne U^N\}\le \delta.

552: \end{equation}

553: \end{itemize}

554: For a given $\lambda$, a triple $(D,R_c,h)$ is said to be {\it achievable}

555: if for every $\epsilon > 0$, there is a sufficiently large $n$

556: for which

557: $(n,\lambda,D+\epsilon,R_c+\epsilon,h-\epsilon,\epsilon)$ codes exist.

558: The {\it achievable region} of triples

559: $(D,R_c,h)$ is the set of all achievable

560: triples $(D,R_c,h)$. For simplicity, it is assumed\footnote{At the end of Section 4 (after Theorem 4),

561: we discuss the case where this limitation (or its analogue in lossy reconstruction of $U^N$) is dropped.}

562: that $H(K)

563: \le\lambda H(U)$ as this upper limit on $H(K)$ suffices

564: to achieve perfect secrecy.

565:

566: Our first coding theorem is the following:

567: \begin{theorem}

568: A triple $(D,R_c,h)$ is achievable if and only if the following conditions

569: are both satisfied:

570: \begin{itemize}

571: \item [(a)] $h \le H(K)/\lambda$.

572: \item [(b)] There exists a channel $\{P_{Y|X}(y|x),~x\in\calX,~y\in\calY\}$

573: such that: (i) $H(Y|X)\ge\lambda H(U)$, (ii) $R_c\ge\lambda H(U)+I(X;Y)$, and

574: (iii) $D\ge Ed(X,Y)$.

575: \end{itemize}

576: \end{theorem}

577:

578: As can be seen, the encryption, on the one hand, and the embedding and the

579: compression, on the other hand, do not interact at all in this theorem.

580: There is a complete decoupling between them: While

581: condition (a) refers solely to the key and the secrecy of the watermark,

582: condition (b) is only about the embedding--compression

583: part, and it is a replica of the conditions of

584: the coding theorem in \cite{MM03}, where the role of the embedding rate, $R_e$

585: (see Introduction above),

586: is played by the product $\lambda H(U)$. This suggests a very simple separation

587: principle, telling that in order to

588: attain a given achievable triple $(D,R_c,h)$,

589: first compress the watermark $U^N$ to its entropy, then encrypt $Nh$ bits (out

590: of the $NH(U)$) of the compressed

591: bit--string (by bit--by--bit XORing with the same number of

592: compressed key bits), and finally, embed this

593: partially encrypted compressed bit--string into

594: the covertext, using the coding theorem of \cite{MM03} (again,

595: see the Introduction

596: above for a brief description of this).

597:

598: \section{Extensions to Steps 2 and 3}

599:

600: Moving on to Step 2, we now relax requirement no.\ 4 in

601: the above definition of

602: an $(n,\lambda,D,R_c,h,\delta)$ code, and allow a certain

603: distortion between $U^N$ and its reconstruction $\hat{U}^N$

604: at the legitimate decoder.

605: More precisely, let $\hat{\calU}$ denote a finite alphabet,

606: henceforth referred to as the {\it message reconstruction

607: alphabet}. Let $d':\calU\times\hat{\calU}\to\reals^+$ denote

608: a single--letter distortion measure between message symbols

609: and message reconstruction symbols, and let the distortion

610: between vectors $u^N\in\calU^N$

611: and $\hat{u}^N\in\hat{\calU}^N$ be, again, defined additively

612: across the corresponding components. Finally, let $R_U(D')$

613: denote the rate--distortion function of the source $P_U$

614: w.r.t.\ $d'$, i.e.,

615: \begin{equation}

616: R_U(D')=\min\{I(U;\hU):~Ed'(U,\hU)\le D'\}.

617: \end{equation}

618: It will now be assumed that $H(K)\le\lambda R_U(D')$, for the

619: same reasoning as before.

620:

621: Requirement no.\ 4 is now replaced by the

622: following requirement:

623: There exists a

624: decoder $g_n:\calY^n\times\calK^n\to\hat{\calU}^N$ such that

625: $\hU^N=(\hU_1,\ldots,\hU_N)=g_n(Y^n,K^n)$ satisfies:

626: \begin{equation}

627: \label{distp}

628: \sum_{i=1}^NEd'(U_i,\hat{U}_i)\le ND'.

629: \end{equation}

630: In addition to this modification of requirement no.\ 4, we

631: add, to requirement no.\ 3, a specification regarding the

632: minimum allowed equivocation w.r.t.\ the reconstructed message:

633: \begin{equation}

634: H(\hU^N|Y^n)\ge Nh',

635: \end{equation}

636: in order to guarantee that the reconstructed message

637: is also secure enough. Accordingly, we modify the above definition of a block

638: code as follows:

639: An $(n,\lambda,D,D',R_c,h,h')$ code is a block code

640: for joint watermarking, encryption, and compression

641: with parameters $n$ and $\lambda$ that satisfies requirements 1--4,

642: with the above modifications of requirements 3 and 4.

643: For a given $\lambda$, a quintuple $(D,D',R_c,h,h')$

644: is said to be {\it achievable}

645: if for every $\epsilon > 0$, there is a sufficiently large $n$

646: for which

647: $(n,\lambda,D+\epsilon,D'+\epsilon,R_c+\epsilon,h-\epsilon,h'-\epsilon)$

648: codes exist.

649:

650: Our second theorem extends Theorem 1 to this setting:

651:

652: \begin{theorem}

653: A quintuple $(D,D',R_c,h,h')$ is achievable

654: if and only if the following conditions

655: are all satisfied:

656: \begin{itemize}

657: \item [(a)] $h\le H(K)/\lambda+H(U)-R_U(D')$.

658: \item [(b)] $h'\le H(K)/\lambda$.

659: \item [(c)] There exists a channel $\{P_{Y|X}(y|x),~x\in\calX,~y\in\calY\}$

660: such that: (i) $\lambda R_U(D')\le H(Y|X)$,

661: (ii) $R_c\ge\lambda R_U(D')+I(X;Y)$, and

662: (iii) $D\ge Ed(X,Y)$.

663: \end{itemize}

664: \end{theorem}

665:

666: As can be seen, the passage from Theorem 1 to Theorem 2 includes

667: the following modifications:

668: In condition (c), $H(U)$ is simply replaced by $R_U(D')$ as expected.

669: This means that the lossless compression code of $U^N$,

670: in the achievability of Theorem 1, is

671: now replaced by a rate--distortion code for distortion level $D'$.

672: Conditions (a) and (b) now tell us that the key rate (in terms of entropy)

673: should be sufficiently large to satisfy both equivocation requirements.

674: Note that the condition regarding the

675: equivocation w.r.t.\ the clean message source is softer than

676: in Theorem 1 as $H(U)-R_U(D')\ge 0$. This is because the rate--distortion code

677: for $U^N$ already introduces an uncertainty of $H(U)-R_U(D')$ bits per

678: symbol, and so, the encryption

679: should only complete it to the desired level of $h$

680: bits per symbol. This point is discussed in depth in \cite{Yamamoto97}.

681: Of course, by setting $D'=0$ (and hence also $h'=h$), we are back

682: to Theorem 1.

683:

684: We also observe that the encryption and the embedding are still decoupled

685: in Theorem 2, and that an achievable quintuple can still be attained

686: by separation: First, apply a rate--distortion code to $U^N$, as mentioned

687: earlier, then encrypt $N\cdot\max\{h+R_U(D')-H(U),h'\}$ bits

688: of the compressed codeword (to satisfy both equivocation requirements),

689: and finally, embed the (partially) encrypted

690: codeword into $X^n$, again, by using the scheme of \cite{MM03}.

691: Note that without the encryption and without

692: requirement no.\ 2 of the compressibility of $Y^n$, this

693: separation principle is a special case of the one in

694: \cite{MS03}, where a separation theorem was established

695: for the Wyner--Ziv source (with SI correlated to the source at the decoder)

696: and the Gel'fand--Pinsker channel (with channel SI at the encoder).

697: Here, there is no SI correlated to the source

698: and the role of channel SI is fulfilled by the covertext.

699: Thus, the new observation here is that the separation theorem continues

700: to hold in the presence of encryption and requirement no.\ 2.

701:

702: Finally, we turn to Step 3, of including an attack channel (see Fig.\

703: \ref{gen}).

704: Let $\calZ$ be a finite alphabet, henceforth referred to as

705: the {\it forgery alphabet}, and

706: let $\{P_{Z|Y}(z|y),~y\in\calY,~z\in\calZ\}$

707: denote a set of conditional PMF's from the stegotext alphabet to

708: the forgery alphabet. We now assume that the stegotext vector

709: is subjected to an attack modelled by the memoryless channel,

710: \begin{equation}

711: P_{Z^n|Y^n}(z^n|y^n)=\prod_{t=1}^n P_{Z|Y}(z_t|y_t).

712: \end{equation}

713: The output $Z^n$ of the attack channel will henceforth be referred to as

714: the {\it forgery}.

715:

716: It is now assumed

717: that the legitimate decoder has access to $Z^n$,

718: rather than $Y^n$ (in addition, of course, to $K^n$).

719: Thus, in requirement no.\ 4, the

720: decoder is redefined again, this time,

721: as a mapping $g_n:\calZ^n\times\calK^n\to\hat{\calU}^N$

722: such that $\hat{U}^N=g_n(Z^n,K^n)$ satisfies

723: the distortion constraint (\ref{distp}). As for the equivocation requirements,

724: the conditioning will now be on both $Y^n$ and $Z^n$, i.e.,

725: \begin{equation}

726: H(U^N|Y^n,Z^n)\ge Nh~~~\mbox{and}~~~

727: H(\hU^N|Y^n,Z^n)\ge Nh',

728: \end{equation}

729: since, if the

730: attacker and the eavesdropper are the same party (or if they cooperate),

731: then s/he may access both. In fact,

732: for the equivocation of $U^N$, the conditioning on $Z^n$ is immaterial

733: since $U^N\to Y^n\to Z^n$ is always a Markov chain, but it is not clear

734: that $Z^n$ is superfluous for the equivocation w.r.t.\ $\hU^N$

735: since $Z^n$ is one of the inputs to the decoder whose output is $\hU^N$.

736: Nonetheless, for the sake of uniformity and convenience (in the proof),

737: we keep the conditioning on $Z^n$ in both equivocation criteria.

738:

739: Redefining block codes and achievable quintuples $(D,D',R_c,h,h')$

740: according to the modified requirements in the same spirit, we now

741: have the following coding theorem, which is substantially

742: different from Theorems 1 and 2:

743:

744: \begin{theorem}

745: A quintuple $(D,D',R_c,h,h')$ is achievable

746: if and only if there exist RV's $V$ and $Y$ such that

747: $P_{KXVYZ}(k,x,v,y,z)=P_X(x)P_K(k)P_{VY|KX}(v,y|k,x)P_{Z|Y}(z|y)$,

748: where the alphabet size of $V$ is bounded by $|\calV|\le

749: |\calK|\cdot|\calX|\cdot|\calY|+1$, and such that the following

750: conditions are all satisfied:

751: \begin{itemize}

752: \item [(a)] $h\le H(K|Y)/\lambda+H(U)-R_U(D')$.

753: \item [(b)] $h'\le H(K|Y)/\lambda$.

754: \item [(c)] $\lambda R_U(D')\le I(V;Z|K)-I(V;X|K)$.

755: \item [(d)] $R_c\ge \lambda R_U(D')+I(X;Y,V|K)+I(K;Y)$.

756: \item [(e)] $D \ge Ed(X,Y)$.

757: \end{itemize}

758: \end{theorem}

759:

760: First, observe that here, unlike in Theorems 1 and 2, it is no longer

761: true that the encryption and the embedding (along with stegotext compression)

762: are decoupled, yet

763: the rate--distortion compression of $U^N$ is still separate and decoupled from both.

764: In other words, the separation principle applies here in partial manner only.

765: Note that now, although $K$ is still assumed independent of $X$,

766: it may, in general, depend on $Y$. On the negative side,

767: this dependence causes a reduction

768: in the equivocation of both the message source and its reconstruction,

769: and therefore $H(K|Y)$ replaces $H(K)$ in conditions (a) and (b).

770: On the positive side, on the

771: other hand, this dependence introduces new degrees of freedom in

772: enhancing the tradeoffs between the embedding performance

773: (condition (c)) and the compressibility (condition (d)).

774:

775: The achievability of Theorem 3 involves essentially

776: the same stages as before (rate--distortion coding of $U^N$, followed by

777: encryption, followed in turn by embedding), but this time,

778: the embedding scheme is a conditional version of the one proposed in

779: \cite{MM04}, where all codebooks depend on $K^n$, the SI given at

780: both ends (see Fig.\ \ref{dir}).

781: An interesting point regarding the encryption is that

782: one needs to generate, from $K^n$, essentially $nH(K|Y)$ random bits that

783: are {\it independent} of $Y^n$ (and $Z^n$), in order to protect the

784: secrecy against an eavesdropper that observes $Y^n$ and $Z^n$.

785: Clearly, if $Y^n$ was given in advance to the encrypting unit, then

786: the compressed bitstring of an optimal lossless

787: source code that compresses $K^n$, given $Y^n$ as SI, would have

788: this property (as if there was any dependence, then this bitstring could have

789: been further compressed, which is a contradiction).

790: However, such a source code

791: cannot be implemented

792: since $Y^n$ itself is generated

793: from the encrypted message, i.e., {\it after}

794: the encryption. In other words, this would

795: have required a circular mechanism, which may not be feasible.

796: A simple remedy is then to use a

797: {\it Slepian--Wolf encoder} \cite{SW73}, that generates

798: $nH(K|Y)$ bits that are essentially

799: independent of $Y^n$ (due to the same consideration),

800: without the need to access the

801: vector $Y^n$ to be generated.

802: For more details, the reader is referred to the

803: proof of the direct part (Section 6).

804:

805: Observe that in the absence of attack (i.e., $Z=Y$),

806: Theorem 2 is obtained as a special case

807: of Theorem 3 by choosing $V=Y$ and letting

808: both be independent of $K$, a choice which is simultaneously the best for

809: conditions (a)--(d) of Theorem 3. To see this, note the following simple

810: inequalities:

811: In conditions (a) and (b), $H(K|Y)\le H(K)$. In condition (c),

812: by setting $Z=Y$, we have

813: \begin{eqnarray}

814: I(V;Y|K)-I(V;X|K)&\le&I(V;X,Y|K)-I(V;X|K)\nonumber\\

815: &=&I(V;Y|X,K)\nonumber\\

816: &\le&H(Y|X,K)\nonumber\\

817: &\le&H(Y|X).

818: \end{eqnarray}

819: Finally in condition (d), clearly, $I(K;Y)\ge 0$ and

820: since $X$ is independent of $K$, then

821: $I(X;Y,V|K)=I(X;Y,V,K)\ge I(X;Y)$. Thus, for $Z=Y$, the achievable region

822: of Theorem 3 is a subset of the one given in Theorem 2. However, since

823: all these inequalities become equalities

824: at the same time by choosing $V=Y$ and letting both be independent of $K$,

825: the two regions are identical in the attack--free case.

826:

827: Returning now to Theorem 3, as we observed,

828: $K^n$ is now involved not only in the role

829: of a cipher key, but also as SI available at both encoder and decoder.

830: Two important points are now in order, in view of this fact.

831:

832: First, one may argue that, actually,

833: there is no real reason to assume that $K^n$ is necessarily independent

834: of $X^n$ (see also \cite{MO03}).

835: If the user has control of the mechanism of generating the key,

836: then s/he might implement, in general, a channel

837: $P_{K^n|X^n}(k^n|x^n)$ using the

838: available randomness resources, and taking (partial) advantage of the

839: randomness of the covertext. Let us

840: assume that this channel is stationary and memoryless, i.e.,

841: \begin{equation}

842: P_{K^n|X^n}(k^n|x^n)=\prod_{t=1}^n P_{K|X}(k_t|x_t)

843: \end{equation}

844: with the single--letter transition probabilities

845: $\{P_{K|X}(k|x),~x\in\calX,~k\in\calK\}$ left as a degree of freedom

846: for design. While so far, we assumed that $K$ was independent of $X$,

847: the other extreme is, of course, $K=X$ (corresponding to private

848: watermarking). Note, however, that in the attack--free case, in the absence of

849: the compressibility requirement no.\ 2 (say, $R_c=\infty$), no optimality

850: is lost by assuming that $K$ is independent of $X$, since the only

851: inequality where we have used the independence assumption, in the previous

852: paragraph, corresponds to condition (d).

853:

854: The second point is that in Theorems 1--3, so far, we have defined

855: the compressibility of the stegotext in terms of $H(Y^n)$, which is

856: suitable when the decompression of $Y^n$ is {\it public}, i.e., without

857: access to $K^n$. The legitimate decoder in our model, on the other hand,

858: has access to the SI $K^n$, which may depend on $Y^n$. In this context,

859: it then makes sense

860: to measure the compressibility of the stegotext

861: also in a {\it private} regime,

862: i.e., in terms of the {\it conditional} entropy, $H(Y^n|K^n)$.

863:

864: Our last (and most general) version of the

865: coding theorem below takes these two points into account.

866: Specifically, let us impose, in requirement no.\ 2, an additional inequality,

867: \begin{equation}

868: H(Y^n|K^n)\le nR_c',

869: \end{equation}

870: where $R_c'$ is a prescribed constant, and let us redefine accordingly

871: the block codes and the achievable region in terms of six--tuples

872: $(D,D',R_c,R_c',h,h')$. We now have the following result:

873:

874: \begin{theorem}

875: A six--tuple $(D,D',R_c,R_c',h,h')$ is achievable

876: if and only if there exist RV's $V$ and $Y$ such that

877: $P_{KXVYZ}(k,x,v,y,z)=P_{XK}(x,k)P_{VY|KX}(v,y|k,x)P_{Z|Y}(z|y)$,

878: where the alphabet size of $V$ is bounded by $|\calV|\le

879: |\calK|\cdot|\calX|\cdot|\calY|+1$, and such that the following

880: conditions are all satisfied:

881: \begin{itemize}

882: \item [(a)] $h\le H(K|Y)/\lambda+H(U)-R_U(D')$.

883: \item [(b)] $h'\le H(K|Y)/\lambda$.

884: \item [(c)] $\lambda R_U(D')\le I(V;Z|K)-I(V;X|K)$.

885: \item [(d)] $R_c\ge \lambda R_U(D')+I(X;Y,V|K)+I(K;Y)$.

886: \item [(e)] $R_c'\ge \lambda R_U(D')+I(X;Y,V|K)$.

887: \item [(f)] $D \ge Ed(X,Y)$.

888: \end{itemize}

889: \end{theorem}

890: Note that

891: the additional condition, (e), is similar to condition (d) except for

892: the term $I(K;Y)$. Also, in the joint PMF of $(K,X,V,Y,Z)$

893: we are no longer assuming that $K$ and $X$ are independent.

894: It should be pointed out that

895: in the presence of the new requirement regarding $H(Y^n|K^n)$,

896: it is more clear now that introducing dependence of $(V,Y)$ upon $K$

897: is reasonable, in general.

898: In the case $K=X$,

899: that was mentioned earlier,

900: the term $I(V;X|K)$, in condition (c),

901: and the term $I(X;Y,V|K)$, in conditions (d) and (e), both vanish.

902: Thus, both embedding performance and compression

903: performance improve, like in private watermarking.

904:

905: Finally, a comment is in order regarding the assumption $H(K)\le\lambda R_U(D')$,

906: which implies that $H(K|Y)$ cannot exceed $\lambda R_U(D')$ either.

907: If this assumption is removed, and even $H(K|Y)$ is allowed to exceed $\lambda R_U(D')$,

908: then Theorem 4 can be somewhat further extended. While $h$ cannot be further improved

909: if $H(K|Y)$ is allowed to exceed $\lambda R_U(D')$ (as it already reaches the maximum possible

910: value, $h=H(U)$, for $H(K|Y)=\lambda R_U(D')$), it turns out that there is still room for

911: improvement in $h'$. Suppose that instead of one rate--distortion codebook for $U^N$, we have

912: many {\it disjoint} codebooks. In fact, it has been shown in \cite{MM03} that there are, on the exponential scale,

913: $2^{NH(\hU|U)}$ disjoint codebooks,

914: each covering the set of typical source sequences by jointly typical

915: codewords. Now, if $H(K|Y) > \lambda R_U(D')$, we can use the $T=nH(K|Y)-NR_U(D')$ excess bits

916: of the compressed key

917: (beyond the $NR_U(D')$ bits that are used to encrypt the

918: binary representation of $\hU^N$), so as to select one of

919: $2^T$ codebooks (as long as $T < NH(\hU|U)$), and thus reach a total equivocation of $nH(K|Y)$ as long

920: as $nH(K|Y)\le NH(\hU)$, or equivalently, $H(K|Y)\le\lambda H(\hU)$. The equivocation level

921: $h'=H(\hU)$ is now the ``saturation value'' that cannot be further improved (in analogy to $h=H(U)$

922: for the original source). This means that

923: condition (b) of Theorem 4 would now be replaced by the condition

924: \begin{equation}

925: \label{13}

926: h'\le \min\{H(\hU),H(K|Y)/\lambda\}.

927: \end{equation}

928: But with this condition, it is no longer clear that the best test channel for lossy compression of

929: $U^N$ is the one that achieves $R_U(D')$, because for the above modified version of condition (b),

930: it would be best to have $H(\hU)$ as large as possible (as long as it is below $H(K|Y)/\lambda$),

931: which is in partial conflict with the minimization

932: of $I(U;\hU)$ that leads to $R_U(D')$. Therefore, a restatement of Theorem 4 would

933: require the existence of a channel $\{P_{\hU|U}(\hu|u),~u\in\calU,~\hu\in\hat{\calU}\}$ (in addition to

934: the existing requirement of a channel $P_{VY|KX}$), such that

935: the random variable $\hU$ now takes part in the

936: compromise among {\it all} criteria of the problem. This means

937: that in conditions (a),(c),(d), and (e) of Theorem 4,

938: $R_U(D')$ should be replaced by $I(U;\hU)$, and there would be an

939: additional condition (g): $Ed'(U,\hU)\le D'$. Condition (a), in view of the earlier discussion above,

940: would now be of the form:

941: \begin{equation}

942: \label{14}

943: h\le \min\{H(U),H(K|Y)/\lambda+H(U)-I(U;\hU)\}\equiv H(U)-[I(U;\hU)-H(K|Y)/\lambda]_+,

944: \end{equation}

945: where $[z]_+\dfn\max\{0,z\}$.

946: Of course, under the assumption $H(K)\le\lambda R_U(D')$, that we have used thus far,

947: \begin{equation}

948: H(\hU)\ge I(U;\hU)\ge R_U(D')\ge H(K)/\lambda\ge H(K|Y)/\lambda,

949: \end{equation}

950: in other words, $\min\{H(\hU),H(K|Y)/\lambda\}$ is always

951: attained by $H(K|Y)/\lambda$, and so, the dependence on $H(\hU)$ disappears, which means that the best

952: choice of $\hU$ (for all other conditions) is back to be the one that minimizes $I(U;\hU)$,

953: which gives us Theorem 4 as is.

954:

955: It is interesting to point out that

956: this additional extension gives rise to yet

957: another step in the direction of invalidating

958: the separation principle: While in Theorem 4 only the encryption and the

959: embedding interacted, yet the rate--distortion coding of $U^N$ was still

960: independent of all other ingredients

961: of the system, here even this is no longer true, as the choice

962: of the test channel $P_{\hU|U}$ takes into account also compromises that

963: are associated with the encryption and the embedding.

964:

965: Note that this discussion

966: applies also to the {\it classical} joint source--channel coding, where there is no

967: embedding at all: In this case, $X$ is a degenerate RV (say, $X\equiv 0$, if $0\in\calX$), and so,

968: the mutual information terms depending on $X$ in conditions (c), (d) and (e), all

969: vanish, the best choice of $V$ is $V=Y$ (thus, the r.h.s in condition (c) becomes the capacity

970: of the channel $P_{Z|Y}$ with $K$ as SI at both ends),

971: and condition (f) may be interpreted as a (generalized) power

972: constraint (with power function $\phi(y)=d(0,y)$). Nonetheless, the new versions of conditions (a) and (b)

973: remain the same as in eqs.\ (\ref{13})

974: and (\ref{14}). This is to say that the violation of the separation principle

975: occurs even in the classical model of a communication system,

976: once security becomes an issue

977: and one is interested in the security of the reconstructed source.

978:

979: \section{Proof of the Converse Part of Theorem 4}

980:

981: Let an $(n,\lambda,D+\epsilon,D'+\epsilon,R_c+\epsilon,

982: R_c'+\epsilon,h-\epsilon,h'-\epsilon)$ code be given.

983: First, from the requirement $H(Y^n|K^n)\le n (R_c'+\epsilon)$, we have:

984: \begin{eqnarray}

985: n(R_c'+\epsilon) &\ge& H(Y^n|K^n)\label{1st}\\

986: &=&H(Y^n|U^N,K^n)+I(U^N;Y^n|K^n)\nonumber\\

987: &\ge&H(Y^n|U^N,K^n)+I(U^N;Z^n|K^n)\nonumber\\

988: &=&H(Y^n|U^N,K^n)+I(U^N;Z^n,K^n) \label{2nd}

989: \end{eqnarray}

990: where the second inequality comes from the data processing theorem

991: ($U^N\to Y^n\to Z^n$ is a Markov chain given $K^n$)

992: and the last equality comes from the

993: chain rule and the fact that $U^N$ and $K^n$

994: are independent. Define

995: $\tilde{V}_t=(X_{t+1}^n,U^N,K^{t-1},Z^{t-1})$,

996: $J$ -- as a uniform RV over $\{1,\ldots,n\}$, $X=X_J$, $K=K_J$, $Y=Y_J$, $Z=Z_J$,

997: $V'=\tilde{V}_J$, and $V=(\tilde{V}_J,J)=(V',J)$.

998: Now, the first term on the

999: right--most side of eq.\ (\ref{2nd}) is further lower bounded

1000: in the following manner.

1001: \begin{eqnarray}

1002: H(Y^n|U^N,K^n)&\ge&I(X^n;Y^n|U^N,K^n)\nonumber\\

1003: &=&I(X^n;Y^n,U^N,K^n)-I(X^n;U^N,K^n)\nonumber\\

1004: &=&\sum_{t=1}^n I(X_t;Y^n,U^N,K^n|X_{t+1}^n)-I(X^n;K^n)\label{xp1}\\

1005: &=&\sum_{t=1}^n I(X_t;Y^n,U^N,K^n,X_{t+1}^n)-nI(X;K)\label{xp2}\\

1006: &\ge&\sum_{t=1}^n

1007: I(X_t;K_t,Y_t,U^N,K^{t-1},Z^{t-1},X_{t+1}^n)-nI(X;K)\label{xp3}\\

1008: &=&\sum_{t=1}^n I(X_t;K_t,Y_t,\tilde{V}_t)-nI(X;K)\nonumber\\

1009: &=&n[I(X;K,Y,V'|J)-I(X;K)]\nonumber\\

1010: &=&n[I(X;K,Y,V',J)-I(X;K)]\label{xp4}\\

1011: &=&nI(X;Y,V|K)\label{4th}

1012: \end{eqnarray}

1013: where (\ref{xp1}) is due to the chain

1014: rule and the fact that $(X^n,K^n)$ is independent of

1015: $U^N$ (hence $U^N\to K^n\to X^n$ is trivially a Markov chain),

1016: (\ref{xp2}) is due to the memorylessness of $\{(X_t,K_t)\}$,

1017: (\ref{xp3}) is due to the data processing theorem,

1018: and (\ref{xp4}) follows from the fact that $\{X_t\}$ is stationary

1019: and so, $X=X_J$ is independent of $J$.

1020: The second term on the right--most

1021: side of eq.\ (\ref{2nd}) is in turn lower bounded following

1022: essentially the same ideas as in the proof of the converse

1023: to the rate--distortion coding theorem (see, e.g., \cite{CT91}):

1024: \begin{eqnarray}

1025: I(U^N;Z^n,K^n)&=&H(U^N)-H(U^N|Z^n,K^n)\nonumber\\

1026: &=&\sum_{i=1}^N[H(U_i)-H(U_i|U^{i-1},Z^n,K^n)]\nonumber\\

1027: &=&\sum_{i=1}^N I(U_i;U^{i-1},Z^n,K^n)\nonumber\\

1028: &\ge&\sum_{i=1}^N I(U_i;[g_n(Z^n,K^n)]_i)\nonumber\\

1029: &\ge&\sum_{i=1}^N R_U(Ed'(U_i,[g_n(Z^n,K^n)]_i))\nonumber\\

1030: &\ge&NR_U\left(\frac{1}{N}\sum_{i=1}^N

1031: Ed'(U_i,[g_n(Z^n,K^n)]_i)\right)\nonumber\\

1032: &\ge&NR_U(D'+\epsilon),\label{5th}

1033: \end{eqnarray}

1034: where $[g_n(Z^n,K^n)]_i$ denotes the $i$-th component projection

1035: of $g_n(Z^n,K^n)$, i.e., $\hU_i$ as a function of $(Z^n,K^n)$.

1036: Combining eqs.\ (\ref{2nd}), (\ref{4th}), and (\ref{5th}), we get

1037: \begin{equation}

1038: n(R_c'+\epsilon)\ge NR_U(D'+\epsilon)+nI(X;Y,V|K).

1039: \end{equation}

1040: Dividing by $n$, we get

1041: \begin{equation}

1042: \label{6th}

1043: R_c'+\epsilon\ge \lambda R_U(D'+\epsilon)+I(X;Y,V|K).

1044: \end{equation}

1045: Using the arbitrariness of $\epsilon$ together

1046: with the continuity of

1047: $R_U(\cdot)$, we get condition (e) of Theorem 4.

1048:

1049: Condition (d) is derived in the very same manner except that

1050: the starting point is the inequality $n(R_c+\epsilon)\ge H(Y^n)$,

1051: and when $H(Y^n)$ is further bounded from below, in analogy

1052: to the chain of inequalities (\ref{2nd}), there is

1053: an additional term, $I(K^n;Y^n)$,

1054: that is in turn

1055: lower bounded in the following manner:

1056: \begin{eqnarray}

1057: I(K^n;Y^n)&\ge&\sum_{t=1}^n I(K_t;Y_t)\nonumber\\

1058: &=&nI(K;Y|J)\nonumber\\

1059: &=&n[H(K|J)-H(K|J,Y)]\nonumber\\

1060: &\ge&n[H(K)-H(K|Y)]\nonumber\\

1061: &=&nI(K;Y),

1062: \end{eqnarray}

1063: where the first inequality is because of the memorylessness of $\{K_t\}$,

1064: and the second inequality comes from the facts that

1065: conditioning reduces entropy (in the second term)

1066: and that $K$ is independent of

1067: $J$ (again, due to the stationarity of $\{K_t\}$).

1068: This gives the additional term, $I(K;Y)$, in condition (d).

1069:

1070: Condition (c) is obtained as follows:

1071: \begin{eqnarray}

1072: NR_U(D'+\epsilon)&\le&I(U^N;K^n,Z^n)\nonumber\\

1073: &=&I(U^N;K^n,Z^n)-I(U^N;K^n,X^n)\nonumber\\

1074: &\le&\sum_{t=1}^n[I(\tilde{V}_t;K_t,Z_t)-I(\tilde{V}_t;K_t,X_t)]\\

1075: &=&n[I(V';K,Z|J)-I(V';K,X|J)]\nonumber\\

1076: &\le&n[I(V',J;K,Z)-I(V',J;K,X)]\\

1077: &=&n[I(V;K,Z)-I(V;K,X)]\nonumber\\

1078: &=&n[I(V;Z|K)-I(V;X|K)],

1079: \end{eqnarray}

1080: where

1081: the first inequality is (\ref{5th}),

1082: the first equality is due to the independence between $U^N$ and $(K^n,X^n)$,

1083: the second inequality is

1084: an application of \cite[Lemma 4]{GP80},

1085: the third inequality is due to the fact

1086: that $I(K,Z;J)\ge 0$ and $I(K,X;J)=0$ (due to the

1087: stationarity of $\{(K_t,X_t)\}$), and

1088: the last equality is obtained by adding and subtracting

1089: $I(V;K)$. Again, since this is true for every $\epsilon > 0$,

1090: it holds also for $\epsilon=0$, due to continuity.

1091:

1092: As for condition (f), we have:

1093: \begin{equation}

1094: D+\epsilon\ge\frac{1}{n}\sum_{t=1}^nEd(X_t,Y_t)=Ed(X,Y),

1095: \end{equation}

1096: and we use once again the arbitrariness of $\epsilon$.

1097: Regarding condition (b), we have:

1098: \begin{eqnarray}

1099: nH(K|Y)&\ge&nH(K|Y,J)\nonumber\\

1100: &=&\sum_{t=1}^nH(K_t|Y_t)\nonumber\\

1101: &\ge&\sum_{t=1}^nH(K_t|K^{t-1},Y^n)\nonumber\\

1102: &=& H(K^n|Y^n)\nonumber\\

1103: &=& H(K^n|Y^n,Z^n)\nonumber\\

1104: &\ge&I(K^n;\hat{U}^N|Y^n,Z^n)\nonumber\\

1105: &=&H(\hat{U}^N|Y^n,Z^n)-H(\hat{U}^N|Y^n,Z^n,K^n)\nonumber\\

1106: &=&H(\hat{U}^N|Y^n,Z^n)\nonumber\\

1107: &\ge&N(h'-\epsilon),

1108: \end{eqnarray}

1109: where the last equality is due to the fact that $\hat{U}^N$

1110: is, by definition, a function of $(Z^n,K^n)$, and the last

1111: inequality is by the hypothesis that the code achieves an equivocation of

1112: at least $N(h'-\epsilon)$. Dividing by $N$

1113: and taking the limit $\epsilon\to 0$,

1114: leads to

1115: $h'\le H(K|Y)/\lambda$, which is condition (b).

1116: Finally, to prove condition (a), consider the inequality

1117: $nH(K|Y)\ge H(\hat{U}^N|Y^n,Z^n)$, that we have just proved,

1118: and proceed as follows (see also \cite{Yamamoto97}):

1119: \begin{eqnarray}

1120: \label{8th}

1121: nH(K|Y)&\ge&H(\hat{U}^N|Y^n,Z^n)\nonumber\\

1122: &\ge&H(\hat{U}^N|Y^n,Z^n)+N(h-\epsilon)-H(U^N|Y^n,Z^n)\nonumber\\

1123: &=&N(h-\epsilon)-H(U^N)+I(U^N;Y^n,Z^n)-\nonumber\\

1124: & &I(\hat{U}^N;Y^n,Z^n)+I(\hat{U}^N;U^N)+H(\hat{U}^N|U^N)\nonumber\\

1125: &\ge&N[h-\epsilon-H(U)+R_U(D'+\epsilon)]+\nonumber\\

1126: & &[I(U^N;Y^n,Z^n)-I(\hat{U}^N;Y^n,Z^n)+H(\hat{U}^N|U^N)],

1127: \end{eqnarray}

1128: where the second inequality follows from the hypothesis that

1129: the code satisfies $H(U^N|Y^n,Z^n)\ge N(h-\epsilon)$,

1130: and the third inequality is due to the memorylessness of $\{U_i\}$, the

1131: hypothesis that $\sum_{i=1}^NEd'(U_i,\hU_i)\le N(D'+\epsilon)$,

1132: and the converse to the rate--distortion coding theorem.

1133: Now, to see that the second bracketed term is non--negative, we have the

1134: following chain of inequalities:

1135: \begin{eqnarray}

1136: \label{bra}

1137: && I(U^N;Y^n,Z^n)-I(\hat{U}^N;Y^n,Z^n)+H(\hat{U}^N|U^N)\nonumber\\

1138: &=&I(U^N;Y^n,Z^n)-H(Y^n,Z^n)+H(Y^n,Z^n|\hat{U}^N)

1139: +H(\hat{U}^N|U^N)\nonumber\\

1140: &\ge& I(U^N;Y^n,Z^n)-H(Y^n,Z^n)+H(Y^n,Z^n|U^N,\hat{U}^N)

1141: +H(\hat{U}^N|U^N)\nonumber\\

1142: &=& I(U^N;Y^n,Z^n)-H(Y^n,Z^n)+H(Y^n,Z^n,\hat{U}^N|U^N)\nonumber\\

1143: &\ge& I(U^N;Y^n,Z^n)-H(Y^n,Z^n)+H(Y^n,Z^n|U^N)\nonumber\\

1144: &=& 0.

1145: \end{eqnarray}

1146: Combining this with eq.\ (\ref{8th}), we have

1147: \begin{equation}

1148: nH(K|Y)\ge

1149: N[h-\epsilon-H(U)+R_U(D'+\epsilon)].

1150: \end{equation}

1151: Dividing again by $N$, and letting $\epsilon$ vanish, we obtain

1152: $h\le H(K|Y)/\lambda+H(U)-R_U(D')$, which

1153: completes the proof of condition (a).

1154:

1155: To complete the proof of the converse part, it remains to show that the

1156: alphabet size of $V$ can be reduced to $|\calK|\cdot|\calX|\cdot|\calY|+1$.

1157: To this end, we extend the proof of the parallel argument in \cite{MM04}

1158: by using the support lemma (cf.\ \cite{CK81}), which is based on

1159: Carath\'{e}odory's theorem. According to this lemma, given $J$ real

1160: valued continuous functionals $f_{j}$, $j=1,...,J$ on the set

1161: $\calP(\calX)$ of probability distributions over the alphabets

1162: $\calX$, and given any probability measure $\mu$ on the Borel

1163: $\sigma$-algebra of $\calP(\calX)$, there exist $J$ elements

1164: $Q_{1},...,Q_{J}$ of $\calP(\calX)$ and $J$ non-negative reals,

1165: $\alpha_{1},...,\alpha_{J}$, such that

1166: $\sum_{j=1}^{J}\alpha_{j}=1$ and for every $j=1,...,J$

1167: \begin {eqnarray}

1168:     \int_{\calP(\calX)}f_{j}(Q)\mu(dQ) =

1169:     \sum_{i=1}^{J}\alpha_{i}f_{j}(Q_{i}).

1170: \end {eqnarray}

1171: Before we actually apply the support lemma, we first rewrite the

1172: relevant mutual informations of Theorem 4 in a more convenient

1173: form for the use of this lemma. First, observe that

1174: \begin {eqnarray}

1175:     I(V;Z|K)-I(V;X|K)   & = & H(Z|K)-H(Z|V,K) - H(X|K) + H(X|V,K)\nonumber\\

1176:                         & = & H(Z|K)-H(X|K) + H(K,X|V)-H(K,Z|V).

1177: \end {eqnarray}

1178: and

1179: \begin {eqnarray}

1180: I(X;Y,V|K)  & = & I(X;V|K) + I(X;Y|V,K) \\

1181:                         & = & H(X|K) - H(X|V,K) +

1182:                         H(X|V,K)-H(X|V,Y,K) \nonumber \\

1183:                         & = & H(X|K)-H(X|V,Y,K) \nonumber \\

1184:                         & = & H(X|K)-H(K,X,Y|V)+H(K,Y|V).

1185: \end {eqnarray}

1186: For a given joint distribution of

1187: $(K,X,Y)$, and given $P_{Z|Y}$, $H(Z|K)$ and $H(X|K)$ are

1188: both given and unaffected by $V$. Therefore, in order to preserve

1189: prescribed values of $I(V;Z|K)-I(V;X|K)$ and $I(X;V,Y|K)$, it is

1190: sufficient to preserve the associated values $H(K,X|V) - H(K,Z|V)$

1191: and $H(K,X,Y|V) - H(K,Y|V)$.

1192: Let us define then the following functionals of a generic

1193: distribution $Q$ over $\calK\times\calX\times\calY$,

1194: where $\calK\times\calX \times

1195: \calY$ is assumed, without loss of generality, to be

1196: $\{1,2,...,m\}$, $m = |\calK|\cdot|\calX|\cdot|\calY|$:

1197: \begin{eqnarray}

1198:     &&f_{i}(Q) = Q(k,x,y), ~~~i\dfn (k,x,y)=1,...,m-1\\

1199:     &&f_{m}(Q) =

1200:     \sum_{k,x,y}Q(k,x,y)\sum_{z}P_{Z|Y}(z|y)

1201: \log\frac{\sum_{x',y'}Q(k,x',y')P_{Z|Y}(z|y')}{Q(k,x)}.

1202: \end{eqnarray}

1203: Next define

1204: \begin{eqnarray}

1205:     &&f_{m+1}(Q) =

1206:     \sum_{k,x,y}Q(k,x,y)\log\frac{Q(k,y)}{Q(k,x,y)}.

1207: \end{eqnarray}

1208: Applying now the support lemma, we find that there exists a random

1209: variable $V$ (jointly distributed with $(K,X,Y)$), whose alphabet

1210: size is $|\calV| = m+1 = |\calK|\cdot|\calX|\cdot|\calY|+1$ and it satisfies

1211: simultaneously:

1212: \begin{eqnarray}

1213:     \sum_{v}\Pr\{V = v\}f_{i}(P(\cdot|v)) = P_{KXY}(k,x,y), \textrm{ }

1214:     i=1,...,m-1,

1215: \end{eqnarray}

1216: \begin{eqnarray}

1217:     \sum_{v}\Pr\{V = v\}f_{m}(P(\cdot|v)) = H(K,X|V) - H(K,Z|V),

1218: \end{eqnarray}

1219: and

1220: \begin{eqnarray}

1221:     \sum_{v}\Pr\{V = v\}f_{m+1}(P(\cdot|v)) = H(K,X,Y|V) -

1222:     H(K,Y|V).

1223: \end{eqnarray}

1224: It should be pointed out that this random variable maintains the

1225: prescribed distortion level $Ed(X,Y)$ since

1226: $P_{XY}$ is preserved. By the same token, $H(K|Y)$ and $I(K;Y)$, which depend

1227: only on $P_{KY}$, are preserved as well.

1228: This completes the proof of the

1229: converse part of Theorem 4.

1230:

1231: \section{Proof of the Direct Part of Theorem 4}

1232:

1233: In this section, we show that if there exist RV's $(V,Y)$ that satisfy the

1234: conditions of Theorem 4, then for every $\epsilon > 0$,

1235: there is a sufficiently large $n$ for which

1236: $(n,\lambda,D+\epsilon,D'+\epsilon,R_c+\epsilon,

1237: R_c'+\epsilon,h-\epsilon,h'-\epsilon)$ codes exist.

1238: One part of the proof is strongly based

1239: on a straightforward extension of the proof of the direct part of

1240: \cite{MM04} to the case

1241: of additional SI present at both encoder and decoder. Nonetheless,

1242: for the sake of completeness, the full details are provided here.

1243: It should be pointed out that for the attack--free case, an analogous

1244: extension can easily be offered to the direct part of \cite{MM03}.

1245:

1246: We first digress to establish some additional notation

1247: conventions associated with the method of types \cite{CK81}. For a

1248: given generic

1249: finite--alphabet random variable (RV) $A \in \calA$ (or a vector of

1250: RV's taking on values in $\calA$), and a vector $a^\ell \in

1251: \calA^{\ell}$ ($\ell$ -- positive integer),

1252: the empirical probability mass function (EPMF) is a

1253: vector $P_{a^\ell}=\{P_{a^\ell}(a'),~a' \in \calA\}$, where $P_{a^\ell}(a')$ is

1254: the relative frequency of the letter $a' \in \calA$ in the vector

1255: $a^\ell$. Given $\delta > 0$, let us denote the set of all

1256: $\delta$-typical sequences of length $\ell$ by

1257: $T_{P_A}^\delta$, or by $T_A^\delta$

1258: (if there is no ambiguity regarding the

1259: PMF that governs $A$), i.e., $T_A^\delta$

1260: is the set of the sequences $a^\ell \in

1261: \calA^\ell$ such that

1262: \begin{equation}

1263: \label{Px}

1264:     (1-\delta)P_{A}(a') \leq P_{a^\ell}(a') \leq (1+\delta)P_{A}(a')

1265: \end{equation}

1266: for every $a' \in \calA$. For sufficiently large $\ell$,

1267: the size of $T_A^\delta$ is well--known \cite{CK81} to be bounded by

1268: \begin {equation}

1269: \label{TgxSize}

1270:     2^{\ell[(1-\delta)H(A)-\delta]} \leq

1271:     |T_A^\delta| \leq 2^{\ell(1+\delta)H(A)}.

1272: \end{equation}

1273: It is also well--known (by the weak law of large numbers)

1274: that:

1275: \begin{equation}

1276: \label{PrTgx}

1277:     \Pr \big\{ A^\ell \notin T_A^\delta \big\} \leq \delta

1278: \end{equation}

1279: for all $\ell$ sufficiently large.

1280: For a given generic channel $P_{B|A}(b|a)$ and for each $a^\ell \in

1281: T_A^\delta$, the set

1282: of all sequences $b^\ell$ that are jointly

1283: $\delta$-typical with $a^\ell$, will be denoted by

1284: $T_{P_{B|A}}^\delta(a^\ell)$,

1285: or by $T_{B|A}^\delta(a^\ell)$ if there is no ambiguity, i.e.,

1286: $T_{B|A}^\delta(a^\ell)$ is the set of all $b^\ell$ such that:

1287: \begin{equation}

1288: \label{Pygx}

1289:     (1-\delta)P_{a^\ell}(a')P_{B|A}(b'|a') \leq P_{a^\ell b^\ell}(a',b') \leq

1290:     (1+\delta)P_{a^\ell}(a')P_{B|A}(b'|a'),

1291: \end{equation}

1292: for all $a'\in \calA, b'\in \calB$, where $P_{a^\ell b^\ell}(a',b')$

1293: denotes the fraction of occurrences of the pair $(a',b')$ in

1294: $(a^\ell,b^\ell)$. Similarly as in eq.\ (\ref{TgxSize}), for all sufficiently large $\ell$ and

1295: $a^\ell \in T_A^\delta$, the size of $T_{B|A}^\delta(a^\ell)$ is bounded as follows:

1296: \begin {equation}

1297: \label{TgyxSize}

1298:     2^{\ell[(1-\delta)H(B|A)-\delta]} \leq

1299:     |T_{B|A}^\delta(a^\ell)| \leq 2^{\ell(1+\delta)H(B|A)}.

1300: \end{equation}

1301: Finally, observe that for all $a^\ell \in T_A^\delta$ and $b^\ell

1302: \in T_{B|A}^\delta(a^\ell)$, the distortion

1303: $d(a^\ell,b^\ell)=\sum_{j=1}^\ell d(a_j,b_j)$ is upper bounded by:

1304: \begin{equation}

1305: \label{d_theoretic}

1306:     d(a^\ell,b^\ell) \leq \ell(1+\delta)^{2}\sum_{a',b'}P_{A}(a')P_{B|A}(b'|a')d(a',b')

1307: \dfn \ell(1+\delta)^{2}Ed(A,B).

1308: \end{equation}

1309:

1310: Let $(K,X,V,Y,Z)$ be a given random vector that

1311: satisfies the conditions of Theorem 4.

1312: We now describe the mechanisms of random code selection

1313: and the encoding and decoding operations. For a given $\epsilon > 0$, fix $\delta$ such that

1314: $2\delta+\max\{2\cdot\exp\{-2^{n\delta}\}+2^{-n\delta},\delta^{2}\}

1315: \leq \epsilon$. Define also

1316: \begin{equation}

1317: \epsilon_1\dfn \delta[1+H(V|K)+H(V|K,X)],

1318: \end{equation}

1319: \begin{equation}

1320: \epsilon_2\dfn \delta[1+H(Y|K,V)+H(Y|K,X,V)],

1321: \end{equation}

1322: and

1323: \begin{equation}

1324: \epsilon_3\dfn\delta[1+H(V|K)+H(V|Z,K)].

1325: \end{equation}

1326: \\

1327: \\

1328: \noindent \textsl{Generation of a rate--distortion code}: \\

1329: Apply the type--covering lemma \cite{CK81} and

1330: construct a rate--distortion codebook that covers $T_U^\delta$

1331: within distortion

1332: $N(D'+\epsilon)$ w.r.t.\ $d'$, using $2^{NR_U(D')}$ codewords.

1333: \\

1334: \\

1335: \noindent \textsl{Generation of the encrypting bitstream}: \\

1336: For every $k^n\in T_K^\delta$, randomly select an index in the

1337: set $\{0,1,\ldots,2^{n[H(K|Y)+\delta]}-1\}$ with a uniform

1338: distribution. Denote by

1339: $s^J(k^n)=(s_1(k^n),\ldots,s_J(k^n))$, $s_j(k^n)\in\{0,1\}$, $j=1,\ldots,J$,

1340: the binary string of length $J=n[H(K|Y)+\delta]$ that represents

1341: this index. (Note that $s^J(k^n)$ can be interpreted as the output

1342: of the Slepian--Wolf encoder for $K^n$, where $Y^n$ plays the role

1343: of SI at the decoder \cite{SW73}.)

1344: \\

1345: \\

1346: \noindent \textsl{Generation of an auxiliary embedding code}: \\

1347: We first construct an auxiliary code capable of

1348: embedding $2^{NR_U(D')}$ watermarks by a random selection technique.

1349: First, $M_1=2^{nR_1}$, $R_1 = I(V;Z|K)-\epsilon_3-\delta$,

1350: sequences $\{V^n(i,k^n)\}$, $i\in\{1,\ldots,M_1\}$, are drawn

1351: independently from $T_{V|K}^\delta(k^n)$ for every $k^n\in T_{K}^\delta$.

1352: For every such $k^n$, let us denote the set of these

1353: sequences by $\calC(k^n)$. The elements of $\calC(k^n)$ are evenly

1354: distributed among $M_U \dfn 2^{NR_U(D')}$ bins, each of size $M_2

1355: = 2^{nR_2}$, $R_2 = I(X;V|K)+\epsilon_1 + \delta$ (this is possible

1356: thanks to condition (c) of Theorem 4, provided

1357: that the inequality therein is strict). A different

1358: (encrypted) message of length $L=NR_U(D')=n\lambda R_U(D')$ bits

1359: is attached to each bin, identifying a sub-code

1360: that represents this message. We denote the codewords in bin number $m$ ($m

1361: \in \{1,2,\ldots,M_U\}$), by $\{V^n(m,j,k^n)\}$, $j \in \{1,2,\ldots,M_2\}$.

1362: \\

1363: \\

1364: \noindent \textsl{Stegotext sequence generation}: \\

1365: \noindent For each auxiliary sequence (in the above auxiliary codebook

1366: of each $\delta$--typical $k^n$),

1367: $V^n(m,j,k^n)=v^n$, a set of $M_3 \dfn 2^{nR_3}$, $R_3 =

1368: I(X;Y|V,K)+\epsilon_2 + \delta$, stegotext sequences

1369: $\{Y^n(j',v^n,k^n)\}$, $j' \in \{1,\ldots,M_3\}$, are independently drawn from

1370: $T_{Y|VK}^\delta(v^n,k^n)$. We denote this set by $\calC(v^n,k^n)$.

1371: \\

1372: \\

1373: \noindent \textsl{Encoding}:\\

1374: \noindent Upon receiving a triple $(u^N,x^n,k^n)$, the encoder acts as

1375: follows:

1376: \begin{enumerate}

1377: \item If $u^N\in T_U^\delta$,

1378: let $w^L=(w_1,\ldots,w_L)$, $w_i\in\{0,1\}$, $i=1,\ldots,L$

1379: be the binary representation of the index of the rate--distortion

1380: codeword for the message source.

1381: For $k^n\in T_K^\delta$, let $s^J(k^n)=(s_1(k^n),\ldots,s_J(k^n))$

1382: denote the binary representation string of the index of $k^n$.

1383: Let $\tilde{w}^L=(\tilde{w}_1,\ldots,\tilde{w}_L)$, where

1384: $\tilde{w}_j=w_j\oplus s_j(k^n)$,

1385: $j=1,\ldots,J$, and $\tilde{w}_j=w_j$, $j=J+1,\ldots,L$,

1386: and where $\oplus$ denotes modulo 2

1387: addition, i.e., the XOR operation.\footnote{Note that since $H(K)$ is

1388: assumed smaller than $\lambda R_U(D')$,

1389: then so is $H(K|Y)$,

1390: and therefore $J\le L$.}

1391: The binary vector $\tilde{w}^L$ is the (partially) encrypted message to be

1392: embedded. Let $m=\sum_{l=1}^L\tilde{w}_l2^{l-1}+1$

1393: denote the index of this message.

1394: If $u^N\notin T_{U}^\delta$ or $k^n\notin T_{K}^\delta$, an arbitrary

1395: (error) message $\tilde{w}^L$ is generated (say, the all--zero message).

1396: \item If $(k^n,x^n) \in T_{KX}^\delta$

1397: find, in bin number $m$, the first $j$ such that

1398: $V^n(m,j,k^n)=v^n$ is jointly typical, i.e.,

1399: $(k^n,x^n,v^n) \in T_{KXV}^\delta$, and then find the

1400: first $j'$ such that $Y^n(j',v^n,k^n)=y^n

1401: \in \calC(v^n,k^n)$ is jointly typical, i.e.,

1402: $(k^n,x^n,v^n,y^n) \in T_{KXVY}^\delta$.

1403: This vector $y^n$ is chosen for transmission.

1404: If $(k^n,x^n) \notin T_{KX}^\delta$,

1405: or if there is no $V^n(m,j,k^n)=v^n$ and $Y^n(j',v^n,k^n)=y^n$ such that

1406: $(k^n,x^n,v^n,y^n) \in T_{KXVY}^\delta$,

1407: an arbitrary vector $y^n\in\calY^n$ is transmitted.

1408: \end {enumerate}

1409:

1410: \noindent \textsl{Decoding}:\\

1411: \noindent Upon receiving $Z^n = z^n$

1412: and $K^n=k^n$, the decoder finds all

1413: sequences $\{v^n\}$ in $\calC(k^n)$ such that $(k^n,v^n,z^n) \in

1414: T_{KVZ}^\delta$. If all $\{v^n\}$

1415: found belong to the same bin, say, $\hat{m}$,

1416: then $\hat{m}$ is decoded as the

1417: embedded message, and then the binary representation

1418: vector $\hat{w}^L=(\hat{w}_1,\ldots,\hat{w}_L)$ corresponding to $\hat{m}$ is

1419: decrypted, again, by modulo 2 addition

1420: of its first $J$ bits with $s^J(k^n)$.

1421: This decrypted binary $L$--vector is

1422: then mapped to the corresponding reproduction

1423: vector $\tilde{u}^N$ of the rate--distortion codebook

1424: for the message source.

1425: If there is no $v^n\in\calC(k^n)$

1426: such that $(k^n,v^n,z^n) \in T_{KVZ}^\delta$ or if

1427: there exist two or more bins that contain such a sequence, an

1428: error is declared.

1429: \\

1430: \\

1431: \noindent We now turn to the performance

1432: analysis of this code in all relevant aspects.

1433: For each triple $(k^n,x^n,u^N)$ and particular choices of

1434: the codes, the possible causes for incorrect

1435: watermark decoding are the following:

1436:

1437: \begin{enumerate}

1438:   \item $(k^n,x^n,u^N) \notin T_{KX}^\delta\times T_{U}^\delta$.

1439:    Let the probability of this event be defined as $P_{e_{1}}$.

1440:   \item $(k^n,x^n,u^N)

1441:     \in T_{KX}^\delta\times T_{U}^\delta$, but in bin no.\ $m$

1442:     there is no $v^n$ s.t. $(k^n,x^n,v^n) \in T_{KXV}^\delta$.

1443:     Let the probability of this event be defined as $P_{e_{2}}$.

1444:   \item $(k^n,x^n,u^N)

1445:     \in T_{KX}^\delta\times T_{U}^\delta$ and in bin no.\ $m$

1446:     there is $v^n$ s.t. $(k^n,x^n,v^n) \in T_{KXV}^\delta$, but there is no

1447:     $y^n \in \calC(v^n,k^n)$ s.t. $(k^n,x^n,v^n,y^n) \in T_{KXVY}^\delta$.

1448:     Let the probability of this event be defined as $P_{e_{3}}$.

1449:   \item $(k^n,x^n,u^N)

1450:     \in T_{KX}^\delta\times T_{U}^\delta$ and in bin no.\ $m$

1451:     there is $v^n$ and $y^n

1452:     \in \calC(v^n,k^n)$ such that $(k^n,x^n,v^n,y^n) \in T_{KXVY}^\delta$,

1453:     but $(k^n,v^n,z^n) \notin T_{KVZ}^\delta$.

1454:     Let the probability of this event be defined as $P_{e_{4}}$.

1455:   \item $(k^n,x^n,u^N)

1456:     \in T_{KX}^\delta\times T_{U}^\delta$ and in bin no.\ $m$

1457:     there is $v^n$ and $y^n

1458:     \in \calC(v^n,k^n)$ such that $(k^n,x^n,v^n,y^n) \in T_{KXVY}^\delta$,

1459:     and $(k^n,v^n,z^n) \in T_{KVZ}^\delta$, but

1460:     there exists another bin, say, no.\ $\tilde{m}$,

1461:     that contains $\tilde{v}^n$ s.t. $(k^n,\tilde{v}^n,z^n)\in T_{KVZ}^\delta$.

1462:     Let the probability of this event be defined as $P_{e_{5}}$.

1463: \end{enumerate}

1464: If none of these events occur, the message

1465: $\tilde{w}^L$ (or, equivalently, $m$) is decoded

1466: correctly from $z^n$, the distortion constraint between $x^n$

1467: and $y^n$ is within $n(D+\epsilon)$ (as follows from (\ref{d_theoretic})), and

1468: the distortion between $u^N$ and its rate--distortion codeword,

1469: $\tilde{u}^N=\hat{u}^N$, does not exceed $N(D'+\epsilon)$.

1470: Thus, requirements 1 and 4 (modified according

1471: to eq.\ (\ref{distp}), with $D'+\epsilon$ replacing $D'$) are both satisfied.

1472: Therefore, we first prove that the probability for none of the

1473: events 1--5 to occur, tends to unity as $n\to\infty$.

1474:

1475: The average probability of error $P_{e}$ in decoding $m$ is bounded by

1476: \begin {equation}

1477: \label{Pe}

1478:     P_{e} \leq \sum_{i=1}^5 P_{e_i}.

1479: \end {equation}

1480: The fact that $P_{e_{1}}\rightarrow 0$ follows immediately from

1481: (\ref{PrTgx}). As for $P_{e_{2}}$, we have:

1482: \begin {equation}

1483: \label{P_e2a}

1484: P_{e_{2}} \dfn \prod_{j=1}^{M_2}

1485: \Pr\{(k^n,x^n,V^n(m,j,k^n)) \notin T_{KXV}^\delta\}.

1486: \end {equation}

1487: Now, by (\ref{TgyxSize}), for every $j$ and every

1488: $(k^n,x^n)\in T_{KX}^\delta$:

1489: \begin {eqnarray}

1490: \label{P_e2i}

1491:     \Pr\{V^n(m,j,k^n) \notin T_{V|KX}^\delta(k^n,x^n)\} & = & 1 -

1492: \Pr\{V^n(m,j,k^n) \in T_{V|KX}^\delta(k^n,x^n)\} \nonumber\\

1493: & = & 1 - \frac{|T_{V|KX}^\delta(k^n,x^n)|}{|T_{V|K}^\delta(k^n)|} \nonumber\\

1494:     & \leq & 1 -

1495: \frac{2^{n[(1-\delta)H(V|K,X)-\delta]}}

1496: {2^{n(1+\delta)H(V|K)}} \nonumber\\

1497:     & = &1 - 2^{-n[I(X;V|K)+\epsilon_1]}.

1498: \end {eqnarray}

1499: Substitution of

1500: (\ref{P_e2i}) into (\ref{P_e2a}) provides us with the following

1501: upper bound:

1502: \begin {equation}

1503: P_{e_{2}} \leq \Big[1 - 2^{-n[I(X;V|K)+\epsilon_1]}\Big]^{M_2} \leq

1504: \exp\bigg\{-2^{nR_2}\cdot2^{-n[I(X;V|K)+\epsilon_1]}\bigg\} \rightarrow 0,

1505: \end {equation}

1506: double--exponentially rapidly since $R_2 = I(X;V|K)+\epsilon_1 +

1507: \delta$.

1508: To estimate $P_{e_{3}}$, we repeat the same technique:

1509: \begin {equation}

1510: \label{P_e3a}

1511: P_{e_{3}} \dfn \prod_{j'=1}^{M_3}\Pr\{(k^n,x^n,v^n,Y^n(j',v^n,k^n))

1512: \notin T_{KXVY}^\delta\}.

1513: \end {equation}

1514: Again, by the property of the typical sequences, for every $j'$ and

1515: $(k^n,x^n,v^n)\in T_{KXV}^\delta$:

1516: \begin {eqnarray}

1517: \label{P_e3Cu}

1518:     \Pr\{Y^n(j',v^n,k^n) \notin T_{Y|KXV}^\delta(k^n,x^n,v^n)\}

1519: \leq 1 - 2^{-n[I(X;Y|V,K)+\epsilon_2]},

1520: \end {eqnarray}

1521: and therefore,

1522: substitution of (\ref{P_e3Cu}) into (\ref{P_e3a}) gives

1523: \begin {equation}

1524: P_{e_{3}} \leq \Big[1 - 2^{-n[I(X;Y|V,K)+\epsilon_2]}\Big]^{M_3} \leq

1525: \exp\bigg\{-2^{nR_3}\cdot2^{-n[I(X;Y|V,K)+

1526: \epsilon_2]}\bigg\} \rightarrow 0,

1527: \end {equation}

1528: double--exponentially rapidly since $R_3 =

1529: I(X;Y|V,K)+\epsilon_2 + \delta$.

1530: The estimation of $P_{e_{4}}$ is again based on properties

1531: of typical sequences. Since $Z^n$ is the output of

1532: a memoryless channel $P_{Z|Y}$ with input

1533: $y^n=Y^n(j',v^n,k^n)$ and by the assumption of this step

1534: $(k^n,x^n,v^n,y^n) \in T_{KXVY}^\delta$, from (\ref{PrTgx})

1535: and the Markov lemma \cite[Lemma 14.8.1]{CT91}, we obtain

1536: \begin {equation}

1537: \label{Pe4}

1538:     P_{e_{4}} = \Pr\{(k^n,x^n,v^n,y^n,Z^n)

1539:                 \notin T_{KXVYZ}^\delta\} \leq \delta,

1540: \end {equation}

1541: and similarly to $P_{e_{1}}$, $P_{e_4}$ can be made as small as desired by an

1542: appropriate choice of $\delta$.

1543:

1544: Finally, we estimate $P_{e_{5}}$ as follows:

1545: \begin {eqnarray}

1546:     P_{e_{5}} & = & \Pr\{\exists \tilde{m} \neq m:

1547:         (k^n,V^n(\tilde{m},j,k^n),z^n) \in T_{KVZ}^\delta\} \\

1548:     & \leq & \sum_{\tilde{m} \neq m,~j\in\{1,2,...,M_2\}}

1549: \Pr\{(k^n,V^n(\tilde{m},j,k^n),z^n) \in T_{KVZ}^\delta\}

1550:         \nonumber \\

1551: &=& (2^{NR_U(D')}-1)2^{nR_2}\Pr\{(k^n,V^n(\tilde{m},j,k^n),z^n)

1552: \in T_{KVZ}^\delta\}

1553: \nonumber \\

1554:     & \leq & 2^{nR_1}2^{-n[I(V;Z|K)-\epsilon_3]}.

1555: \end {eqnarray}

1556: Now, since $R_1

1557: = I(V;Z|K)-\epsilon_3-\delta$, $P_{e_{5}} \rightarrow 0$.

1558: Since $P_{e_{i}}\rightarrow 0$ for $i=1,\ldots,5$,

1559: their sum tends to zero as well, implying that there exists at

1560: least one choice of an auxiliary code and related stegotext codes

1561: that give rise to the reliable

1562: decoding of $\tilde{W}^L$.

1563:

1564: Now, let us denote by $N_{c}$ the total number of composite

1565: sequences in a codebook that corresponds

1566: to a $\delta$--typical $k^n$. Then,

1567: \begin {eqnarray}

1568: N_c&=&M_U\cdot M_2\cdot M_3\nonumber\\

1569: &=&2^{n[\lambda R_U(D')+I(X;V|K)+I(X;Y|V,K)

1570: +\epsilon_1+\epsilon_2+2\delta]}\nonumber\\

1571: &=&2^{n[\lambda R_U(D')+I(X;Y,V|K)+\epsilon_1+\epsilon_2+2\delta]}.

1572: \end {eqnarray}

1573: Thus,

1574: \begin{eqnarray}

1575: H(Y^n|K^n)&\leq&\log N_c\nonumber\\

1576: &=&n[\lambda R_U(D')+I(X;Y,V|K)+\epsilon_1+\epsilon_2+2\delta]\nonumber\\

1577: &\le& n(R_c'+\epsilon_1+\epsilon_2+2\delta),

1578: \end{eqnarray}

1579: where in the last inequality we have used condition (e).

1580: For sufficiently small values of $\delta$ (and hence of $\epsilon_1$ and $\epsilon_2$)

1581: $\epsilon_1+\epsilon_2+2\delta\le \epsilon$ and so, the compressibility

1582: requirement in the presence of $K^n$ is satisfied.

1583:

1584: We next prove the achievability of $R_c$. Let us consider the set of

1585: $\delta$--typical key sequences $T_K^\delta$, and view it as the

1586: union of $0$--typical sets

1587: (i.e., $\delta$--typical sets

1588: with $\delta=0$),

1589: $\{T_{Q_K}^0\}$, where $Q_K$ exhausts the set of all rational PMF's

1590: with denominator $n$, and with the property

1591: \begin{equation}

1592: (1-\delta)P_K(k)\le Q_K(k)\le (1+\delta)P_K(k),~~~\forall k\in\calK .

1593: \end{equation}

1594: Suppose that we have already randomly selected a codebook for

1595: one {\it representative} member $\hat{k}^n$ of each

1596: type class $T_{Q_K}^0\subset

1597: T_K^\delta$

1598: using the mechanism described above.

1599: Now, consider the

1600: set of all permutations from $\hat{k}^n$ to every other member of

1601: $T_{Q_K}^0$.

1602: The auxiliary codebook and the stegotext

1603: codebooks for every other key sequence, $k^n\in T_{Q_K}^0$

1604: will be obtained by permuting all (auxiliary and stegotext) codewords of those

1605: corresponding to $\hat{k}^n$ according

1606: to the same permutation that leads from $\hat{k}^n$ to $k^n$ (thus preserving

1607: all the necessary joint typicality properties).

1608: Now, in the {\it union} of all stegotext codebooks,

1609: corresponding to all typical key

1610: sequences, each codeword will appear

1611: at least $(n+1)^{-|\calK|\cdot|\calY|}

1612: \cdot 2^{n[(1-\delta)H(K|Y)-\delta]}$ times,

1613: which is a lower bound to the number of permutations of

1614: $\hat{k}^n$ which leave a

1615: given stegotext codeword $y^n$ unaltered.

1616: The total number of stegotext codewords, $N_Y$,

1617: in all codebooks of all $\delta$--typical key sequences (including

1618: repetitions) is upper bounded by

1619: \begin{eqnarray}

1620: \label{Nyu}

1621: N_Y&=&|T_K^\delta|\cdot N_c\nonumber\\

1622: &\le&2^{n[(1+\delta)H(K)+\delta]}\cdot

1623: 2^{n[\lambda R_U(D')+I(X;Y,V|K)+\epsilon_1+\epsilon_2+2\delta]}\nonumber\\

1624: &=&2^{n[H(K)+

1625: \lambda R_U(D')+I(X;Y,V|K)+\epsilon_1+\epsilon_2+\delta(H(K)+3)]}.

1626: \end{eqnarray}

1627: Let $\calC$ denote the

1628: union of all stegotext codebooks, namely, the set of all

1629: {\it distinct} stegotext vectors

1630: across all codebooks corresponding to all $k^n\in T_K^\delta$, and

1631: let $N(y^n)$ denote the number of occurrences of a given vector

1632: $y^n\in\calY^n$ in all stegotext codebooks. Then,

1633: in view of the above combinatorial consideration, we have

1634: \begin{equation}

1635: \label{Nyl}

1636: N_Y=\sum_{y^n\in\calC} N(y^n)\ge |\calC|\cdot

1637: (n+1)^{-|\calK|\cdot|\calY|}\cdot

1638: 2^{n[(1-\delta)H(K|Y)-\delta]}.

1639: \end{equation}

1640: Combining eqs.\ (\ref{Nyu}) and (\ref{Nyl}), we have

1641: \begin{equation}

1642: \label{66}

1643: \log|\calC|\le n[\lambda R_U(D')+I(X;Y,V|K)+I(K;Y)+\delta'],

1644: \end{equation}

1645: where

1646: \begin{equation}

1647: \delta'=\epsilon_1+\epsilon_2+\delta(H(K)+H(K|Y)+4)+

1648: |\calK|\cdot|\calY|\cdot\frac{\log(n+1)}{n},

1649: \end{equation}

1650: which is arbitrarily small provided that

1651: $\delta$ is sufficiently small and $n$ is sufficiently large.

1652: Thus, the rate required for

1653: public compression of $Y^n$ (without the key),

1654: which is $(\log|\calC|)/n$,

1655: is arbitrarily close to

1656: $[\lambda R_U(D')+I(X;Y,V|K)+I(K;Y)]$,

1657: which in turn is upper bounded by $R_c$,

1658: by condition (d) of Theorem 4.

1659:

1660: Before we proceed to evaluate

1661: the equivocation levels,

1662: an important comment is in order in the

1663: context of public compression (and a similar

1664: comment will apply to private compression):

1665: Note that a straightforward

1666: (and not necessarily optimal) method for public compression

1667: of $Y^n$ is simply according to its

1668: index within $T_Y^\delta$, which requires about

1669: $nH(Y)$ bits. On the other hand,

1670: the converse theorem tells us that the compressed

1671: representation of $Y^n$ cannot be much shorter

1672: than $n[\lambda R_U(D')+I(X;Y,V|K)+I(K;Y)]$ bits

1673: (cf.\ the necessity of condition

1674: (d) of Theorem 4). Thus, a contradiction

1675: between these two facts is avoided

1676: only if

1677: \begin{equation}

1678: \label{inherent1}

1679: \lambda R_U(D')+I(X;Y,V|K)+I(K;Y)\le H(Y),

1680: \end{equation}

1681: or, equivalently,

1682: \begin{equation}

1683: \label{inherent2}

1684: \lambda R_U(D')+I(X;Y,V|K)\le H(Y|K).

1685: \end{equation}

1686: This means that any achievable point $(D,D',R_c,R_c',h,h')$

1687: corresponds to a choice of random variables $(K,X,Y,V)$ that must

1688: inherently satisfy eq.\ (\ref{inherent2}).

1689: This observation will now help us also in estimating the equivocation levels.

1690:

1691: Consider first the equivocation w.r.t.\ the reproduction,

1692: for which we have the

1693: following chain of inequalities:

1694: \begin{eqnarray}

1695: Nh'&\le&nH(K|Y)\label{cc}\\

1696: &=&nH(K)-nI(K;Y)\nonumber\\

1697: &=&H(K^n)-nI(K;Y)\label{dd}\\

1698: &=&H(K^n|Y^n,Z^n)+I(K^n;Y^n,Z^n)-nI(K;Y)\nonumber\\

1699: &=&H(K^n|Y^n,Z^n)+I(K^n;Y^n)-nI(K;Y)\label{aa}\\

1700: &=&H(K^n|Y^n,Z^n)+H(Y^n)-H(Y^n|K^n)-nI(K;Y)\nonumber\\

1701: &\le&H(K^n|Y^n,Z^n)+n[\lambda R_U(D')+I(X;Y,V|K)+I(K;Y)+\epsilon]-\nonumber\\

1702: & &-n[\lambda R_U(D'+\epsilon)+I(X;Y,V|K)-\epsilon]-nI(K;Y)\label{bb}\\

1703: &=&H(K^n|Y^n,Z^n)+n\lambda[R_U(D')-R_U(D'+\epsilon)]+2n\epsilon\nonumber\\

1704: &\dfn&H(K^n|Y^n,Z^n)+n\epsilon'\nonumber\\

1705: &=&I(K^n;\hU^N|Y^n,Z^n)+H(K^n|Y^n,Z^n,\hU^N)+n\epsilon'\nonumber\\

1706: &\le&H(\hU^N|Y^n,Z^n)+H(K^n|Y^n,Z^n,\hU^N)+n\epsilon'\label{last}

1707: \end{eqnarray}

1708: where (\ref{cc}) is based on condition (b),

1709: (\ref{dd}) is due to the memorylessness of $K^n$,

1710: (\ref{aa}) follows from the fact that $K^n\to Y^n\to Z^n$ is a Markov

1711: chain, (\ref{bb}) is due to the

1712: sufficiency of condition (d) (that we have just proved)

1713: and the necessity of condition (e),

1714: and $\epsilon'$ vanishes as $\epsilon\to 0$ due to the

1715: continuity of $R_U(\cdot)$.

1716: Comparing the left--most side and the right--most side of the

1717: above chain of inequalities, we see that

1718: to prove that $H(\hU^N|Y^n,Z^n)$ is essentially

1719: at least as large as $Nh'$, it remains to show

1720: that $H(K^n|Y^n,Z^n,\hU^N)$ is small, say,

1721: \begin{equation}

1722: \label{small}

1723: H(K^n|Y^n,Z^n,\hU^N)\le

1724: n\epsilon'

1725: \end{equation}

1726: for large $n$. We next focus then on the proof of eq.\ (\ref{small}).

1727:

1728: First, consider the following chain of inequalities:

1729: \begin{eqnarray}

1730: \label{equivbound}

1731: H(K^n|Y^n,Z^n,\hat{U}^N)&\le&

1732: H(K^n,S^J(K^n)|Y^n,Z^n,\hat{U}^N)\nonumber\\

1733: &=&H(S^J(K^n)|Y^n,Z^n,\hat{U}^N)+

1734: H(K^n|S^J(K^n),Y^n,Z^n,\hat{U}^N)\nonumber\\

1735: &\le&H(S^J(K^n)|Y^n,\hat{U}^N,W^L)+

1736: H(K^n|S^J(K^n),Y^n),

1737: \end{eqnarray}

1738: where the second inequality follows from the fact that $W^L$ is a function of $\hat{U}^N$

1739: and the fact that conditioning reduces entropy.

1740: As for the second term of the right--most side, we have by Fano's inequality

1741: \begin{equation}

1742: H(K^n|S^J(K^n),Y^n)\le 1+P_{\mbox{err}}\cdot n\log|\calK|\le

1743: n\epsilon'/2 ~~~\mbox{for large enough $n$},

1744: \end{equation}

1745: as $P_{\mbox{err}}\to 0$ is the probability of error associated with the

1746: Slepian--Wolf decoder that estimates $K^n$ from its compressed version, $S^J(K^n)$, and the

1747: ``side information,'' $Y^n$.

1748: As for the first term of the right--most side of (\ref{equivbound}), we have

1749: \begin{eqnarray}

1750: H(S^J(K^n)|Y^n,\hat{U}^N,W^L)&=&H(W^L\oplus\tilde{W}^L|Y^n,\hat{U}^N,W^L)\nonumber\\

1751: &\le& H(\tilde{W}^L|Y^n).

1752: \end{eqnarray}

1753: It remains to show that $H(\tilde{W}^L|Y^n)\le n\epsilon'/2$ as well. In order to show this, we

1754: have to demonstrate that for a good code, once $Y^n$ is given, there is very little uncertainty

1755: with regard to $\tilde{W}^L$, which is the index of the bin.

1756:

1757: To this end, let us

1758: suppose that the inequality in

1759: (\ref{inherent2}) is strict (otherwise, we can slightly increase

1760: the allowable distortion level $D'$ and thus reduce $R_U(D')$).

1761: As we prove in the Appendix, for any given (arbitrarily small) $\gamma > 0$,

1762: \begin{equation}

1763: \label{doubleexp}

1764: \mbox{Pr}\{\exists~y^n~\mbox{in the code of $\hat{k}^n$ that appears

1765: in more than $2^{n\gamma}$ bins}\}

1766: \le |\calY|^n2^{-(n\gamma-\log e)2^{n\gamma}},

1767: \end{equation}

1768: that is, a double--exponential decay. The probability of the union of these events across all

1769: representatives $\{\hat{k}^n\}$ of all $T_{Q_K}^0\subset T_K^\delta$ will just be multiplied by

1770: the number of $\{T_{Q_K}^0\}$ in $T_K^\delta$, which is polynomial, and hence will continue to

1771: decay double--exponentially.

1772: Let us define then the event

1773: $$\{\exists~y^n~\mbox{in the stego--codebook of some $\hat{k}^n$ that appears in

1774: more than $2^{n\gamma}$ bins}\}$$

1775: as yet another

1776: error event (like the error events 1--5) that occurs with very small probability. Assume then, that

1777: the randomly selected codebook is ``good'' in the sense that

1778: no stegovector appears in more than $2^{n\gamma}$ bins, for

1779: any of the representatives $\{\hat{k}^n\}$.

1780: Now, given $y^n$, how many candidate bins

1781: (corresponding to encrypted messages $\{\tilde{w}^L\}$) can be expected at most?

1782: For a given $y^n$, let us confine attention to

1783: the $\delta$--conditional type class $T_{K|Y}^\delta(y^n)$ (key sequences outside this set

1784: cannot have $y^n$ in their codebooks, as they are not jointly $\delta$--typical with $y^n$).

1785: The conditional $\delta$--type class $T_{K|Y}^\delta(y^n)$ can be partitioned into

1786: conditional $0$--type classes $\{T_{Q_{K|Y}}^0(y^n)\}$, where $Q_{K|Y}$ exhausts the allowed

1787: $\delta$--tolerance in the conditional distribution around $P_{K|Y}$, in the same spirit

1788: as before. Now, take an arbitrary representative $\tilde{k}^n$ from a given $T_{Q_{K|Y}}^0(y^n)$,

1789: and consider the set of all permutations that lead from $\tilde{k}^n$ to all other

1790: members $\{k^n\}$ of $T_{Q_{K|Y}}^0(y^n)$. Obviously, the stego--codebooks of all those

1791: $\{k^n\}$ have exactly the same configuration

1792: of occurrences of $y^n$ as that of $\tilde{k}^n$ (since these permutations leave $y^n$ unaltered),

1793: therefore they belong to exactly the same bins as in the codebook of $\tilde{k}^n$, the

1794: number of which is at most $2^{\gamma n}$, by the hypothesis that we are using a good code.

1795: In other words, as $k^n$ scans $T_{Q_{K|Y}}^0(y^n)$, there will be no new bins that

1796: contain $y^n$ relative to those that are already in the codebook of $\tilde{k}^n$.

1797: New bins that contain $y^n$ can be seen then only by scanning

1798: the other conditional $0$--types $\{T_{Q_{K|Y}}^0(y^n)\}$

1799: within $T_{K|Y}^\delta(y^n)$, but the number of such conditional $0$--types does not exceed

1800: the total number of conditional $0$--types, which is upper bounded, in turn, by

1801: $(n+1)^{|\calK|\cdot|\calY|}$ \cite{CK81}. Thus, the totality of stego--codebooks, for all

1802: relevant $\{k^n\}$ cannot give more than $(n+1)^{|\calK|\cdot|\calY|}\cdot 2^{n\gamma}$

1803: distinct bins

1804: altogether. In other words, for a good codebook:

1805: \begin{equation}

1806: \label{zzz}

1807: H(\tilde{W}^L|Y^n)\le \log[(n+1)^{|\calK|\cdot|\calY|}\cdot 2^{n\gamma}]=

1808: n\left[\gamma+|\calK|\cdot|\calY|\cdot\frac{\log(n+1)}{n}\right]

1809: \end{equation}

1810: which is less than $n\epsilon'/2$ for an appropriate choice of $\gamma$ and for large enough $n$.

1811:

1812: Finally, for the equivocation w.r.t.\

1813: the original message source, we have

1814: the following:

1815: \begin{eqnarray}

1816: H(U^N|Y^n,Z^n)&=&

1817: H(\hat{U}^N|Y^n,Z^n)+H(U^N|Y^n,Z^n)-H(\hat{U}^N|Y^n,Z^n)\nonumber\\

1818: &\ge&nH(K|Y)-2n\epsilon'+H(U^N|Y^n,Z^n)-H(\hat{U}^N|Y^n,Z^n)\nonumber\\

1819: &=&nH(K|Y)+H(U^N)-I(U^N;\hat{U}^N)-I(U^N;Y^n,Z^n)-\nonumber\\

1820: & &H(\hat{U}^N|U^N)+I(\hat{U}^N;Y^n,Z^n)-2n\epsilon'\nonumber\\

1821: &\ge&nH(K|Y)+H(U^N)-H(\hat{U}^N)-I(U^N;Y^n,Z^n)-\nonumber\\

1822: & &H(\hat{U}^N|U^N)+I(\hat{U}^N;Y^n,Z^n)-2n\epsilon'\nonumber\\

1823: &\ge&[nH(K|Y)+NH(U)-NR_U(D')-2n\epsilon']-\nonumber\\

1824: &&[I(U^N;Y^n,Z^n)+

1825: H(\hat{U}^N|U^N)-I(\hat{U}^N;Y^n,Z^n)],

1826: \end{eqnarray}

1827: where the first inequality is due to the fact that

1828: $H(\hU^N|Y^n,Z^n)\ge n[H(K|Y)-2\epsilon']$, that we have just shown,

1829: and the third

1830: is due to the memorylessness of $\{U_i\}$ and the fact

1831: that the rate--distortion codebook

1832: size is $2^{NR_U(D')}$ and so, $H(\hat{U}^N)\le NR_U(D')$.

1833: Now, the second bracketed expression on the right--most side is the

1834: same as in eq.\ (\ref{bra}), where in the case of this specific scheme,

1835: both inequalities in (\ref{bra}) become equalities, i.e., this expression

1836: vanishes. This is because in our scheme, $U^N\to\hat{U}^N\to (Y^n,Z^n)$

1837: is a Markov chain (and so, the first inequality of (\ref{bra}) is tight) and

1838: because $H(\hat{U}^N|U^N,Y^n,Z^n)\le

1839: H(\hat{U}^N|U^N)=0$ (as $\hat{U}^N$ is a deterministic

1840: function of $U^N$), which makes the second inequality of (\ref{bra}) tight.

1841: As a result, we have

1842: \begin{eqnarray}

1843: H(U^N|Y^n,Z^n)&\ge&N[H(K|Y)/\lambda+H(U)-R_U(D')-2\epsilon'/\lambda]\nonumber\\

1844: &\ge&N[h+R_U(D')-H(U)+H(U)-R_U(D')-2\epsilon'/\lambda]\nonumber\\

1845: &=&N(h-2\epsilon'/\lambda),

1846: \end{eqnarray}

1847: where we have used condition (a).

1848: This completes the proof of the direct part.

1849:

1850: \section*{Acknowledgements}

1851: The author would like to thank Dr.\ Yossi

1852: Steinberg for interesting discussions.

1853: Useful comments made by the anonymous referees

1854: are acknowledged with thanks.

1855:

1856: \section*{Appendix}

1857: \renewcommand{\theequation}{A.\arabic{equation}}

1858:     \setcounter{equation}{0}

1859:

1860: \noindent

1861: {\it Proof of eq.\ (\ref{doubleexp})}.

1862: The probability of obtaining $y^n$ in a single

1863: random selection within the codebook of $\hat{k}^n$ is given by

1864: \begin{eqnarray}

1865: \label{ff}

1866: \mbox{Pr}\{Y^n(j',V^n(m,j,\hat{k}^n),\hat{k}^n)=y^n\}&=&

1867: \frac{|T_{V|KY}^\delta(\hat{k}^n,y^n)|}{|T_{V|K}^\delta(\hat{k}^n)|}\cdot

1868: \frac{1}{|T_{Y|KV}^\delta(\hat{k}^n,v^n)|}\label{tt}\\

1869: &\le&\frac{2^{n(1+\delta)H(V|K,Y)}}{2^{n[(1-\delta)H(V|K)-\delta]}}

1870: \cdot\frac{1}{2^{n[(1-\delta)H(Y|K,V)-\delta]}}\nonumber\\

1871: &=& 2^{-n[H(Y|K)-\delta'']},

1872: \end{eqnarray}

1873: where the first factor in the right--hand side of (\ref{tt})

1874: is the probability

1875: of having a $V^n(m,j,\hat{k}^n)=v^n$ that is typical with

1876: $y^n$ and $\hat{k}^n$ (a necessary condition for this $v^n$

1877: to generate the given $y^n$), the second factor

1878: is the probability of

1879: selecting a given $y^n$ in the random

1880: selection of the stegotext code, and where

1881: \begin{equation}

1882: \delta''=\delta[H(V|K,Y)+H(V|K)+H(Y|K,V)+2].

1883: \end{equation}

1884: It now follows that the probability $q$ for at least one

1885: occurrence of $y^n$ among the stegowords corresponding to

1886: a certain bin, in the codebook of $\hat{k}^n$,

1887: is upper bounded (using the union bound) by

1888: \begin{eqnarray}

1889: q&\le&M_2\cdot M_3\cdot

1890: 2^{-n[H(Y|K)-\delta'']}\nonumber\\

1891: &=&2^{-n[H(Y|K)-I(X;V|K)-I(X;Y|V,K)-

1892: \delta''-2\delta-\epsilon_1-\epsilon_2]}\nonumber\\

1893: &=&2^{-n[H(Y|K)-I(X;V,Y|K)-

1894: \delta''-2\delta-\epsilon_1-\epsilon_2]}\nonumber\\

1895: &\dfn&2^{-n[H(Y|K)-I(X;Y,V|K)-\delta_1]}.

1896: \end{eqnarray}

1897: We are interested in upper bounding the probability that a given $y^n$

1898: appears as a stegoword in more than $2^{n\gamma}$ bins

1899: in the codebook of $\hat{k}^n$, for a given

1900: $\gamma > 0$. For $i=1,\ldots,M_U$, let $A_i\in\{0,1\}$ be the

1901: indicator function of the event

1902: $$\{y^n~\mbox{appears as a stegoword in bin no.}~i~\mbox{at least once}\}.$$

1903: Then, clearly $\{A_i\}$ are i.i.d.\ with $\mbox{Pr}\{A_i=1\}=q$.

1904: Therefore,

1905: \begin{eqnarray}

1906: \mbox{Pr}\left\{\sum_{i=1}^{M_U}A_i \ge 2^{n\gamma}\right\}

1907: &\le& \exp_2\left\{-M_UD\left(\frac{2^{n\gamma}}{M_U}\|q\right)\right\}\nonumber\\

1908: &=& \exp_2\left\{-M_UD\left(2^{-n[\lambda R_U(D')-\gamma]}\|q\right)\right\},

1909: \end{eqnarray}

1910: where for $\alpha,\beta\in[0,1]$, the function $D(\alpha\|\beta)$

1911: designates the binary divergence

1912: \begin{equation}

1913: D(\alpha\|\beta)=\alpha\log\frac{\alpha}{\beta}+

1914: (1-\alpha)\log\frac{1-\alpha}{1-\beta}.

1915: \end{equation}

1916: Now, referring to eq.\ (\ref{inherent2}), suppose that

1917: \begin{equation}

1918: H(Y|K)\ge\lambda R_U(D')+I(X;V,Y|K)+\delta_1+2\gamma.

1919: \end{equation}

1920: Then, clearly,

1921: \begin{equation}

1922: 2^{-n[\lambda R_U(D')-\gamma]} > 2^{-n[H(Y|K)-I(X;Y,V|K)-\delta_1]} \ge q

1923: \end{equation}

1924: and so, $\mbox{Pr}\{\sum_{i=1}^{M_U}A_i \ge 2^{n\gamma}\}$ is further upper bounded by

1925: \begin{equation}

1926: \mbox{Pr}\left\{\sum_{i=1}^{M_U}A_i \ge 2^{n\gamma}\right\}\le

1927: \exp_2\left\{-M_UD\left(2^{-n[\lambda R_U(D')-\gamma]}\|2^{-n[H(Y|K)-I(X;Y,V|K)-\delta_1]}\right)\right\}.

1928: \end{equation}

1929: To further bound this expression from above, we have to get a lower bound to

1930: an expression of the form $D(2^{-na}\|2^{-nb})$ for $0< a < b$. Applying the

1931: inequality $\log(1+x)=-\log(1-\frac{x}{1+x})\ge \frac{x\log e}{1+x}$,  for $x > -1$, we have:

1932: \begin{eqnarray}

1933: D(2^{-na}\|2^{-nb})&=& 2^{-na}\log\frac{2^{-na}}{2^{-nb}}+

1934: (1- 2^{-na})\log\frac{1-2^{-na}}{1-2^{-nb}}\nonumber\\

1935: &=&n(b-a)2^{-na}+(1- 2^{-na})\log\left(1+\frac{2^{-nb}-2^{-na}}{1-2^{-nb}}\right)\nonumber\\

1936: &\ge&n(b-a)2^{-na}+(2^{-nb}-2^{-na})\log e\nonumber\\

1937: &\ge&[n(b-a)-\log e]2^{-na}.

1938: \end{eqnarray}

1939: Applying this inequality with $a=\lambda R_U(D')-\gamma$ and $b=H(Y|K)-I(X;Y,V|K)-\delta_1$,

1940: we get

1941: \begin{equation}

1942: D\left(2^{-n[\lambda R_U(D')-\gamma]}\|2^{-n[H(Y|K)-I(X;Y,V|K)-\delta_1]}\right)\ge

1943: (n\gamma-\log e)2^{-n[\lambda R_U(D')-\gamma]}

1944: \end{equation}

1945: and so,

1946: \begin{equation}

1947: \mbox{Pr}\left\{\sum_{i=1}^{M_U}A_i \ge 2^{n\gamma}\right\}\le 2^{-(n\gamma-\log e)2^{n\gamma}},

1948: \end{equation}

1949: which decays double--exponentially rapidly with $n$. While this inequality holds for

1950: a {\it given} $y^n$, the probability that $\sum_{i=1}^{M_U}A_i \ge 2^{n\gamma}$ for {\it some}

1951: $y^n\in\calY^n$ would be upper bounded, using the union bound, by

1952: $|\calY|^n\cdot 2^{-(n\gamma-\log e)2^{n\gamma}}$, which still decays double--exponentially.

1953: Thus, with very high probability the random selection of stegovectors, for $\hat{k}^n$,

1954: is such that no stego codevector $y^n$ appears in more than $2^{n\gamma}$ bins.

1955:

1956:

1957: \begin{thebibliography}{AA}

1958: \bibitem{AKS02}

1959: A.~Adelsbach, S.~Katzenbeisser,

1960: and A.-R.~Sadeghi, ``Cryptography meets watermarking:

1961: detecting watermarks with minimal or

1962: zero knowledge disclosure,'' preprint 2002.

1963: Available on--line at

1964: [www-krypt.cs.uni-sb.de/download/papers].

1965: \bibitem{CC02}

1966: S.~C.~Cheung and D.~K.~W.~Chiu, ``A watermark infrastructure for enterprise

1967: document management,'' {\it Proc.\ 36th Hawaii International Conference

1968: on System Sciences (HICSS'03)}, Hawaii, 2003.

1969: \bibitem{CT91}

1970: T.~M.~Cover and J.~A.~Thomas,

1971: {\it Elements of Information Theory}, Wiley, New York, 1991.

1972: \bibitem{CK81}

1973: I.~Csisz\'ar and J.~K\"orner, {\it Information Theory: Coding Theorems

1974: for Discrete Memoryless Systems}, Academic Press, 1981.

1975: \bibitem{GP80}

1976: S.~I.~Gel'fand and M.~S.~Pinsker, ``Coding for channel with random % CC-034

1977: parameters,'' {\it Problems of Control and Information Theory}, vol.\ 9, no.\ 1, pp.\ 19--31, 1980.

1978: \bibitem{JML00}

1979: A.~Jayawardena, B.~Murison, and P.~Lenders, ``Embedding multiresolution binary

1980: images into multiresolution watermark channels in wavelet domain,'' preprint

1981: 2000. Available on--line at

1982: [www.tsi.enst.fr/$\sim$maitre/tatouage/icassp00/articles].

1983: \bibitem{KNSTN02}

1984: K.~Kuroda, M.~Nishigaki, M.~Soga, A.~Takubo, and I.~Nakamura, ``A digital

1985: watermark using public--key cryptography for open algorithm,''

1986: {\it Proc.\ ICITA 2002}. Also, available on--line at

1987: [http://charybdis.mit.csu.edu.au/$\sim$mantolov/CD/ICITA2002/papers/131-21.pdf].

1988: \bibitem{MM03}

1989: A.~Maor and N.~Merhav,

1990: ``On joint information embedding and lossy compression,''

1991: submitted to {\it IEEE Trans.\ Inform.\ Theory}, July 2003.

1992: Available on--line at [www.ee.technion.ac.il/people/merhav].

1993: \bibitem{MM04}

1994: A.~Maor and N.~Merhav,

1995: ``On joint information embedding and lossy compression

1996: in the presence of a stationary memoryless attack channel,''

1997: submitted to {\it IEEE Trans.\ Inform.\ Theory}, January 2004.

1998: Available on--line at [www.ee.technion.ac.il/people/merhav].

1999: \bibitem{MS03}

2000: N.~Merhav and S.~Shamai (Shitz), ``On joint source--channel

2001: coding for the Wyner--Ziv source and the Gel'fand--Pinsker channel,''

2002: {\it IEEE Trans.\ Inform.\ Theory}, vol.\ 49, no.\ 11, pp.\ 2844--2855, November 2003.

2003: \bibitem{MO03}

2004: P.~Moulin and J.~A.~O'Sullivan, ``Information--theoretic analysis of information hiding,''

2005: {\it IEEE Trans.\ Inform.\ Theory}, vol.\ 49, no.\ 3, pp.\ 563--593, March 2003.

2006: \bibitem{MW04}

2007: P.~Moulin and Y.~Wang, ``New results on steganographic capacity,'' %WM-088

2008: {\it Proc.\ CISS 2004},

2009: pp.\ 813--818, Princeton University, March 2004.

2010: \bibitem{SW73}

2011: D.~Slepian and J.~K.~Wolf, ``Noiseless coding of correlated information sources,''

2012: {\it IEEE Trans.\ Inform.\ Theory}, vol.\ IT--19, pp.\ 471--480, 1973.

2013: \bibitem{SIA99}

2014: M.~Steinder, S.~Iren, and P.~D.~Amer,

2015: ``Progressively authenticated image transmission,''

2016: preprint 1999. Available on--line at

2017: [www.cis.udel.edu/$\sim$amer/PEL/poc/pdf/milcom99-steiner.pdf].

2018: \bibitem{Yamamoto97}

2019: H.~Yamamoto, ``Rate--distortion theory for the Shannon

2020: cipher system,''

2021: {\it IEEE Trans.\ Inform.\ Theory}, vol.\ 43, no.\ 3, pp.\ 827--835, May 1997.

2022: \end{thebibliography}

2023: \newpage

2024: \begin{figure}[h]

2025: \hspace*{-2cm}\input{p90fig1.pstex_t}

2026: \caption{A generic watermarking/encryption system.}

2027: \label{gen}

2028: \end{figure}

2029:

2030: \begin{figure}[h]

2031: \hspace*{-2cm}\input{p90fig2.pstex_t}

2032: \caption{The proposed watermarking/encryption scheme (general case).}

2033: \label{dir}

2034: \end{figure}

2035: \end{document}

2036: