0504:cs0504020/cs0504020

1: %PHVA.tex (2/21/05)

2: %

3: \documentclass[11pt]{article}

4: \usepackage{amstext,amssymb}

5: \usepackage{latexsym,epsfig}

6:

7: %--------------- Various Style Declarations ----------------------------

8:

9: \textheight         9.00in

10: \textwidth          6.30in

11: \oddsidemargin      0.00in

12: \evensidemargin     0.00in

13: \topmargin         -0.75in

14: \topskip            0.50in

15: \footskip           0.50in

16:

17: \parskip               4pt

18: \parindent             8pt

19: \renewcommand{\arraystretch}{1.2}

20:

21: %%% For equation numbering   depth to only sections use

22: \renewcommand{\theequation}{\thesection.\arabic{equation}}

23:

24: %%% For numbering of theorems, definitions, lemmas, etc --

25: %%% if you only want section numbering, use

26:

27:  \newtheorem{definition}{Definition}[section]

28:  \newtheorem{example}{Example}[section]

29:  \newtheorem{theorem}{Theorem}[section]

30:  \newtheorem{lemma}[theorem]{Lemma}

31:  \newtheorem{remark}{Remark}[section]

32:  \newtheorem{proposition}{Proposition}[section]

33:  \newtheorem{corollary}[theorem]{Corollary}

34:  \newtheorem{problem}{Problem}[section]

35:  \newtheorem{conversion}{Conversion}[section]

36:

37: %% various definitions

38:

39: \newcommand{\Z}{{\mathbb{Z}}}

40: \newcommand{\F}{{\mathbb{F}}}

41: \newcommand{\R}{{\mathbb{R}}}

42: \newcommand{\C}{{\mathbb{C}}}

43: \newcommand{\K}{{\mathbb{K}}}

44: \newcommand{\A}{{\mathcal{A}}}

45: \newcommand{\B}{{\mathcal{B}}}

46: \newcommand{\Bf}{{\mathfrak{B}}}

47: \newcommand{\CC}{{\mathcal{C}}}

48: \newcommand{\D}{{\mathcal{D}}}

49: \newcommand{\E}{{\mathsf{E}}}

50: \newcommand{\EE}{{\mathcal{E}}}

51: \newcommand{\FF}{{\mathcal{F}}}

52: \newcommand{\HH}{{\mathcal{H}}}

53: \newcommand{\I}{{\mathcal{I}}}

54: \newcommand{\J}{{\mathcal{J}}}

55: \newcommand{\KK}{{\mathcal{K}}}

56: \renewcommand{\L}{{\mathcal{L}}}

57: \newcommand{\N}{{\mathcal{N}}}

58: \renewcommand{\P}{{\mathcal{P}}}

59: \newcommand{\Q}{Q^{\sqrt{}}}

60: \newcommand{\PP}{{\mathcal{P}}}

61: \newcommand{\RR}{{\mathcal{R}}}

62: \renewcommand{\S}{{\mathcal{S}}}

63: \newcommand{\SSS}{{\mathcal{S}}}

64: \newcommand{\T}{{\mathcal{T}}}

65: \newcommand{\U}{{\mathcal{U}}}

66: \newcommand{\V}{{\mathcal{V}}}

67: \newcommand{\W}{{\mathcal{W}}}

68: \newcommand{\X}{{\mathcal{X}}}

69: \newcommand{\Y}{{\mathcal{Y}}}

70: \newcommand{\Ah}{\A\hat{\ }}

71: \newcommand{\Gh}{G\hat{\ }}

72: \newcommand{\Ghh}{G\hat{\ }\hat{\ }}

73: \newcommand{\Hh}{H\hat{\ }}

74: \newcommand{\Uh}{U\hat{\ }}

75: \newcommand{\Vh}{V\hat{\ }}

76: \newcommand{\Sh}{S\hat{\ }}

77: \newcommand{\ab}{\mathbf a}

78: \newcommand{\Ab}{\mathbf A}

79: \newcommand{\bb}{\mathbf b}

80: \newcommand{\cb}{\mathbf c}

81: \newcommand{\db}{\mathbf d}

82: \newcommand{\eb}{\mathbf e}

83: \newcommand{\fb}{\mathbf f}

84: \newcommand{\gb}{\mathbf g}

85: \newcommand{\hb}{\mathbf h}

86: \newcommand{\ib}{\mathbf i}

87: \newcommand{\mb}{\mathbf m}

88: \newcommand{\nb}{\mathbf n}

89: \newcommand{\pb}{\mathbf p}

90: \newcommand{\qb}{\mathbf q}

91: \newcommand{\rb}{\mathbf r}

92: \renewcommand{\sb}{\mathbf s}

93: \newcommand{\tb}{\mathbf t}

94: \newcommand{\ub}{\mathbf u}

95: \newcommand{\vb}{\mathbf v}

96: \newcommand{\wb}{\mathbf w}

97: \newcommand{\xb}{\mathbf x}

98: \newcommand{\yb}{\mathbf y}

99: \newcommand{\zb}{\mathbf z}

100: \newcommand{\zerob}{\mathbf 0}

101: \newcommand{\oneb}{\mathbf 1}

102:

103: \newcommand{\sigmab}{\mbox{\boldmath$\sigma$}}

104: \newcommand{\bsig}{\boldmath $\sigma$\unboldmath}

105: \newcommand{\bomeg}{\boldmath $\omega$\unboldmath}

106:

107: \newcommand{\ie}{{\em i.e., }}

108: \newcommand{\eg}{{\em e.g., }}

109: \newcommand{\cf}{\emph{cf.\ }}

110: \newcommand{\etal}{\emph{et al.\ }}

111:

112: \newcommand{\inner}[2]{\langle{#1},{#2}\rangle}

113: \newcommand{\Proof}{\hspace*{0pt}{\em Proof}}

114: \newcommand{\propeq}{\equiv_\alpha}

115: \newcommand{\mapform}[3]{{#1}\colon~{#2} \rightarrow {#3}}

116: \newcommand{\flr}[1]{\left\lfloor{#1}\right\rfloor}

117: \newcommand{\ceil}[1]{\left\lceil{#1}\right\rceil}

118: \newcommand{\lra}{\leftrightarrow}

119: %\newcommand{\qed}{\hfill\rule{8pt}{8pt}} %(text mode only)

120: \newcommand{\qed}{\hspace*{1cm}\hspace*{\fill}\openbox}

121: \newcommand{\half}{\frac{1}{2}}

122: \newcommand{\dint}{\int\!\!\!\int}

123:

124: \newcommand{\df}{\textbf}

125: \newcommand{\eqr}[1]{(\ref{#1})}

126:

127: \newcommand{\mod}{\mathrm{~mod~}}  %(math mode only)

128: \newcommand{\im}{\mathrm{im~}}  %(math mode only)

129: \newcommand{\rect}{\mathrm{rect}}  %(math mode only)

130: \newcommand{\sinc}{\mathrm{sinc}}  %(math mode only)

131: \newcommand{\rank}{\mathrm{rank}}  %(math mode only)

132: \newcommand{\argmin}{\mathrm{arg\,min}}  %(math mode only)

133: \newcommand{\eps}{\varepsilon} %(math mode only)

134: \newcommand{\remove}[1]{}

135:

136: \newcommand{\h}{\hat{\ }}

137: \newcommand{\cl}{^\mathrm{cl}}

138:

139: %%% AMS proof environment

140: \newcommand{\openbox}{\leavevmode

141:   \hbox to.77778em{%

142:   \hfil\vrule

143:   \vbox to.675em{\hrule width.6em\vfil\hrule}%

144:   \vrule\hfil}}

145: \newcommand{\proofname}{Proof}

146: \newenvironment{proof}[1][\proofname]{\par\normalfont

147:  \trivlist\item[\hskip\labelsep\itshape #1:]\ignorespaces

148: }{\hspace*{1cm}\hspace*{\fill}\openbox \medskip\endtrivlist}

149: %%% End of AMS proof environment

150:

151:

152: %************************************************************************

153: %                                                                       *

154: %            End of preamble and beginning of text.                     *

155: %                                                                       *

156: %************************************************************************

157:

158:

159: \begin{document}

160: \renewcommand{\textfraction}{0}

161:

162: \title{The Viterbi Algorithm:  A Personal History}

163:  \author{\normalsize

164: G. David Forney, Jr.

165:  \\[-5pt]

166: \small MIT \\[-5pt]

167: \small Cambridge, MA 02139 USA \\[-5pt] \small

168: \texttt{forneyd@comcast.net} }

169: \date{}

170: \maketitle

171: \thispagestyle{empty}

172: \begin{abstract}

173: The story of the Viterbi algorithm (VA) is told from a personal

174: perspective. Applications both within and beyond communications are

175: discussed.

176: In brief summary, the VA has proved to be an extremely important algorithm

177: in a surprising variety of fields.

178: \end{abstract}

179: \normalsize

180:

181: %{\bf Index terms:}

182:

183: %\pagebreak

184: \section{Introduction}

185:

186: Andrew J. Viterbi is rightly celebrated as one of the leading

187: communications engineers and theorists of the twentieth century.  He has

188: received almost every professional award possible, including election not

189: only to the National Academy of Engineering (USA) but also to the National

190: Academy of Sciences (USA), where he chairs the Computer

191: Science section.  His award citations usually cite ``invention of the

192: Viterbi algorithm" as his most notable accomplishment.

193:

194: On the other hand, Andy would be the first to tell you that other people

195: deserve much of the credit for recognizing its theoretical properties and

196: its practical attractiveness, and for extending its domain of

197: application.  He has often told this story himself (see, \eg  \cite{V90}).

198:

199: Nevertheless, no one doubts that Andy's awards are entirely deserved, and

200: that their focus on the Viterbi algorithm (VA) is

201: appropriate.  This article will attempt to explain why, by briefly

202: recounting the history of the VA.  It is a ``personal history," because

203: the story of the VA is so intertwined with my own history that I can

204: recount much of it from a personal perspective.

205:

206: %\pagebreak

207: \section{Invention of the Viterbi algorithm}

208:

209: The Viterbi algorithm was first presented in Andy's famous 1967 paper

210: \cite{V67a} to help prove an asymptotically optimum upper bound

211: on the error probability of convolutional codes, which had previously been

212: derived by Yudkin in the context of sequential decoding \cite{Y64}.  In

213: this paper, the VA is presented just as we understand it today.  This paper

214: introduces the important concept of \emph{survivors} (a term possibly

215: borrowed from tennis elimination tournaments), and shows that

216: only $q^K$ survivors need be retained to decode a convolutional code with

217: constraint length $K$ over the

218: $q$-ary field $GF(q)$.  Compared to a block

219: code with $q^K$ codewords, such a convolutional code is shown to have a

220: much better error exponent, particularly near

221: capacity.

222:

223: Andy recalls in a 1999 interview \cite{M99} that

224:

225: \begin{quote}

226:  ``the Viterbi algorithm

227: for convolutional codes \ldots came out of my teaching \ldots.  I found

228: information theory difficult to teach, so I started developing some tools.

229: \ldots  I wrote the first paper in March '66, but it wasn't published

230: until April '67. \ldots  At one point I was actually discouraged from

231: publishing the algorithm details.  Fortunately, one of the reviewers, Jim

232: Massey, encouraged me to include the algorithm. \ldots  Nobody thought

233: that it had any potential for practical value \ldots"

234: \end{quote}

235:

236: It is clear from the paper that at this point Andy had no idea that the VA

237: was actually an optimum (maximum likelihood) decoder, nor that it was

238: potentially practical.  Indeed, the paper states that ``this decoding

239: algorithm is clearly suboptimal," and concludes: ``Although this algorithm

240: is rendered impractical by the excessive storage requirements, it

241: contributes to a general understanding of convolutional codes and

242: sequential decoding through its simplicity of mechanization and analysis"

243: \cite{V67a}.

244:

245: %A second 1967 paper \cite{V67b} discussed the application of the Viterbi

246: %algorithm to orthogonal convolutional codes, whose analysis is

247: %particularly nice.

248:

249: %\pagebreak

250: \section{Discovery that the VA is optimum}

251:

252: I believe that I received a copy of Andy's paper prior to publication,

253: probably via Jim Massey.  At that time I was working at Codex Corp., a

254: small start-up company aiming at practical applications of convolutional

255: codes.  Our primary focus was initially on threshold decoding, which was

256: the subject of Jim's doctoral thesis \cite{M63};  Jim was a

257: consultant.  Subsequently, we developed a sequential decoding system

258: \cite{WR61} for the Pioneer deep-space satellite program, which became the

259: first code in space \cite{CHIW98}.

260:

261: I had been trying to understand why in practice convolutional codes were

262: generally superior to block codes, so I studied Andy's paper

263: with great interest.  I realized that the path-merging property of

264: convolutional codes could be depicted in what I called a \emph{trellis

265: diagram}, to contrast with the then-conventional tree diagram used in the

266: analysis of sequential decoding.  It was then only a small step to see that

267: the Viterbi algorithm was an exact recursive algorithm for finding the

268: shortest path through a trellis, and thus was actually an optimum trellis

269: decoder.  I believe that at that point I called Andy, and told him that he

270: had been too modest when he asserted that the VA was ``asymptotically

271: optimum."

272:

273: These results were written up in a 1967 technical report \cite{F67}

274: for NASA Ames Research Center.  They were not published in journal form

275: until many years later, in \cite{F73} and \cite{F74}.

276:

277: Shortly afterward, in a paper submitted in May 1968  \cite{O69},

278: Jim Omura observed that the VA was simply the standard

279: forward dynamic programming solution to maximum-likelihood decoding of a

280: discrete-time, finite-state dynamical system observed in memoryless

281: noise.  Beyond proving optimality in a different way, he thus made the

282: first connection between the VA and system and control theory.  It is

283: interesting to speculate whether the history of the VA would have been

284: different if it had simply been called ``dynamic programming" from the

285: beginning.

286:

287: At this point, none of us had recognized that the VA might be practical.

288: Jim's paper concludes:  ``\ldots the decoding algorithm discussed here

289: grows exponentially in complexity with constraint length $\nu$ and is

290: therefore impractical for large $\nu$ \ldots."  More embarrassingly, in a

291: 1970

292: \textsc{IEEE Spectrum} paper \cite{F70} describing practical coding

293: schemes for the space channel, I wrote:

294:

295: \begin{quote}

296: Sequential decoding [is] the best-performing practical technique known for

297: memoryless channels like the space channel, and will probably be the

298: general-purpose workhorse for these channels in the future \ldots.

299:

300: [The Viterbi algorithm] is competitive in performance with sequential

301: decoding for moderate error rates, but cannot achieve very low error rates

302: efficiently.  On the other hand, it [is] capable of extremely high speeds

303: (tens of megabits), where sequential decoders become uneconomic.  It

304: therefore may find application in high-data-rate systems with modest error

305: requirements, such as digitized television.

306: \end{quote}

307:

308: %\pagebreak

309: \section{Recognition that the VA is practical}

310:

311: Andy has always said that Jerry Heller was the first person to realize

312: that the VA might be practical.  Jerry simulated the performance of

313: short-constraint-length codes at the Jet Propulsion Laboratory (JPL) in

314: 1968--69

315: \cite{H68, H69}, and found that with only a 64-state code he could

316: obtain a sizable coding gain, of the order of 6 dB.

317:

318: In 1968, Andy, Irwin Jacobs, and Len Kleinrock incorporated Linkabit

319: Corp.\ in San Diego as a vehicle to pool their consulting efforts and to

320: obtain small government study contracts.  All kept their jobs as

321: professors.  In 1969, Jerry Heller was hired as Linkabit's first full-time

322: employee.  Linkabit obtained some small Navy and NASA contracts,

323: which enabled the construction of a VA prototype in 1969--70.  ``It was a

324: big monster filling a rack"

325: \cite{M99}.

326:

327: The first IEEE Communication Theory Workshop in 1970 in St.\ Petersburg

328: became famous as the ``coding is dead" workshop, after Ned Weldon and other

329: speakers worried publicly that coding theory had come to a dead end.  But

330: what I remember best from that session is Irwin Jacobs standing up in the

331: back row, flourishing an integrated circuit (a 4-bit shift register, I

332: believe), and asserting that this represented the future of coding.  He

333: was quite right. (Unfortunately, by this time Codex had made a business

334: decision to get out of coding.)

335:

336: By 1971, Linkabit had implemented a 2 Mb/s, 64-state Viterbi decoder.  In

337: a special issue on coding of the \textsc{IEEE Transactions on Communication

338: Technology} in October 1971, Heller and Jacobs

339: \cite{HJ71} discuss this decoder and many practical issues in careful

340: detail.  They compare the VA with sequential decoding, and conclude that

341: the VA will often be preferable because it can use quantized soft decisions

342: easily, and is less sensitive to channel and equipment variations.  In the

343: same issue, Cohen, Heller and Viterbi \cite{CHV71} describe a system using

344: orthogonal convolutional codes and the VA for asynchronous multiple-access

345: communications, and Viterbi \cite{V71} introduces generating-function

346: analysis techniques for the VA.

347:

348: %\pagebreak

349:

350: During the 1970s, through the leadership of Linkabit and JPL, the VA

351: became part of the coding standard for deep-space communication,

352: ultimately in a concatenated coding system with a Reed-Solomon (RS) outer

353: code.  Linkabit developed a relatively inexpensive and flexible VA chip,

354: and the VA became a nice little business for Linkabit.  It didn't hurt

355: that the inventor of the Viterbi algorithm was a Linkabit founder.  The VA

356: also began to be incorporated in many other communications applications.

357:

358: In the early 1990s, JPL built a $2^{14}$-state ``Big Viterbi Decoder"

359: (BVD) with 8192 parallel add-compare-select (ACS) units, which operated at

360: a rate of the order of 1 Mb/s \cite{C92}.  As far as I know, the BVD

361: remains the biggest Viterbi decoder ever built.

362:

363: When the primary antenna

364: failed to deploy during the Galileo mission in 1992, JPL devised an

365: elaborate concatenated coding scheme involving a $2^{14}$-state rate-1/4

366: inner convolutional code and a set of variable-strength RS outer codes,

367: and reprogrammed it into the spacecraft computers.  This scheme was

368: able to operate within about 2 dB of the Shannon limit at a bit error

369: probability of less than $10^{-6}$, which was the world record prior to

370: the advent of turbo codes \cite{CHIW98}.

371:

372: %\pagebreak

373: \section{The VA and intersymbol interference channels}

374:

375: In the late 1960s, Codex turned its attention to the voiceband

376: modem business.  Our first-generation product

377: was a single-sideband (SSB) 9600 b/s modem with a so-called Class IV or

378: $1 - D^2$ ``partial response."  About 1969, I recognized that the symbol

379: correlation that was thus introduced could be exploited by an \emph{ad

380: hoc} error correction algorithm, which was able to improve the noise

381: margin by about 2--3 dB.  This little decoder extended the commercial life

382: of this marginal-performance modem by perhaps a year or two.

383:

384: It took me a while to understand that I had in fact invented a

385: maximum-likelihood sequence detector for this modem.  Over time, I

386: realized that this was nothing more than the Viterbi algorithm again,

387: streamlined for the $1 - D^2$ response.  This led to a 1972 paper

388: \cite{F72} that showed that the VA could be used as a maximum-likelihood

389: sequence detector for digital sequences in the presence of intersymbol

390: interference (ISI) and additive white Gaussian noise (AWGN).

391:

392: Meanwhile, Jim Omura had recognized independently at UCLA that the VA

393: could be used on intersymbol interference channels, because of their

394: convolutional character \cite{O71}.  Indeed, a tantalizing hint in this

395: direction appears in a book review by Andy Viterbi in 1970 \cite{V70}.

396: After visiting UCLA, Hisashi Kobayashi further developed this idea,

397: particularly for practical applications in partial response modems  and

398: magnetic recording \cite{K71a, K71b}.

399:

400: The VA proved to be too complicated for general use as an equalizer on ISI

401: channels.  However, it stimulated many suboptimal approximations, and

402: analysis of its performance gave bounds on the best possible

403: performance of any sequence detector.

404:

405: However, the VA did become standard in the related application of

406: high-density magnetic recording.  In so-called PRML systems

407: (``partial-response equalization with maximum-likelihood sequence

408: detection") \cite{ISW98}, the magnetic recording channel is first equalized

409: to a simple ``partial response" such as $1 - D^2$,

410: and the resulting sequence is then detected by the VA, or by a simplified

411: version thereof, as Kobayashi had envisioned \cite{K71a}.

412: In retrospect, it seems possible that my little SSB modem

413: decoder was the first implementation of such a PRML scheme.

414:

415: %\pagebreak

416: \section{Trellis-coded modulation}

417:

418: After Gottfried Ungerboeck published his invention of trellis-coded

419: modulation in 1982 \cite{U82}, the VA became the workhorse

420: decoder for the next several generations of voiceband modems.

421: Ungerboeck extended trellis coding to multilevel constellations by

422: constructing trellis codes in which each branch of the trellis represents

423: a subset of constellation symbols, rather than a single symbol.  By clever

424: constellation partitioning and attention to distances between subsets, he

425: was able to obtain coding gains in the bandwidth-limited regime

426: comparable to those that can be obtained in the power-limited

427: regime.

428:

429: For example, the V.32 modem (1986) used an 8-state trellis code to

430: obtain a coding gain of about 3.5 dB, while the later V.34 modem (1994)

431:  used 16- to 64-state trellis codes to obtain coding gains of

432: 4.0 to 4.5 dB \cite{FBEM96}.

433:

434: \section{Applications in mobile and broadcast communications}

435:

436: The mobile communications channel is subject to fading, bursts, and

437: multiuser interference, and is a much more difficult medium than the

438: AWGN and linear Gaussian channels discussed above.

439: The designers of second-generation (2G) cellular systems used every tool

440: available at the time (early 1990s) to provide reliable communication on

441: this difficult channel.

442:

443: The CDMA system developed by Qualcomm uses a $2^8$-state, rate-1/3

444: convolutional code with interleaved 64-orthogonal modulation, and of course

445: a Viterbi decoder.  The TDMA system developed for GSM uses the VA

446: not only to decode a 16-state, rate-1/2 convolutional code, but

447: also for equalization.  A soft-output Viterbi algorithm (SOVA) is often

448: used in the latter application \cite{CHIW98}.

449:

450: VA decoders are currently used in about one billion cellphones, which is

451: probably the largest number in any application.  However, the largest

452: current consumer of VA processor cycles is probably digital video

453: broadcasting.  A recent estimate at Qualcomm is that approximately

454: $10^{15}$ bits per second are now being decoded by the VA in digital TV

455: sets around the world, every second of every day \cite{P05}.

456:

457: %\pagebreak

458: \section{General application to hidden Markov models}

459:

460: In 1973, I wrote a tutorial paper on the Viterbi algorithm for the

461: \textsc{Proceedings of the IEEE} \cite{F73} that has turned out to be my

462: most cited paper by far.  A recent search using Google Scholar shows 734

463: citations, far more than the 181 for my next-most-cited reference.

464:

465: One of the main points of that paper was that the VA can be applied to any

466: problem that involves detecting the output sequence of a discrete-time,

467: finite-state machine in memoryless noise---\ie to detection and pattern

468: recognition problems involving hidden Markov models (HMMs).  Of course,

469: decoding of convolutional codes and sequence detection on ISI channels

470: were the main applications discussed in that paper.

471:

472: During the 1970s and 1980s, the VA became widely used

473: in a variety of pattern recognition problems that could be described by

474: HMMs, particularly for speech recognition;  see

475: \cite{R89}.  Here the VA is often used as the M-step of an EM algorithm,

476: which also adjusts HMM parameters.

477:

478: Indeed, a recent search of IEEE Xplore shows that most

479: current IEEE references to the VA occur in such Transactions as

480: \textsc{Pattern Analysis and Machine Intelligence} or \textsc{Systems, Man

481: and Cybernetics},

482: rather than in \textsc{Communications} or \textsc{Information Theory}.

483: It seems that everyone in these fields knows how to ``Viterbi the data."

484:

485: Finally, in the past decade, the VA has become widely used in much more

486: distant fields such as computational biology, \eg to locate genes in DNA

487: sequences.  See for example \cite{HSF97}, with its ``Viterbi Exon-Intron

488: Locator" (VEIL).

489:

490: %\pagebreak

491: \section{Related algorithms}

492:

493: In the past decade, the development of the field of ``codes on graphs" and

494: their related decoding algorithms has led to a remarkable conceptual

495: unification of a variety of detection and estimation algorithms which have

496: been introduced under various names for various applications.

497:

498: In his 1996 dissertation, generalizing the earlier work of Gallager

499: \cite{G63} and Tanner \cite{T81}, Niclas Wiberg

500: \cite{W96, WLK95} developed the generic ``sum-product" and ``min-sum"

501: decoding algorithms for cycle-free graphs which may include

502: both symbol (observable) and state (hidden) variables.  For trellis

503: graphs, he showed that these reduce to the BCJR algorithm

504: \cite{BCJR74} and an algorithm equivalent to the Viterbi algorithm,

505: respectively.  For capacity-approaching codes such as turbo codes and

506: low-density parity-check (LDPC) codes, the sum-product algorithm with an

507: appropriate schedule becomes the standard iterative decoding algorithm

508: that is normally used with such  codes.

509:

510: Later authors (\eg \cite{AM00, KFL01}) have shown that the sum-product

511: algorithm is equivalent to Pearl's ``belief propagation" algorithm for

512: statistical inference on Bayesian networks;  the Baum-Welch or

513: ``forward-backward" algorithm for inference with hidden Markov models; and

514: the Kalman smoother for linear Gaussian state-space models.

515:

516: However, it is important to note that the min-sum algorithm

517: is a two-way ``forward-backward" algorithm.  The VA obtains the same

518: result with a ``forward-only" algorithm by storing a path history with

519: each survivor.  Of course, ``forward-only" is a key simplification,

520: particularly for real-time communications;  the min-sum algorithm would

521: never have been adopted in practice as widely as the VA has

522: been.\footnote{Interestingly, Ungerboeck discovered both the sum-product

523: and the min-sum algorithms for equalization applications in his thesis

524: \cite{U71};  however, he missed the forward-only version.}

525:

526: %\pagebreak

527: \section{Conclusion}

528:

529: The Viterbi algorithm has been tremendously important in communications.

530: For moderately complex (not capacity-approaching) codes, it has proved to

531: yield the best tradeoff between performance and complexity both on

532: power-limited channels, such as space channels, and on bandwidth-limited

533: channels, such as voiceband telephone lines.  In practice, in these regimes

534: it has clearly outstripped its earlier rivals, such as sequential decoding

535: and algebraic decoding.  (However, it seems likely that it will be

536: superseded in many of its principal communications applications by

537: capacity-approaching codes with iterative decoding.)

538:

539: Moreover, the VA has become a general-purpose algorithm for

540: decoding hidden Markov models in a huge variety of applications, from

541: speech recognition to computational biology.

542:

543: Andy Viterbi clearly did not envision the full import of the VA when

544: he first introduced it.  However, he and his colleagues at Linkabit and

545: Qualcomm were largely responsible for making it practical, and for driving

546: its widespread adoption in communications.  The history might have been

547: otherwise, but it wasn't.  In actual fact, no one deserves more credit for

548: this tremendously important invention than its actual inventor.

549:

550: \section*{Acknowledgments}

551: I am very grateful for comments on drafts of this paper by Keith Chugg,

552: Dan Costello, Bob Gallager, Jim Massey, Jim Omura, Sergio Verd\'{u} and

553: Andy Viterbi.

554:

555: %\pagebreak

556: {\small

557: \begin{thebibliography}{10}

558:

559: \bibitem{AM00}

560: S. M. Aji and R. J. McEliece, ``The generalized distributive law,"

561: \emph{IEEE Trans.\ Inform.\ Theory}, vol.\ 46, pp.\ 325--343, Mar.\

562: 2000.

563:

564: \bibitem{BCJR74}

565: L. R. Bahl, J. Cocke, F. Jelinek and J. Raviv,

566: ``Optimal decoding of linear codes for minimizing symbol error rate,"

567: \emph{IEEE Trans.\ Inform.\ Theory}, vol.\ IT--20, pp.\ 284--287, Mar.\

568: 1974.

569:

570: \bibitem{CHV71}

571: A. R. Cohen, J. A. Heller and A. J. Viterbi, ``A new coding technique

572: for asynchronous multiple access communication,"

573: \emph{IEEE Trans.\ Commun.\ Tech.}, vol.\  COM--19, pp.\ 849--855, Oct.\

574: 1971.

575:

576: \bibitem{C92}

577: O. M. Collins, ``The subtleties and intricacies of building a constraint

578: length 15 convolutional decoder,"

579: \emph{IEEE Trans.\ Commun.}, vol.\  40, pp.\ 1810--1819, Dec.\ 1992.

580:

581: %\bibitem{CCSDS87}

582: %Consultative Committee for Space Data Systems,

583: %``Recommendations for space data standard:  Telemetry channel coding,"

584: %Blue Book Issue 2, CCSDS 101.0-B2, Jan. 1987.

585:

586: \bibitem{CHIW98}

587: D. J. Costello, Jr., J. Hagenauer, H. Imai and S. B. Wicker,

588: ``Applications of error-control coding,"

589: \emph{IEEE Trans.\ Inform.\ Theory}, vol.\ 44, pp.\ 2531--2560, Oct.\

590: 1998.

591:

592: \bibitem{F67}

593: G. D. Forney, Jr., ``Review of random tree codes," Appendix A, Final

594: Report, Contract NAS2-3637, NASA CR73176, NASA Ames Res.\ Ctr.,

595: Moffett Field, CA, Dec.\ 1967.

596:

597: \bibitem{F70}

598: G. D. Forney, Jr., ``Coding and its application in space communications,"

599: \emph{IEEE Spectrum}, vol.\ 7, pp.\ 47--58, 1970.

600:

601: \bibitem{F72}

602: G. D. Forney, Jr., ``Maximum-likelihood sequence estimation of digital

603: sequences in the presence of intersymbol interference,"  \emph{IEEE Trans.\

604: Inform.\ Theory}, vol.\ IT--18, pp.\ 363--378, May 1972.

605:

606: \bibitem{F73}

607: G. D. Forney, Jr.,  ``The Viterbi algorithm,"  \emph{Proc.\ IEEE}, vol.\

608: 61, pp.\ 268--278, March 1973.

609:

610: \bibitem{F74}

611: G. D. Forney, Jr., ``Convolutional codes II.  Maximum-likelihood

612: decoding,"  \emph{Inform.\ and Control}, vol.\ 25, pp.\ 222--266, 1974.

613:

614: %\bibitem{F94}

615: %G. D. Forney, Jr., ``Trellises old and new,"  in \emph{Communications and

616: %Cryptography} (R. E. Blahut et al., eds.), pp.\ 115--128.  Boston:  Kluwer,

617: %1994.

618:

619: \bibitem{FBEM96}

620: G. D. Forney, Jr., L. Brown, M. V. Eyuboglu, and J. L. Moran III,

621: ``The V.34 high-speed modem standard,"  \emph{IEEE Commun.\ Mag.}, vol.\ 34,

622: no.\ 12, pp.\ 28--33, Dec.\ 1996.

623:

624: %\bibitem{F01}

625: %G. D. Forney, Jr.,  ``Codes on graphs:  Normal realizations,"  \emph{IEEE

626: %Trans.\ Inform.\ Theory}, vol.\  IT--13, pp.\ 520--548, Feb.\ 2001.

627:

628: \bibitem{G63}

629: R. G. Gallager, \emph{Low-Density Parity-Check Codes}.  Cambridge, MA:  MIT

630: Press, 1963.

631:

632: \bibitem{H68}

633: J. A. Heller, ``Short constraint length convolutional codes,"  Jet Prop.\

634: Lab., Space Prog.\ Summary 37--54, vol.\ III, pp.\ 171--177, 1968.

635:

636: \bibitem{H69}

637: J. A. Heller, ``Improved performance of short constraint length

638: convolutional codes,"  Jet Prop.\ Lab., Space Prog.\ Summary 37--56, vol.\

639: III, pp.\ 83--84, 1969.

640:

641: \bibitem{HJ71}

642: J. A. Heller and I. M. Jacobs, ``Viterbi decoding for satellite and

643: space communication,"  \emph{IEEE Trans.\ Commun.\ Tech.}, vol.\  COM--19,

644: pp.\ 835--848, Oct.\ 1971.

645:

646: \bibitem{HSF97}

647: J. Henderson, S. Salzberg and K. H. Fasman,

648: ``Finding genes in DNA with a hidden Markov model,"

649: \emph{J.\ Comput.\ Biol.}, vol.\ 4, pp.\ 127--141, 1997.

650:

651: \bibitem{ISW98}

652: K. A. S. Immink, P. H. Siegel and J. K. Wolf,

653: ``Codes for digital recorders,"

654: \emph{IEEE Trans.\ Inform.\ Theory}, vol.\  44, pp.\ 2260--2299, Oct.\

655: 1998.

656:

657: \bibitem{K71a}

658: H. Kobayashi, ``Application of probabilistic decoding to digital magnetic

659: recording systems,"  \emph{IBM J. Res.\ Dev.}, vol.\ 15, pp.\ 64--74,

660: Jan.\ 1971.

661:

662: \bibitem{K71b}

663: H. Kobayashi, ``Correlative level coding and maximum likelihood decoding,"

664: \emph{IEEE Trans.\ Inform.\ Theory}, vol.\  IT--17, pp.\ 586--594, Sept.\

665: 1971.

666:

667: \bibitem{KFL01}

668: F. R. Kschischang, B. J. Frey and H.-A. Loeliger,

669: ``Factor graphs and the sum-product algorithm,"

670: \emph{IEEE Trans.\ Inform.\ Theory}, vol.\ 47, pp.\ 498--519, Feb.\ 2001.

671:

672: \bibitem{M63}

673: J. L. Massey, \emph{Threshold Decoding}.  Cambridge, MA:  MIT Press, 1963.

674:

675: \bibitem{M99}

676: D. Morton, ``Andrew Viterbi, electrical engineer:  An oral history,"  IEEE

677: History Center, Rutgers U., New Brunswick, NJ, Oct.\ 1999.

678:

679: \bibitem{O69}

680: J. K. Omura,

681: ``On the Viterbi decoding algorithm,"  \emph{IEEE Trans.\

682: Inform.\ Theory}, vol.\ IT--15, pp.\ 177--179, 1969.

683:

684: \bibitem{O71}

685: J. K. Omura,

686: ``Optimal receiver design for convolutional codes and channels with

687: memory via control theoretical concepts,"

688: \emph{Info.\ Sci.}, vol.\ 3, pp.\ 243--266, July 1971.

689:

690: \bibitem{P05}

691: R. Padovani, ``Ten years of progress in CDMA," Viterbi Conference, Univ.\

692: So.\ Calif., Los Angeles, Mar.\ 2005.

693:

694: \bibitem{R89}

695: L. R. Rabiner,

696: ``A tutorial on hidden Markov models and selected applications in speech

697: recognition," \emph{Proc.\ IEEE}, vol.\ 77, pp.\ 257--286, Feb.\ 1989.

698:

699: \bibitem{T81}

700: R. M. Tanner, ``A recursive approach to low complexity codes,"

701: \emph{IEEE Trans.\ Inform.\ Theory}, vol.\ IT--27, pp.\ 533--547,

702: Sept.\ 1981.

703:

704: \bibitem{U71}

705: G. Ungerboeck, ``Nonlinear equalization of binary signals in Gaussian

706: noise,"

707:   \emph{IEEE Trans.\ Commun.\ Tech.}, vol.\  COM--19, pp.\ 1128--1137,

708: Dec.\ 1971.

709:

710: \bibitem{U82}

711: G. Ungerboeck, ``Channel coding with multilevel/phase signals,"

712:   \emph{IEEE Trans.\ Inform.\ Theory}, vol.\ IT--28, pp.\ 55--67, Jan.\

713: 1982.

714:

715: %\bibitem{V86}

716: %S. Verd\'{u}, ``Minimum probability of error for asynchronous Gaussian

717: %multiple-access channels,"

718: %  \emph{IEEE Trans.\ Inform.\ Theory}, vol.\ IT--32,  pp.\ 85--96, Jan.\

719: %1986.

720:

721: \bibitem{V67a}

722: A. J. Viterbi,  ``Error bounds for convolutional codes and an

723: asymptotically optimum decoding algorithm,"  \emph{IEEE

724: Trans.\ Inform.\ Theory}, vol.\  IT--13, pp.\ 260--269, April 1967.

725:

726: %\bibitem{V67b}

727: %A. J. Viterbi,  ``Orthogonal tree codes for communication in the

728: %presence of additive white Gaussian noise,"

729: %\emph{IEEE Trans.\ Commun.\ Tech.}, vol.\  COM--15, pp.\ 238--242,

730: %1967.

731:

732: \bibitem{V70}

733: A. J. Viterbi,  ``Review of \emph{Statistical Theory of Signal

734: Detection} (2nd ed.), by Carl W. Helstrom,"

735: \emph{IEEE Trans.\ Inform.\ Theory}, vol.\ IT--16, p.\ 653, Sept.\

736: 1970.

737:

738: \bibitem{V71}

739: A. J. Viterbi,  ``Convolutional codes and their performance in

740: communication systems,"

741: \emph{IEEE Trans.\ Commun.\ Tech.}, vol.\  COM--19, pp.\ 751--772,

742: Oct.\ 1971.

743:

744: \bibitem{V90}

745: A. J. Viterbi,  ``From proof to product," 1990 IEEE Communication Theory

746: Workshop, Ojai, CA, April 1990.

747:

748: %\bibitem{VO79}

749: %A. J. Viterbi and J. K. Omura,

750: %\emph{Principles of Digital Communication and Coding}.

751: %New York:  McGraw-Hill, 1979.

752:

753: \bibitem{W96}

754: N. Wiberg, ``Codes and decoding on general graphs,"

755: Ph.D.\ dissertation, Link\"{o}ping U., Link\"{o}ping, Sweden, 1996.

756:

757: \bibitem{WLK95}

758: N. Wiberg, H.-A.\ Loeliger and R. K\"{o}tter, ``Codes and iterative

759: decoding on general graphs," \emph{Eur.\ Trans.\ Telecomm.}, vol.\ 6, pp.\

760: 513--525, Sept./Oct.\ 1995.

761:

762: \bibitem{WR61}

763: J. M. Wozencraft and B. Reiffen, \emph{Sequential Decoding}.

764:   Cambridge, MA:  MIT Press, 1961.

765:

766: \bibitem{Y64}

767: H. Yudkin, ``Channel state testing in information decoding,"  Sc.D.\

768: dissertation, Dept.\ Elec.\ Engg., MIT, Cambridge, MA, 1964.

769:

770: \end{thebibliography}

771: }

772: \end{document}

773:

774:

775: