1: %% LyX 1.3 created this file. For more info, see http://www.lyx.org/.
2: %% Do not edit unless you really know what you are doing.
3: \documentclass[12pt,english]{article}
4: \usepackage{times}
5: \usepackage[T1]{fontenc}
6: \usepackage[latin1]{inputenc}
7: \usepackage{graphicx}
8: \usepackage{amssymb}
9:
10: \makeatletter
11:
12: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%% LyX specific LaTeX commands.
13: \newcommand{\noun}[1]{\textsc{#1}}
14: %% Bold symbol macro for standard LaTeX users
15: \providecommand{\boldsymbol}[1]{\mbox{\boldmath $#1$}}
16:
17: %% Because html converters don't know tabularnewline
18: \providecommand{\tabularnewline}{\\}
19:
20: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%% User specified LaTeX commands.
21: \usepackage{graphicx}
22: \usepackage{amssymb}
23:
24: \usepackage{graphicx}
25: \usepackage{amssymb}
26:
27: \usepackage{graphicx}
28: \usepackage{amssymb}
29:
30: \usepackage{graphicx}
31: \usepackage{amssymb}
32:
33: \usepackage{graphicx}
34: \usepackage{amssymb}
35:
36: \usepackage{graphicx}
37: \usepackage{amssymb}
38:
39: \usepackage{array}
40: \usepackage{amssymb}
41:
42: \headheight=0in
43: \headsep=0in
44: \oddsidemargin=0in
45: \evensidemargin=0in
46: \textheight=8.5in
47: \textwidth=6.5in
48:
49: \usepackage{amsmath,amssymb,amscd,enumerate,amsthm}
50: \usepackage{graphics}
51: \usepackage{epsfig}
52:
53: \newcommand{\pe}{\psi}
54: \newtheorem{theorem}{Theorem}[section]
55: \newtheorem{lemma}[theorem]{Lemma}
56: \newtheorem{proposition}[theorem]{Proposition}
57: \newtheorem{corollary}[theorem]{Corollary}
58: \newtheorem{algorithm}{Algorithm}[section]
59:
60: %\newenvironment{proof}[1][Proof]{\begin{trivlist}
61: %\item[\hskip \labelsep {\bfseries #1}]}{\end{trivlist}}
62: \newenvironment{definition}[1][Definition]{\begin{trivlist}
63: \item[\hskip \labelsep {\bfseries #1}]}{\end{trivlist}}
64: \newenvironment{example}[1][Example]{\begin{trivlist}
65: \item[\hskip \labelsep {\bfseries #1}]}{\end{trivlist}}
66: \newenvironment{remark}[1][Remark]{\begin{trivlist}
67: \item[\hskip \labelsep {\bfseries #1}]}{\end{trivlist}}
68:
69: \usepackage{babel}
70:
71: \usepackage{babel}
72:
73: \usepackage{babel}
74:
75: \usepackage{babel}
76:
77: \usepackage{babel}
78:
79: \usepackage{babel}
80:
81: \usepackage{babel}
82:
83: \usepackage{babel}
84: \makeatother
85: \begin{document}
86:
87: \title{A Sublinear Algorithm of Sparse Fourier Transform for Nonequispaced
88: Data\thanks{This
89: work was partially supported by NSF grant DMS-03168875 and AFOSR grant 109-6047.}}
90:
91:
92: \author{Jing Zou %
93: \footnote{Program of Applied and Computational Mathematics, Princeton University,
94: Fine Hall, Washington Road, Princeton, NJ 08544, (\texttt{jzou@math.princeton.edu})%
95: }}
96: \date{}
97: \maketitle
98: \begin{abstract}
99: We present a sublinear randomized algorithm to compute a sparse Fourier
100: transform for nonequispaced data. Suppose a signal $S$ is known to
101: consist of $N$ equispaced samples, of which only $L<N$ are available.
102: If the ratio $p=L/N$ is not close to 1, the available data are typically
103: non-equispaced samples. Then our algorithm reconstructs a near-optimal
104: $B$-term representation $R$ with high probability $1-\delta$, in
105: time and space $poly(B,\log(L),\log p,\log(1/\delta),$ $\epsilon^{-1})$,
106: such that $\Vert S-R\Vert^{2}\leq(1+\epsilon)\Vert S-R_{opt}^{B}\Vert^{2}$,
107: where $R_{opt}^{B}$ is the optimal $B$-term Fourier representation
108: of signal $S$. The sublinear $poly(\log L)$ time is compared to
109: the superlinear $O(N\log N+L)$ time requirement of the present best
110: known Inverse Nonequispaced Fast Fourier Transform (INFFT) algorithms.
111: Numerical experiments support the advantage in speed of our algorithm
112: over other methods for sparse signals: it already outperforms INFFT
113: for large but realistic size $N$ and works well even in the situation
114: of a large percentage of missing data and in the presence of noise.
115: \end{abstract}
116:
117: \section{Introduction}
118:
119: We consider the problem in which the recovery of a discrete time signal
120: $S$ of length $N$ is sought when only $L$ signal values are known.
121: In general, this is of course an insoluble problem; we consider it
122: here under the additional assumption that the signal has a sparse
123: Fourier transform. Let us fix the notations: the signal is denoted
124: by $S=(S(t))_{t=0,\ldots,N-1}$, but we have at our disposal only
125: the $(S(i))_{i\in T}$, where the set $T$ is a subset of $\{0,\ldots,N-1\}$
126: and $|T|=L$. The Fourier transform of signal $S$ is $\hat{S}=(\hat{S}(0),\ldots,\hat{S}(N-1))$,
127: defined by $\hat{S}(\omega)=\frac{1}{\sqrt{N}}\sum_{t=0}^{N-1}S(t)e^{-2\pi i\omega t/N}$.
128: In terms of the Fourier basis functions $\phi_{\omega}(t)=\frac{1}{\sqrt{N}}e^{2\pi i\omega t/N}$,
129: $S$ can be written as $S=\sum_{\omega=0}^{N-1}\hat{S}(\omega)\phi_{\omega}(t)$;
130: this is the (discrete) Fourier representation of $S$. A signal $S$
131: is said to have a $B$-sparse Fourier representation, if there exists
132: a subset $\Omega\subset\{0,\ldots,N-1\}$ with $|\Omega|=B$, and
133: values $c(\omega)\neq0$ for $\omega\in\Omega$, such that $S(t)=\sum_{\omega\in\Omega}c(\omega)\phi_{\omega}(t)$.
134: For a signal that does not have a $B$-sparse Fourier representation,
135: we denote by $R_{opt}^{B}(S)$ the optimal $B$-term Sparse Fourier
136: representation of $S$.
137:
138: This paper presents a sublinear algorithm to recover a $B$-sparse
139: Fourier representation of a signal $S$ from incomplete data. Our
140: algorithm also extends to the case where the Fourier transform $\hat{S}$
141: is not $B$-sparse, where we aim to find a near-optimal $B$-term
142: Fourier representation, i.e. $R=\sum_{\omega\in\Gamma}c(\omega)\phi_{\omega}$,
143: such that \begin{equation}
144: \| S-R\|_{2}^{2}=\Vert S-\sum_{\omega\in\Gamma}c(\omega)\phi_{\omega}\Vert_{2}^{2}\leq(1+\epsilon)\Vert S-R_{opt}^{B}(S)\Vert_{2}^{2}.\end{equation}
145:
146:
147: A typical situation where our study applies is the observation of
148: non-equispaced data, where the samples are nevertheless all elements
149: of $\tau\mathbb{Z}$ for some $\tau>0$. For a signal with evenly
150: spaced data, the famous Fast Fourier Transform (FFT) computes all
151: the Fourier coefficients in time $O(N\log N)$. However, the requirement
152: of equally distributed data by FFT raises challenges for many important
153: applications. For instance, because of the occurrence of instrumental
154: drop-outs, the data may be available only on a set of non-consecutive
155: integers. Another example occurs in astronomy, where the observers
156: cannot completely control the availability of observational data:
157: a telescope can only see the universe on nights when skies are not
158: cloudy. In fact, computing the Fourier representation from irregularly
159: spaced data has wide applications \cite{Ware} in processing astrophysical
160: and seismic data, the spectral method on adaptive grids, the tracking
161: of Lagrangian particles, and the implementation of semi-Lagrangian
162: methods.
163:
164: In many of these applications, a few large Fourier coefficients already
165: capture the major time-invariant wave-like information of the signal,
166: and we can thus ignore very small Fourier coefficients. To find a
167: small set of the largest Fourier coefficients and hence a (near) optimal
168: $B$-sparse Fourier representation of a signal that describes most
169: of the signal characteristics is a fundamental task in applied Fourier
170: Analysis.
171:
172: An equivalent version of this problem is as follows: define the matrix
173: $A:=(e^{2\pi ikt_{j}})_{k=0,\ldots,N-1;}$ $_{j=0,\ldots,L-1}$, where
174: the $t_{j}$ are the locations of the available samples. Given $S(t_{j})$,
175: we want to reconstruct the signal $S$, or equivalently, its Fourier
176: coefficients $\hat{S}_{k}$, so that $A\hat{S}=S$. This linear system
177: is under-determined, since $L<N$. Several algorithms \cite{Bjork}\cite{Hanke}
178: \cite{KP} have provided efficient approaches to solve this problem.
179: Among all INFFT algorithms, the iterative CGNE approach of \cite{FGS}
180: in the benchmark software NFFT 2.0 is one of the fastest methods;
181: it takes time $O(L^{1+(d-1)/\beta}\log L)$, where $L$ is the number
182: of available points, $d$ is the number of dimensions, and $\beta>1$
183: is the smoothness for the original signal. The super-linearity relationship
184: between the running time and $N$ (recall $L=pN$, where $p$ is the
185: percentage of available data) poses difficulties in processing large
186: dimensional signals, which have nothing to do with the unequal spacing.
187: It follows that identifying a sparse number of significant modes and
188: amplitudes is expensive for even fairly modest $N$. Our goal in this
189: paper is to discuss much faster (sublinear) algorithms that can identify
190: the sparse representation or approximation with coefficients $a_{1},\ldots,a_{B}$
191: and modes $\omega_{1},\ldots,\omega_{B}$ for unevenly spaced data.
192: These algorithms will not use all the samples $S(0),\ldots,S(N-1)$,
193: but only a very sparse subset of them.
194:
195: Our approach is based on the paper \cite{GGIMS} that shows how to
196: construct the Fourier representation for a signal $S$ with $B$-sparse
197: Fourier representation in time and space $poly(B,\log N,$ $1/\epsilon,\log(1/\delta))$
198: on equal spacing data. The algorithm contains some random elements
199: (which do not depend on the signal); their approach guarantees that
200: the error of estimation is of order $\epsilon\Vert S\Vert^{2}$ with
201: probability exceeding $1-\delta$. The ideas in \cite{GGIMS} have
202: also been applied by its authors to sparse wavelet, wavelet packet
203: representation, and histograms \cite{GGIKMS}. We have dubbed the
204: whole family of algorithms RA$\ell$STA (for Randomized Algorithm
205: for Sparse Transform Approximation); when dealing only with Fourier
206: Transforms, as is the case here, we specialize it to RA$\ell$SFA
207: (F for Fourier). Zou, Gilbert, Strauss and Daubechies \cite{Zou}
208: improved and implemented the algorithm greatly. It convincingly beats
209: FFT when the number of grid points $N$ is reasonably large. The crossover
210: point lies at $N\simeq25,000$ in one dimension, and at $N\simeq460$
211: for data on a $N\times N$ grid in two dimensions for a two-mode signal.
212: When $B=13$, RA$\ell$SFA surpasses FFT at $N\geq300,000$ for
213: one dimensional signals and $1100$ for two dimensional signals.
214:
215: In this paper, we modify RA$\ell$SFA to solve the irregularly spaced
216: data problem. The new NERA$\ell$SFA (Nonequispaced RA$\ell$SFA)
217: uses sublinear time and space $poly(B,\log L,1/\epsilon,\log(1/\delta),$
218: $\log p)$ to find a near-optimal $B$-term Fourier representation,
219: such that $\Vert S-R\Vert^{2}\leq(1+\epsilon)\Vert S-R_{opt}^{B}\Vert^{2}$
220: with high probability $1-\delta$. Similar to the RA$\ell$SFA algorithm,
221: it outperforms existing INFFT algorithms in processing sparse signals
222: of large size.
223:
224: \textbf{Notation and Terminology} Denote by $\chi_{T}$ a signal that
225: equals 1 on a set $T$ and zero elsewhere in the time domain. We say
226: a signal $H$ is $q$ percent pure, if there exists a frequency $\omega$
227: and a signal $\rho$, such that $H=ae^{2\pi i\omega t/N}+\rho$, with
228: $|a|^{2}\geq(q\%)\| H\|^{2}$. To quantify the unevenness of the data,
229: introduce a parameter $p=L/N$ to be the percentage of the available
230: data over all the data, where $L$ is the number of available data.
231: Obviously a larger $p$ corresponds to more information about the
232: signal. We use $L^{2}$-norm throughout the paper, which is denoted
233: by $\|.\|$. The convolution $F*G$ is defined as $F*G(t)=\sum_{s}F(s)G(t-s)$.
234: It follows that $\widehat{F*G}(\omega)=\sqrt{N}\hat{F}(\omega)\hat{G}(\omega)$.
235:
236: A Box-car filter with width $2k+1$ is defined as follows: \begin{eqnarray*}
237: \chi_{k}(t) & = & \left\{ \begin{array}{cc}
238: \frac{\sqrt{N}}{2k+1} & \,\,\,\,\,\,\,\textrm{$if\,\,\,\,\,-k\leq t\leq k$ },\\
239: 0 & \,\,\,\,\,\,\,\,\,\, if\,\,\, t>k\,\, or\,\, t<-k\end{array}\right.\end{eqnarray*}
240: In the frequency domain, this filter is in the form of \begin{equation}
241: \hat{\chi}_{k}(\omega)=\left\{ \begin{array}{c}
242: \frac{\sin((2k+1)\pi\omega/N)}{(2k+1)\sin(\pi\omega/N)}\,\,\,\,\,\,\,\,\, if\,\,\omega\neq0\\
243: \,\,\,\,\,\,\,\,\,\,1\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\, if\,\,\,\omega=0\end{array}\right.\end{equation}
244:
245:
246: A dilation operation on signal $H$ with a dilation factor $\sigma$
247: is defined as $H^{(\sigma)}(t)=H(\sigma t)$ for every point $t$.
248:
249: \textbf{Organization} The paper is organized as follows. In Section
250: 2, we give the outline of the RA$\ell$SFA algorithm. Section 3 presents
251: the modification of RA$\ell$SFA that deals with the unavailability
252: of some samples by a greedy method. In Section 4, an interpolation
253: technique is introduced for better performance. Finally, we compare
254: numerical results with existing algorithms in Section 5.
255:
256:
257: \section{Set-up of RA$\ell$SFA}
258:
259: Given a signal $S$ of length $N$, the optimal $B$-term Fourier
260: representation $R_{opt}^{B}(S)$ uses only $B$ frequencies; it is
261: simply a truncated version of the Fourier representation of $S$,
262: retaining only the $B$ largest coefficients. The following theorem
263: is the main result of \cite{GGIMS}.
264:
265: \begin{theorem}
266: Let an accuracy factor \(\epsilon\), a failure probability \(\delta\), and a sparsity target \(B \in \mathbb{N}, B \ll N\)
267: be given. Then for an arbitrary signal \(S\) of length \(N\), RA\(\ell\)SFA will find a
268: \(B\)-term approximation \(R\) to \(S\), at a cost in time and space of
269: order \(poly(B,\log(N), 1/\epsilon, \log(1/\delta) )\) and with probability exceeding \(1-\delta\), so that \(\|S-R\|^2 \leq (1+\epsilon)\|S-R_{opt}^B(S)\|^2_2\).
270: \end{theorem}\addtocounter{algorithm}{+1}
271:
272: The striking fact is that RA$\ell$SFA can build a near-optimal representation
273: $R$ in sublinear time $poly(\log N)$ instead of the $O(N\log N)$
274: time requirement of other algorithms. Its speed surpasses FFT as long
275: as the length of a signal is sufficiently large. If a signal is composed
276: of only $B$ modes, RA$\ell$SFA constructs $S$ without any error.
277:
278: The main procedure is a Greedy Pursuit with the following steps:
279:
280: \begin{algorithm} \label{alg:total1}{\textsc{Total Scheme \cite{Zou}}}
281: \begin{enumerate}
282: \item Initialize the representation signal \(R\) to 0. Set the maximum number of iterations \(ITER=B\log(N)\log(1/\delta)/\epsilon^{2}\).
283: \item Test whether \(\Vert S-R \Vert\) appears to be less than some user threshold, $\iota$.
284: If yes, return the representation signal \(R\) and the whole algorithm ends; else go to step 3.
285: \item Locate Fourier Modes \(\omega\) for the signal \(S-R\) by isolation and group test procedures.
286: \item Estimate Fourier Coefficients at \(\omega\): \(\widehat{(S-R)}(\omega)\).
287: \item Update the representation signal \(R\leftarrow R+\widehat{(S-R)}(\omega) \phi_{\omega}(t)\).
288: \item If the total number of iterations is less than \(ITER\), go to 2; else return the representation \(R\).
289: \end{enumerate}\label{alg:total1}
290:
291: \end{algorithm}\addtocounter{theorem}{+1}
292:
293: The basic idea of Algorithm \ref{alg:total1} is to identify significant
294: frequencies and then estimate their corresponding coefficients. In
295: order to locate those nonzero frequencies, we first construct a new
296: signal where a previous significant frequency becomes predominant.
297: Then a recursive approach called group test finds the exact label
298: of this predominant mode, by splitting intervals, comparing energies,
299: and keeping only intervals with large energies. After the frequency
300: is located, coefficient estimation procedures give a good estimation
301: by taking means and medians of random samples.
302:
303:
304: \section{NERA$\ell$SFA with Greedy Technique}
305:
306: RA$\ell$SFA samples from a signal, implicitly assuming that uniform
307: and random sampling is possible, with a fixed cost per sample. This
308: raises challenges for processing unevenly spaced data. Specifically
309: speaking, Fourier coefficients and norms can not be estimated properly.
310: Thus one has to modify steps 3 and 4 accordingly. In this section,
311: NERA$\ell$SFA, a modified version of RA$\ell$SFA with greedy technique,
312: is introduced to overcome these problems.
313:
314: The basic idea is a greedy pursuit for an available data point. Whenever
315: the algorithm samples at a missing data point, it searches some other
316: random indices $t$ until it finds one available data point $S(t)$
317: as the substitute. This technique is used in estimating both Fourier
318: coefficients and norms.
319:
320: A good data structure is important to save running time cost. We denote
321: the availability of a data point by a label, say +1 for available
322: and 0 for unavailable. Hence, the label is tested to see if its corresponding
323: sample is valid. An alternative solution is to store all the sorted
324: labels of available data in a long list. However, each search takes
325: time $O(\log(N))$, which introduces a $O(\log N)^{2}$ factor into
326: the whole computation. As the empirical results show, the running
327: time of the NERA$\ell$SFA algorithm is linear in $\log N$. For this
328: reason, we selected the first method.
329:
330: We now give a more detailed discussion of the different procedures
331: used in steps 3 and 4 of Algorithm \ref{alg:total1}.
332:
333:
334: \subsection{Estimating Fourier Coefficients}
335:
336: First, we give the procedure for estimating Fourier coefficients for
337: unevenly spaced data as follows.
338:
339: \begin{algorithm} {\textsc{Estimating Individual Fourier Coefficients}} \label{estcoef} \\
340: Input a signal \(S\), a frequency \(\omega\), \(n=2\log(1/\delta)\), \(m=8/\epsilon^2\).
341: \begin{enumerate}
342: \item For \(i=1, \ldots, n\)
343: \item For \(j=1,\ldots, m\) \\
344: \text{}\hspace{10mm} Randomly generate the index \(t\) until \(S(t)\) is available. \\
345: \text{}\hspace{10mm} Then let \(t_{ij}=t\). Evaluate \(k(t_{ij})=<S(t_{ij}), \phi_{\omega}(t_{ij})>\).
346: \item Take the means of \(m\) samples \(k(t_{ij})\), i.e. \(p(i)=\frac{1}{m}\sum_{j=1}^{m} {k(t_{ij})}\), where \(i=1,\ldots, n\).
347: \item Take the median of \(n\) samples \(c=median_i (p(i))\), where \(i=1,\ldots,n\).
348: \item Return \(c\) as the estimation of the Fourier coefficient \(\hat{S}(\omega)\).
349: \end{enumerate} \label{alg:coeff}
350: \end{algorithm}\addtocounter{theorem}{+1}
351:
352: Next, we show that using unevenly spaced data leads to a very good
353: approximation to the true coefficient. The first lemma is one of the most
354: fundamental theorems in randomized algorithms. It essentially states
355: that by repeating an experiment enough times, a small probability
356: event will happen eventually.
357:
358: \begin{lemma} \label{lm:rept}
359: If an event happens with probability \(p\), then in the first \(k> \log \delta / \log(1-p)\) iterations, it happens at least once with success
360: probability \(1-\delta\).
361: \label{lm:rept}\end{lemma}\addtocounter{algorithm}{+1}
362:
363: In our case, only $p=L/N$ percentage of the data is available, so
364: that $k>\log\delta/\log(1-L/N)$ trials are needed to generate one
365: available data point with success probability at least $1-\delta$.
366:
367: In fact, most of the Fourier coefficients of a characteristic function
368: on a typical set $T$ are small, under some conditions. The following
369: lemma makes this more explicit.
370:
371: \begin{lemma} \label{lm:smallfilter}
372: Suppose the components \(X_j\) of a discrete random variable \(X=(X_j)_{j=0} ^{N-1}\) are identically and independently distributed in \( \{0,1\} \), with \(p = Prob(X_j=1)\).
373: Define the random set \(T=\{j \in \{0, \ldots, N-1 \} |X_j=1 \}\) to be the set of all available data; \(\hat{\chi}_T(\omega)\) is the Fourier transform of \(\chi_T(t)= \sum_{j=0}^{N-1}X_j\delta_{j,t}\). If \(p \geq \frac{1}{1+(N-1) \lambda \tau^2}\),
374: then \\
375: \begin{equation}
376: Prob(|\hat{\chi}_T(\omega)|^2 \geq \lambda) \leq \tau^2.
377: \end{equation}
378: \label{lm:smallfilter}\end{lemma}\addtocounter{algorithm}{+1} \begin{proof}
379: First, we claim that \(E(|\hat{\chi}_T(\omega)|^2) \leq \frac{(1-p)}{p(N-1)}\). \\
380: Since \(\hat{\chi}_T(\omega) = \frac{1}{pN}\sum_{j \in T}(e^{2 \pi i \omega j /N})\),
381: we have
382: \begin{eqnarray}
383: |\hat{\chi}_{T}(\omega)|^{2}=\frac{1}{p^{2}N^{2}}\sum_{j,k\in T}e^{2\pi i\omega(j-k)/N} \\
384: =\frac{1}{p^{2}N^{2}}\sum_{j\in T}1+\frac{1}{p^{2}N^{2}}\sum_{j,k\in T,j\neq k}e^{2\pi i\omega(j-k)/N}.
385: \nonumber \end{eqnarray}
386: It follows that
387: \begin{equation}
388: E(|\hat{\chi}_{T}(\omega)|^{2})=\frac{1}{pN}+\frac{1}{p^{2}N^{2}}p\frac{pN-1}{N-1}\sum_{j,k=0,j\neq k}^{N-1}e^{2\pi i\omega(j-k)/N}.
389: \nonumber \end{equation}
390: Observe that \(\sum_{j,k=0,j\neq k}^{N-1}e^{2\pi i\omega(j-k)/N}=|\sum_{j=0}^{N-1}e^{2\pi i\omega j/N}|^{2}-\sum_{j=0}^{N-1}1=(N\delta_{\omega,0})^{2}-N\), hence
391: \begin{eqnarray}
392: E(|\hat{\chi}_{T}(\omega)|^{2}) = \frac{1}{pN}+\frac{1}{pN^{2}}\frac{pN-1}{N-1}(N^{2}\delta_{\omega,0}-N)
393: =\frac{1}{pN}\left\{ 1+\frac{pN-1}{N-1}(N \delta_{\omega,0}-1)\right\} \nonumber \\
394: =\frac{1}{pN(N-1)}\left\{ N-1+(pN-1)(N\delta_{\omega,0}-1)\right\}. \nonumber
395: \end{eqnarray}
396: By Markov's Inequality, when \(\omega \neq 0\), we have
397: \begin{equation}
398: Prob(|\hat{\chi}_{T}(\omega)|^{2}\geq\lambda)\leq\frac{E(|\hat{\chi}_{T}(\omega)|^{2})}{\lambda}=\frac{1-p}{p(N-1)\lambda}.\nonumber \end{equation}
399: Since \(p\geq\frac{1}{1+(N-1)\lambda \tau^{2}}\), it follows that
400: \begin{equation}
401: Prob(|\hat{\chi}_{T}(\omega)|^{2}\geq\lambda)\leq\tau^{2}. \nonumber \end{equation}
402: That is, for any \(\omega\neq 0\), with probability at least \(1-\tau^{2}\)
403: \begin{equation}
404: |\hat{\chi}_{T}(\omega)|\leq \sqrt{\lambda}.
405: \end{equation}
406: \qquad \end{proof}
407:
408: In particular, we want both $\lambda$ and $\tau$ to be small, meaning
409: that $p$ cannot be too small itself.
410:
411: Next, we consider the conditions for the two coefficients $\hat{S}(\omega)$
412: and $\hat{S}_{1}(\omega)=\widehat{S\cdot\chi_{T}}(\omega)$ to be
413: close.
414:
415: \begin{lemma} \label{lm:diffest}
416: Suppose the parameters \(T\), \(S\), \(\chi_T(t)\), \(\lambda\), \(\tau\), \(p\) are as stated in Lemma \ref{lm:smallfilter}, and define \(S_1(t) = S(t)\chi_T(t)\). If \( p\geq \frac{1}{1+(N-1)\lambda \tau^2}\), and \(\tau \leq \sqrt{1-(1-\delta)^\frac{1}{B}}\),
417: then, for any \(\omega\),
418: \begin{equation}
419: |\hat{S}(\omega) - \hat{S}_1(\omega)| \leq \sqrt{B \lambda} \|S\|_2.
420: \end{equation}
421: with probability exceeding \(1-\delta\).
422: \label{lm:diffest}\end{lemma}\addtocounter{algorithm}{+1} \begin{proof}
423: Suppose the significant terms of signal \(S\) are \(\omega_i\), where \(i=1, \ldots, B\).\\
424: Since \(S_1(t) = S(t) \chi_T(t)\) and thus \(\hat{S}_1(\omega) =\hat{S}(\omega)* \hat{\chi}_T(\omega)\),
425: then
426: \begin{equation}
427: \begin{split}
428: \hat{S}_{1}(\omega_{j}) = \sum_{i=1}^{B}\hat{S}(\omega_{i})\hat{\chi}_T(\omega_{j}-\omega_{i})
429: =\hat{S}(\omega_{j})\hat{\chi}_{T}(0)+\sum_{i=1,\omega_{j}\neq\omega_{i}}^{B}\hat{S}(\omega_{i})\hat{\chi}_T(\omega_{j}-\omega_{i}) \\
430: =\hat{S}(\omega_{j})+\sum_{i=1,\omega_{j}\neq\omega_{i}}^{B}\hat{S}(\omega_{i})\hat{\chi}_T(\omega_{j}-\omega_{i}). \nonumber
431: \end{split}
432: \end{equation}
433: Therefore
434: \begin{equation}
435: |\hat{S}_{1}(\omega_{j})-\hat{S}(\omega_{j})| = |\sum_{i=1,\omega_{j}\neq\omega_{i}}^{B}\hat{S}(\omega_{i})\hat{\chi}_T(\omega_{j}-\omega_{i})|
436: \end{equation}
437: \begin{equation}
438: \leq\sqrt{\sum_{i=1,\omega_{j}\neq\omega_{i}}^{B}|\hat{S}(\omega_{i})|^{2}}\sqrt{\sum_{i=1,\omega_{j}\neq\omega_{i}}^{B}|\hat{\chi}_{T}(\omega_{j}-\omega_{i})|^{2}} \nonumber \\
439: \leq\| S\|_2 \sqrt{\sum_{i=1,\omega_{j}\neq\omega_{i}}^{B}|\hat{\chi}_{T}(\omega_{j}-\omega_{i})|^{2}}.
440: \nonumber \end{equation}
441: Because \(p\geq\frac{1}{1+(N-1)\lambda \tau^{2}}\), we have \(|\hat{\chi}_{T}(\omega)|^{2}\leq\lambda\)
442: with probability at least \(1-\tau^2\) for any \(\omega\neq0\). This implies that
443: \(|\hat{S}_{1}(\omega_{j})-\hat{S}(\omega_{j})|\leq\| S\|_2 \sqrt{B \lambda}\) with probability at least \((1-\tau^2)^B \geq (1-\delta)\) \\
444: Then
445: \begin{equation}
446: |\hat{S}_{1}(\omega_{j})-\hat{S}(\omega_{j})|\leq \sqrt{B \lambda} \| S\|_2.
447: \end{equation}
448: For those \(\omega\notin\{\omega_{i},i=1,\ldots,B\}\),
449: \begin{eqnarray}
450: \hat{S}_{1}(\omega)=\sum_{i=1}^{B}\hat{S}(\omega_{i})\hat{\chi}_T(\omega-\omega_{i}), \nonumber \\
451: \end{eqnarray}
452: and we conclude similarly that \(|\hat{S}_{1}(\omega)-\hat{S}(\omega)|\leq \sqrt{B \lambda} \| S\|_2\), with probability at least \(1-\delta\).
453: \end{proof}
454:
455: We shall use Algorithm \ref{alg:coeff} to estimate $\hat{S}_{1}(\omega)$;
456: we now look at how close the approximation $A$ (i.e. the output of
457: Algorithm \ref{alg:coeff}) of $\hat{S}_{1}(\omega)$ is to the true
458: coefficient $\hat{S}(\omega)$.
459:
460: \begin{lemma}
461: \label{lm:coefftot} For a set of parameters \(T\), \(S\), \(\chi_T(t)\), \(\lambda\), \(\tau\), \(p\) as stated in Lemma \ref{lm:smallfilter}, if \( p\geq \frac{1}{1+(N-1)\lambda \tau^2}\), and \(\tau \leq \sqrt{1-(1-\delta)^{1/B} }\), then Algorithm \ref{alg:coeff} for signal \(S_1(t)=S(t) \* \chi_T(t)\) gives a good estimation \(A\) of \(\hat{S}(\omega)\), such that
462: \begin{equation}
463: |A-\hat{S}(\omega)|\leq (\sqrt{\lambda} + \sqrt{B \lambda})\|S\|_2.
464: \end{equation}
465: with high probability.\label{lm:coefftot}\end{lemma}\addtocounter{algorithm}{+1} \begin{proof}
466: Lemma 4.2 in \cite{Zou} says that the coefficient estimation algorithm returns \(A\), such that
467: \begin{equation}
468: |A-\hat{S}_1(\omega)|\leq \sqrt{\lambda} \|S\|_2.
469: \end{equation}
470: By Lemma \ref{lm:diffest}
471: \begin{equation}
472: |\hat{S}_{1}(\omega)-\hat{S}(\omega)|\leq \sqrt{B \lambda} \| S\|_2.
473: \end{equation}
474: Thus
475: \begin{equation}
476: |A-\hat{S}(\omega)| \leq |A-\hat{S}_1(\omega)|+|\hat{S}_{1}(\omega)-\hat{S}(\omega)|\leq(\sqrt{\lambda}+\sqrt{B \lambda})\| S\|_2.
477: \end{equation}
478: \end{proof}
479:
480: Finally, we derive the conclusion about estimating coefficients.
481:
482: \begin{theorem}
483: \label{lm:mycoeff2} For a set of parameters \(T\), \(S\), \(\chi_T(t)\), \(\lambda\), \(\tau\), \(p\) as stated in Lemma
484: \ref{lm:smallfilter}, if \(\lambda \leq \frac{\epsilon}{2(B+1)}\) and
485: \( p\geq \frac{1}{1+(N-1)\lambda \tau^2}\), then every application of Algorithm \ref{estcoef} produces, for each frequency \(\omega\) and each signal \(S\), and each \(\lambda>0\), with high
486: probability, an output \(A\) (after inputting \((S, \omega, \epsilon)\) ), such that \(|A-\hat{S}(\omega)|^2 \leq \epsilon \|S\|_2^2\).
487: \end{theorem}\addtocounter{algorithm}{+1} \begin{proof}
488: By Lemma \ref{lm:coefftot},
489: \begin{equation}
490: |A-\hat{S}(\omega)| \leq (\sqrt{\lambda} + \sqrt{B \lambda}) \|S\|_2.
491: \end{equation}
492: Thus we have
493: \begin{equation}
494: |A-\hat{S}(\omega)|^2 \leq 2(\lambda + B \lambda) \|S\|_2^2.
495: \end{equation}
496: From the conditions \(2(\lambda + B \lambda) \leq \epsilon \), it follows that
497: \begin{equation}
498: |A-\hat{S}(\omega)|^2 \leq \epsilon \|S\|_2^2.
499: \end{equation}
500: \qquad\end{proof}
501:
502: When we are able to get most of the data, the computational cost for
503: estimating Fourier coefficients on unevenly spaced data is only slightly
504: more than for the evenly spaced data case. The time to compute the
505: signal value remains almost the same as for the evenly spaced data
506: case. The extra time, in the worst case $O(\frac{\log\delta}{\epsilon_{1}^{2}p\log(1-p)})$,
507: comes from visiting unavailable data. Fortunately, the visit operation
508: is very fast and therefore contributes little to the total time, especially
509: when most of the data are available.
510:
511: Moreover, as in \cite{Zou}, one can speed up the algorithm by using
512: multi-step coarse-to-fine coefficient estimation procedures, which
513: turns out to be more efficient than single-step accurate estimation;
514: the proof is entirely analogous to Lemma 4.3 in \cite{Zou}.
515:
516:
517: \subsection{Estimating Norms}
518:
519: The basic idea for locating the label of a significant frequency is
520: to compare the energies (i.e. the $L^{2}$ norm) of signals restricted
521: in different frequency intervals. If the energy of some interval is
522: relatively large, the significant mode is in that region with higher
523: probability. We construct the following new signals to focus on certain
524: intervals \begin{equation}
525: H_{j}(t)=\chi_{1}(t)e^{\frac{2\pi ijt}{16}}\ast\chi_{[-q_{1},q_{1}]}(\sigma t)e^{\frac{2\pi it\theta}{N}}\ast S\end{equation}
526: where 2$q_{1}+1$ is the filter width, $j=0,\ldots,15$, $\sigma$
527: and $\theta$ are random dilation and modulation factors. (Please
528: see \cite{Zou} for an explanation of the role of $\sigma$ and $\theta$).
529: For convenience, we denote $H_{j}(t)$ by $H(t)$.
530:
531: We need to evaluate values $H(t)$ for random indices $t\in\{0,\ldots,N-1\}$.
532: Note that the signal $H$ results from the convolutions of two finite
533: bandwidth Box-car filters with the original signal $S$. Therefore,
534: any missing point needed by the two convolutions would lead to a failure
535: of computing $H(t)$. The total number of signal points involved depends
536: on the number of nonzero taps in these two filters. Moreover, random
537: dilation and modulation factors of the second Box-car filter make
538: computation more tricky.
539:
540: One naive way is to dive into the two convolutions and sample each
541: signal point. If it is not available, stop evaluating this $H(t)$
542: and start with a new index $t$. This definitely increases time cost
543: by wasting abundant computation. For example, suppose five data are
544: needed and only one of them is missing, then the algorithm may compute
545: four data in vain in the worst case, where the missing data point
546: is visited last in the sequence of 5.
547:
548: To avoid the above situation, we first compute the locations of all
549: the points that will be needed for the convolution; only if they are
550: all available will we start the computation. The locations related
551: to the convolution are given in the following lemma.
552:
553: \begin{lemma} \label{lm:location}
554: Suppose we have a signal \(H(t)=( \chi_1^{(\sigma_1)} * ( \chi_{q_1}^{(\sigma_2)} * S)^{(\sigma_3)} )^{(\sigma_4)}(t)\), where \(\sigma_1\), \(\sigma_2\), \(\sigma_3\), and \(\sigma_4\) are dilation factors. By the definition of the Box-car filter, the taps of \(\chi_1\) lie in the interval \([-1, 1]\) and the taps of \(\chi_{q_1}\) lie in \([-q_1, q_1]\). Hence, in order to evaluate \(H(t)\), we need the values of \(S\) at the indices \(\sigma_3 \sigma_4 t - \sigma_3 \sigma_1 i - \sigma_2 j\), where the integers \(i=-1,\ldots, 1\) and \(j=-q_1,\ldots,q_1\).
555: \begin{proof}
556: To evaluate H(t), first let signal \( r=( \chi_{q_1}^{(\sigma_2)} * S)^{(\sigma_3)} \),
557: then
558: \begin{equation}
559: H(t)=(\chi_1^{(\sigma_1)} * r)^{(\sigma_4)}(t) =\sum_{i=-1}^{ 1} \chi_1(\sigma_1 i) r(\sigma_4 t-\sigma_1 i)
560: \end{equation}
561: \begin{eqnarray}
562: r(\sigma_4 t-\sigma_1 i)=( \chi_{q_1}^{(\sigma_2)} * S)^{(\sigma_3)}(\sigma_4 t-\sigma_1 i)
563: = ( \chi_{q_1}^{(\sigma_2)} * S)(\sigma_3 \sigma_4 t - \sigma_3 \sigma_1 i) \nonumber \\
564: = \sum_{j=-q_1}^{q_1} \chi_{q_1}(\sigma_2 j) S(\sigma_3 \sigma_4 t- \sigma_3 \sigma_1 i - \sigma_2 j).
565: \end{eqnarray}
566: Thus, in order to get the value of \(H(t)\), we need values of all \(S(t^{'})\), where \(t^{'}=\sigma_3 \sigma_4 t- \sigma_3 \sigma_1 i - \sigma_2 j\), with \(i=-1,\ldots, 1\)
567: and \(j=-q_1,\ldots, q_1\).
568: \qquad\end{proof}
569: \end{lemma}\addtocounter{algorithm}{+1}
570:
571: The scheme of the norm estimation algorithm is as follows.
572:
\begin{algorithm} \label{alg:norm}{\textsc{Norm Estimation}} \label{estnorm} \\
574: Input: signal \(H\), \(k=0\), the number of iterations \(M=1.2\ln(1/\delta)\).\\
575: While \(k<M\):
576: \begin{enumerate}
577: \item Randomly generate the index \(t_k\).
578: \item Compute all indices needed by the two convolutions:
\(\Upsilon=\{t^{'} : t^{'}=\sigma_3 \sigma_4 t_k- \sigma_3 \sigma_1 i - \sigma_2 j \}\), where \(i=-1,\ldots, 1\)
580: and \(j=-q_1,\ldots, q_1\).
581: \item If all the points \( t^{'}\in \Upsilon\) are available, then compute \(H(t_k)\)
582: else go to step 1 and generate another index \(t_k\).
583: \item estimate = 60-th percentile of the sequence \(\{|H(t_k)|^2 N\} \), where \(k=0,\ldots,M-1\).
584: \end{enumerate}
\end{algorithm}\addtocounter{theorem}{+1}
586:
587: If there exist satisfactory data groups, although maybe very few,
588: the norm estimation will eventually find them. However, when most
589: data are unavailable, the program may struggle in a long loop and
590: take a huge amount of time. We introduce some tricks to avoid this.
591: For example, set an upper bound MAX on the number of the loops. If
592: it is reached, just use the sample points generated so far to estimate
593: the norms. This technique may lead to a larger error, and thus hamper
594: our frequency identification. However, by repeating the calculation,
595: as stipulated by Lemma 3.2, we reduce the inaccuracy. Anyway we cannot
596: hope to recover the signal, if $p$ is too small.
597:
598: The following lemma investigates the number of repetitions to get
599: a satisfactory data group for estimating norms.
600:
601: \begin{lemma}
602: Suppose \(\chi_{q_1}\) and \(\chi_{q_2}\) are two Box-car filters with numbers of taps \(2q_1+1\) and \(2q_2+1\) respectively. Define \(D_{q_1,q_2} = \chi_{q_1} * \chi_{q_2}\). Then $D_{q_1,q_2}$ has $2q_1+2q_2+1$ nonzero taps in the time domain.
603: \end{lemma}\addtocounter{algorithm}{+1}
604:
605: \begin{lemma}
606: Randomly choose an index for signal \(H(t)\), then after \(k>\log \delta /
607: \log(1-(1-p)^{2q_1+2q_2+1})\) iterations, we can get at least one satisfactory index with high probability
608: \(1-\delta\).
609: \end{lemma}\addtocounter{algorithm}{+1} \begin{proof}
610: It is easy to prove by Lemma \ref{lm:rept}.
611: \qquad\end{proof}
612:
Here is a new scheme for estimating norms, which uses far fewer samples
than the original one and still achieves a good estimation. In \cite{Zou},
we proposed a lemma that enabled us to achieve a good norm estimation
from only a few samples. The following lemma is its adaptation to the
case of unevenly spaced data.
618:
619: \begin{lemma}
620: If a signal \(H\) is 95\% pure and if \(r>1.2 \ln (1/\delta)\), the output of Algorithm \ref{estnorm} gives an estimation of its energy
621: which exceeds \(\|H\|^2/3\) with probability exceeding \(1-\delta\).
622: \end{lemma}\addtocounter{algorithm}{+1} \begin{proof}
The proof is very similar to that of Lemma 4.5 in \cite{Zou}. We present only the differences between the two proofs. Suppose we sample the signal \(H\) \(r\) times. Let \(\kappa=\{t:N|H(t)|^2<\|H\|^2/3 \}\), with \(\kappa^C\) as its complement. By the Cauchy--Schwarz inequality and the definition of \(\kappa\), we have
624: \begin{equation}
625: \left |\sum_{t \in \kappa}H(t) \right |^2 \leq |\kappa| \sum_{t \in \kappa}|H(t)|^2 \leq |\kappa|^2 \frac{1}{N}\frac{1}{3} \|H\|^2.
626: \end{equation}
627: On the other hand, we know that the signal is 95\(\%\) pure, i.e. \(|\hat{H}(\omega_0)|^2 \geq 0.95\|H\|^2\) for some \(\omega_0\). By modulating, \(\omega_0\) can be moved to 0; therefore, we can, without loss of
628: generality, suppose most of the energy concentrates at the frequency 0; then
629: \begin{equation}
630: \left |\frac{1}{\sqrt{N}} \sum_{t=1}^N H(t) \right |^2 = |\hat{H}(0)|^2 \geq 0.95 \|H\|^2.
631: \end{equation}
632: So we have
633: \begin{eqnarray}
634: \left |\sum_{t \in \kappa^C}H(t) \right | \geq \sqrt{0.95N} \|H\| - |\kappa| \frac{1}{\sqrt{3N} } \|H\|.
635: \end{eqnarray}
On the other hand, by the Cauchy--Schwarz inequality, \(|\sum_{t\in\kappa^{C}}H(t)|\leq\sqrt{|\kappa^{C}|}\,\| H\|=\sqrt{N-|\kappa|}\,\| H\|\), so that
637: \begin{equation}
638: N-|\kappa| \geq \left (\sqrt{0.95N} - \frac{|\kappa|}{\sqrt{3N}} \right )^2.
639: \end{equation}
640: Let \(\alpha = \frac{|\kappa|}{N}\); the above inequality becomes
641: \begin{equation}
642: \alpha^2 + \left( 3-2 \sqrt{0.95*3}\right) \alpha -0.15 \leq 0.
643: \end{equation}
644: Thus \(0 \leq \alpha \leq 0.075 \).
645: Define now a random variable \(X_{\kappa}= \left (\sum_{i=1}^N \chi_{\kappa}(i) \right )\); it will be useful to estimate
646: \begin{equation}
647: E(X_{\kappa})=\frac{|\kappa|}{N} \leq 0.075,
648: \end{equation}
649: and the expectation of the random variable \(e^{z X_{\kappa}}\),
650: \begin{equation}
651: E(e^{X_{\kappa} z}) = e^0 Prob(\chi_{\kappa}(i)=0) + e^z Prob(\chi_{\kappa}(i)=1) = 1-\alpha + \alpha e^z.
652: \end{equation}
653: Suppose now we sample the signal \(H\) \(r\) times, and take the \mbox{\textit{{60-th}}} percentile of the numbers \(N|H(t_1)|^2, \ldots, N|H(t_r)|^2\).
654: By Chernoff's standard argument and similar procedure of Lemma 4.5 in \cite{Zou}, we have for \(z>0\),
655: \begin{eqnarray*}
Prob \left (\mbox{\textit{{60-th}}} \, percentile < \frac{1}{3} \|H\|^2 \right ) \leq \left [ (1-\alpha) e^{-0.6z} + \alpha e^{0.4 z} \right ]^r.
657: \end{eqnarray*}
658: Take \(z=\ln (1.25(1-\alpha)/ \alpha)\), then
\begin{equation}
\label{eq:chernoff-opt}
(1-\alpha)e^{-0.6z} + \alpha e^{0.4z} = 1.97 \alpha^{0.6} (1-\alpha)^{0.4}.
\end{equation}
The right hand side of \eqref{eq:chernoff-opt} is increasing in \(\alpha\) on the interval \([0, 0.075]\); since \(\alpha \leq 0.075\), we obtain an upper bound by substituting \(0.075\) for \(\alpha\):
663: \begin{eqnarray}
664: \left [ (1-\alpha) e^{-0.6z} + \alpha e^{0.4 z} \right ]^r = \left [ 1.97 \alpha^{0.6} (1-\alpha)^{0.4} \right ]^r \leq e^{-0.90 r}.
665: \end{eqnarray}
666:
To ensure \(Prob \left (\mbox{\textit{{60-th}}} \, percentile < \frac{1}{3} \|H\|^2 \right ) \leq \delta\), it suffices to take \(r \geq 1.2 \ln (1/\delta)\), since then \(e^{-0.90 r} \leq \delta\). In that case we have
668: \begin{equation}
669: Prob(Output \geq \|H\|^2/3) = Prob(\mbox{\textit{{60-th}}}\, percentile\,of\, N|H(t)|^2 \geq \|H\|^2/3) \geq 1-\delta.
670: \end{equation}
671: \qquad\end{proof}
672:
673: This norm estimation procedure will be used repeatedly in the group
674: testing step below.
675:
676:
677: \subsection{Isolation}
678:
679: For a significant frequency in signal $S$, isolation aims to construct
680: a series of new signals, such that this significant frequency becomes
681: predominant in at least one of the new isolation signals.
682:
683: \begin{lemma}
684: Given signals \(S\), \(S_1\), and the parameters as stated in Lemma \ref{lm:smallfilter}. Suppose \(F_1(t) = S_1(t)*\chi_1(t) = (\chi_T(t) S(t))*\chi_1(t)\), \(F(t)= S(t) * \chi_1(t)\). If \( p\geq \frac{1}{1+(N-1)\lambda \tau^2}\), then for each \(\omega\) with \(|\hat{S}(\omega)|^2 > B \lambda \|S\|^2\), isolation algorithm can create a signal $F_1^{*}$, such that
685: \begin{equation}
686: |\hat{F}_1^{*}(\omega)|^2 \geq 0.98\|F_1^{*}\|^2.
687: \end{equation}
\label{lem:iso}\end{lemma}\addtocounter{algorithm}{+1} \begin{proof}
689: Since \( |\hat{S}(\omega)|^2 > B \lambda \|S\|^2\), we have \(|\hat{S}(\omega)| > \sqrt{B \lambda} \|S\|\). Then there exists some \(\eta>0\), such that \(|\hat{S}(\omega)| \geq (\sqrt{\eta}+\sqrt{B \lambda})\|S\|.\)
690: Lemma \ref{lm:diffest} states that
691: \(|\hat{S}_1(\omega)-\hat{S}(\omega)| \leq \sqrt{B \lambda} \|S\|\). Therefore
692: \begin{equation}
693: |\hat{S}_1(\omega)|\geq \sqrt{\eta}\|S\|\geq \sqrt{\eta}\|S_1\|.
694: \end{equation}
The isolation algorithm returns \(F_1^{(0)}, \ldots, F_1^{(2k)}\) with \(k=O(\frac{1}{\eta})\), as described in \cite{GGIMS}. For any \(\omega\) with \(|\hat{S}_1(\omega)|^2 \geq \eta \|S_1\|^2\), there exists some \(j\), such that
696: \begin{equation}
697: |\hat{F}_1^{(j)}(\omega)|^2 \geq 0.98 \|F_1^{(j)}\|^2.
698: \end{equation}
699:
700: Let $F_1^{*}=F_1^{(j)}$, then
701:
702: \begin{equation}
703: |\hat{F}_1^{*}(\omega)|^2 \geq 0.98\|F_1^{*}\|^2.
704: \end{equation}
705:
706: \end{proof}
707:
Theoretically, in order to capture a significant mode, we need $O(1/\eta)$
signals. However, in practice, far fewer signals are enough to achieve
this goal.
711:
712:
713: \subsection{Group Testing}
714:
715: Isolation has produced several signals, one of which contains the
716: most significant frequency. Group testing uses repeated zoom-ins on
717: one of the signals, and norm testing to select where to zoom in, in
718: order to determine the frequency. The goal of group testing is thus
719: to find the most significant mode of the signal $F_{1}^{*}$ from
720: isolation. It uses recursive procedures MSB (Most Significant Bit)
721: to approach this mode gradually.
722:
723: \emph{Definition}: Denote a set $\{\omega:\,\,(2l-1)N/32\leq\omega\leq(2l+1)N/32\}$
724: by $interval_{l}$.
725:
726: Group test algorithm is given as follows.
727:
728: \begin{algorithm} \label{alg:grouptest}{\textsc{Group Testing}} \\
Input: isolation signal \(F_1^{*}\), stored as \(F_1^{(0)}\); \(i=0\), \(q=1\). \\
730: While \(q<N\), in the \(i\)-th iteration,
731: \begin{enumerate}
732: \item Find the most significant bit \(v\) and the number of significant intervals \(c\) by the procedure MSB.
\item Modulate the signal \(F_1^{(i)}\) by \( \lfloor (v+0.5)N/16 \rfloor \) and dilate it by a factor of \( \lfloor 16/c \rfloor\). Store the result in \(F_1^{(i+1)}\), then update \(i=i+1\).
734: \item Call Group Test again with the new signal \(F_1^{(i)}\), denote its output by \(g\).
735: \item Update the accumulation factor \(q = q * \lfloor 16/c \rfloor \).
736: \item If \(g> N/2\), then \(g = g -N\).
737: \item return \( \lfloor g/\lfloor 16/c \rfloor + (v+1/2)N/16+0.5 \rfloor(mod\,\, N)\);
738: \end{enumerate}
739: \end{algorithm}\addtocounter{theorem}{+1}
740:
741: The MSB procedure is as follows.
742:
743: \begin{algorithm} \label{alg:msb}{\textsc{MSB (Most Significant Bit)}} \\
744: \text{}\hspace{10mm} Input: signal \(F_1^{(i)}\) with length \(N\), a threshold \(0<\eta<1\).
745: \begin{enumerate}
\item Get a series of new signals \(H_j(t) =F_1^{(i)}(t) * (e^{2 \pi i j t/16} \chi_1 )\), \(j=0, \ldots, 15\).
747: \item Estimate the energies \(e_j\) of \(H_j\), \(j=0, \ldots, 15\).
\item For \(l=0,\ldots,15\), compare the energy \(e_l\) with the energies \(e_j\), where \(j=(l+4)\bmod 16, (l+5)\bmod 16, \ldots,(l+12)\bmod 16\). If \(e_l > e_j\) for all these \(j\), label interval \(l\) as an interval with large energy.
749: \item Find the longest consecutive intervals of large energies. Take their center as \(v \), and the number of those intervals as \(c \).
750: \item If \(c<8\), then do the original MSB in {\emph{\cite{GGIMS}}} to get \(v\) and set \(c=8\);
751: \item Return the dilation-related factor \(c\) and the most significant bit \(v\).
\end{enumerate}
753: \end{algorithm}\addtocounter{theorem}{+1}
754:
755: For convenience, we denote $F_{1}^{(i)}$ by $\mathbf{F_{1}}$.
756:
757: \begin{lemma}
758: Given a \( 98\%\) pure signal \(\mathbf{F_1}\), suppose \(G_j(t) = e^{2 \pi i j t /16} \chi_1(t)\). Then Algorithm \ref{alg:grouptest}, with Algorithm \ref{alg:msb} as its subroutine, can find the significant frequency \(\omega_1\) of the signal \(\mathbf{F_1}\) with high probability.
759: \end{lemma}\addtocounter{algorithm}{+1} \begin{proof}
760: The proof is similar to that of Lemma 5 in \cite{GGIMS}, with some changes:
761:
762: Since the signal \(\mathbf{F_1}\) is \(98\%\) pure, there exist a frequency mode \(\omega_1\) and a signal \(\rho\), such that \(\mathbf{F_1}=a\phi_{\omega_1}+\rho\), where $|a|^2 \geq 0.98\|\mathbf{F_1}\|^2$ and \(\|\rho\|^2 \leq 0.02\|\mathbf{F_1}\|^2\). Without loss of generality, assume \(\omega_1 \in [-N/32, N/32]\). The whole region is divided into 16 subintervals \([jN/16-N/32, jN/16+N/32]\), where \(j=0,\ldots, 15\). To estimate \(\widehat{\mathbf{F_1}*G_0}(\omega_1)\) for \(|\omega_1|\leq N/32\), we use that \(|\hat{G}_0(\omega_1)|=|\hat{\chi}_1(\omega_1)|\geq 0.987\) for \(|\omega_1|\leq N/32\). It follows that
763: \begin{eqnarray*}
764: |\widehat{\mathbf{F_1} \ast G_{0}}(\omega_1)|^{2} = N \left|\hat{\mathbf{F}}_1(\omega_1)\hat{G}_0(\omega_1)\right|^2 \geq N 0.987^{2}|\hat{\mathbf{F}}_1(\omega_1)|^{2} \geq N 0.987^{2}0.98\| \mathbf{F_1}\|^{2} \nonumber \\
765: \geq 0.954N\|\hat{\mathbf{F}}_1\|^{2} \geq 0.954N\|\hat{\mathbf{F}}_1\hat{G_{0}}\|^{2}=0.954\|\mathbf{F_1}\ast G_{0}\|^{2}.
766: \end{eqnarray*}
Therefore the estimation \(X\) of \( \|\mathbf{F_1} * G_0\|^2\) satisfies:
768: \begin{eqnarray*}
769: X \geq \|\mathbf{F_1} * G_0\|^2/3 = \|\widehat{\mathbf{F_1}*G_0}\|^2/3 = \sum_{\omega} |\widehat{\mathbf{F_1}*G_0}(\omega)|^2 /3 \geq |\widehat{\mathbf{F_1}*G_0}(\omega_1)|^2/3 \nonumber \\
770: \geq 0.954N\|\mathbf{F_1}\|^2/3 \geq 0.318 N\|\mathbf{F_1}\|^2.
771: \end{eqnarray*}
772: Next consider the energy of \(\mathbf{F_1}*G_{4}\).
773: \begin{eqnarray*}
774: \|\hat{\rho}\hat{G_{4}}\|^{2} =\sum_{\omega}|\hat{\rho}(\omega)\hat{G_{4}}(\omega)|^{2} \nonumber \\
775: \leq \sum_{\omega}|\hat{\rho}(\omega)|^{2} = \| \rho\|^{2}\leq 0.02 \| \mathbf{F_1}\|^{2}.
776: \end{eqnarray*}
777: Since $|\hat{G}_4(\omega_1)|<0.464$, we have
778: \begin{eqnarray*}
|\hat{\mathbf{F}}_1(\omega_1)\hat{G}_{4}(\omega_1)|\leq|\hat{\mathbf{F}}_1(\omega_1)||\hat{G}_{4}(\omega_1)| \leq 0.464\,|\hat{\mathbf{F}}_1(\omega_1)| \leq 0.464 \| \mathbf{F_1}\|.
780: \end{eqnarray*}
781: Also \( \|\hat{\mathbf{F}}_1 \hat{G}_{4}\|^{2}-|\hat{\mathbf{F}}_1(\omega_1) \hat{G}_{4}(\omega_1)|^{2} \leq 0.02\| \mathbf{F_1}\|^{2}\).
782: Thus
783:
\[ \| \hat{\mathbf{F}}_1 \hat{G}_4 \|^2 \leq 0.464^2\| \mathbf{F_1}\|^{2}+0.02\| \mathbf{F_1}\|^{2}\leq 0.24\| \mathbf{F_1}\|^{2}. \] It follows that \[ \| \mathbf{F_1} \ast G_4 \|^2 =\| \widehat{\mathbf{F_1}*G_4}\|^2 = N \|\hat{\mathbf{F}}_1 \hat{G}_4\|^2 \leq 0.24N\| \mathbf{F_1}\|^{2}. \]
Then we compare this upper bound on \(\| \mathbf{F_1}\ast G_{4}\|^{2}\) with the lower bound on the estimation of \(\| \mathbf{F_1}\ast G_{0}\|^{2}\): \[ 0.24N \| \mathbf{F_1}\|^{2} < 0.318N \| \mathbf{F_1}\|^{2}, \] so \(\| \mathbf{F_1}\ast G_{4}\|^{2}\) is less than the estimation for \(\| \mathbf{F_1}\ast G_{0}\|^{2}\). In general, \(\omega_1\in interval_{j}\), for \(j\) not necessarily 0. Therefore we compare \(\| \mathbf{F_1}\ast
G_{j^{'}}\|^{2}\) with \(\| \mathbf{F_1}\ast G_{j}\|^{2}\), where \(|j-j^{'}|\geq4\). If there is some \(j\) with \(\| \mathbf{F_1}\ast G_{j}\|^{2}\) apparently
787: larger than \(\| \mathbf{F_1}\ast G_{j^{'}}\|^{2}\), then we conclude \(\omega_1 \notin interval_{j^{'}}\). Otherwise, possibly
788: \(\omega_1\in interval_{j^{'}}\). By the above argument, we can always eliminate 9 consecutive interval regions out of 16, leaving a cyclic interval of length at most \(7N/16\). The remaining proof is exactly the same as Lemma 8 in paper \cite{GGIMS}.
789: \end{proof}
790:
791: Remark: In \cite{Zou}, we showed that group testing works for a Box-car
792: filter with width more than $21$, i.e. $k>10$. In that case, $2k+1$
793: intervals are sufficient. A similar conclusion still holds in the
794: unevenly spaced data case. However, the lemma above proves the success
795: of group testing under different conditions. In our proof, we use
a Box-car filter with much shorter width, namely 3 in the time domain;
this works well if 16 intervals are taken. In practice, we use these
shorter filters; we can usually (if $B$ is small) get away with using
far fewer intervals as well (e.g. 3 instead of 16).
800:
801:
802: \subsection{Adaptive Greedy Pursuit}
803:
804: In summary, given a signal $S$, for an accuracy $\epsilon$ and for
805: $B$ modes, we can find a very good approximation of the signal $S$
806: by using Algorithm \ref{alg:total1}.
807:
808: \begin{theorem}
\label{lm:totalcost} Given a signal \(S\), an accuracy \(\epsilon\), success probability \(1-\delta\), Algorithm \ref{alg:total1} can output a \(B\)-term representation \(R\) with sum-square-error \(\|S-R\|^2\leq (1+\epsilon) \|S-R_{opt}\|^2\), where \(R_{opt}\) is the \(B\)-term representation for \(S\) with the least sum-square-error, with time and space cost \(poly(B,\log(N), \frac{1}{\epsilon}, \log(1/\delta))\) for computing and \(\frac{B \log M \log N \log \delta}{\lambda \log(1-(1-p)^{2q_1+2q_2+1})}\) \(+\frac{\log (1/\delta) \log M}{\lambda \log p}\) for just visiting samples.
810: \begin{proof}
811: {\textrm{We omit the proof since it is very similar to Theorem 9 in \cite{GGIMS}.}}
812: \end{proof}
813: \end{theorem}\addtocounter{algorithm}{+1}
814:
815:
816: \section{NERA$\ell$SFA with Interpolation Technique}
817:
818: The greedy algorithm described above is fast. When $p$ is sufficiently
819: large (e.g. $p>0.7$), the approach proposed and discussed in the
820: previous section works well. For smaller $p$, the amount of time
821: wasted to find available sample groups becomes unacceptably long.
822: For example, when $B=2$, $N=100$, $p=0.4$, the algorithm couldn't
823: find the signal within 200 greedy pursuit iterations. For this reason,
824: we introduced an interpolation technique to get an approximate value
825: of the missing point in the norm estimation procedure. This algorithm
826: is efficient even in smaller $p$ cases.
827:
828:
829: \subsection{Lagrange Interpolation Technique}
830:
831: The task of interpolation is to estimate $S(t)$ for arbitrary $t$
832: by drawing a smooth curve through all the known points \cite{PTVF}.
833: It is called interpolation when the desired $t$ is between the largest
834: and smallest of these $t_{i}$'s. We use Lagrange Polynomial Interpolation,
835: one of the simplest and most popular interpolation techniques.
836:
837: Generally, the number of interpolation points determines the degree
838: of a polynomial. A polynomial of higher degree is smoother with smaller
839: approximation errors at the expense of more computation. Thus we choose
840: a second degree polynomial, as a balance between computational complexity
841: and accuracy. It is given explicitly by Lagrange's classical formula.
842: If the three nearest neighbors are $(t_{1},S(t_{1}))$, $(t_{2},S(t_{2}))$,
843: $(t_{3},S(t_{3}))$, the polynomial is \begin{equation}
844: P(t)=\frac{(t-t_{2})(t-t_{3})}{(t_{1}-t_{2})(t_{1}-t_{3})}S(t_{1})+\frac{(t-t_{1})(t-t_{3})}{(t_{2}-t_{1})(t_{2}-t_{3})}S(t_{2})+\frac{(t-t_{2})(t-t_{1})}{(t_{3}-t_{2})(t_{3}-t_{1})}S(t_{3})\end{equation}
845:
846:
847: If $S(t)$ is three times differentiable in an interval $[a,b]$,
848: and the points $t_{1},t_{2},t_{3}\in[a,b]$ are different, then there
849: exists some $v\in[a,b]$, such that the approximation error is $S(t)-P(t)=\frac{S^{(3)}(v)}{3!}(t-t_{1})(t-t_{2})(t-t_{3})$.
850:
851:
852: \subsection{Estimate Norms with Interpolation}
853:
854: We introduce the interpolation scheme into estimating norms. The idea
855: is to estimate the value of a missing point by the Lagrange interpolation.
856: The detailed algorithm for estimating norms is as follows.
857:
858: \begin{algorithm} {\textsc{Estimate Norm with interpolation technique}} \\
859: Input: signal \(H\), \(k=0\), the maximum number of samples \(M\).
860: \begin{enumerate}
861: \item Randomly generate the index \(t_k\), where \(k=0,\ldots, M-1\).
862: \item For each $k$, if \(H(t_k)\) is not available, estimate \( H(t_k) \) by Lagrange interpolation; else compute \(H(t_k)\) directly.
863: \item Estimation = 60-th percentile of the sequence \(\{|H(t_k)|^2 N\} \), where \(k=0,\ldots,M-1\).
864: \end{enumerate}
865: \end{algorithm}
866:
867: Note that we use interpolation \emph{only} in norm estimation steps,
868: where precision is less critical. With less precise norm estimation,
869: the localization of important modes could still work well when iterated.
870: For coefficient estimation, which needs to be more precise, we always
871: search for available samples.
872:
873:
874: \section{Numerical Results}
875:
In this section, we present striking numerical results of NERA$\ell$SFA,
compared with the Inverse Non-equispaced Fast Fourier Transform (INFFT)
algorithms. The popular benchmark software NFFT version 2.0 is used
to give the performance of INFFT, with the default CGNE\_R method and Dirichlet
kernel. Its time cost excludes the precomputation of sample values,
which takes $O(L)$. Numerical experiments show the advantage of our
NERA$\ell$SFA algorithm in processing large amounts of data. We begin
in Section 5.1 by comparing NERA$\ell$SFA with INFFT for some one-
and two-dimensional examples of different lengths. In Section 5.2,
the performance for different numbers of modes is shown. Finally, we
test the capability of NERA$\ell$SFA to recover the signal in the
situation with a large amount of missing data and in the presence of large
noise.
889:
890: All the experiments were run on an AMD Athlon(TM) XP1900+ machine
891: with Cache size 256KB, total memory 512 MB, Linux kernel version 2.4.20-20.9
892: and compiler gcc version 3.2.2. The numerical data is an average of
893: 10 runs of the code; errors are given in the $L^{2}$ norm.
894:
895:
896: \subsection{Experiments with Different Length of Signals}
897:
We ran the comparison for an 8-mode superposition signal $S(t)=\sum_{i=1}^{B}\phi_{\omega_{i}}$,
plus white noise $\nu$ with the standard deviation $\sigma=0.5$,
damped by a factor of $1/\sqrt{N}$ (so that $\Vert\nu\Vert^{2}=\sigma^{2}=0.25$;
since $\Vert S\Vert^{2}=8$, this implies $SNR=20\log_{10}32\thickapprox30.1$~dB).
902: Other parameters are $B=8$, $\epsilon=0.02$, $\delta=0.01$, and
903: $p=70\%$. The missing data are randomly and uniformly distributed.
904: NERA$\ell$SFA outperforms INFFT in speed when $N$ is large; see
905: Table \ref{tab:B13} and Figure \ref{fig:diffN1d}. The corresponding
crossover point is $N\geq2^{15}=32768$. For example, to process
907: $2^{19}=524,288$ data, more than nineteen minutes (estimated) are
908: needed for INFFT versus approximately one second for NERA$\ell$SFA.
909: Experiments support the theoretical conclusion that NERA$\ell$SFA
910: would be faster than INFFT after some $N$ for a sparse signal; whatever
911: the sparsity, i.e. whatever the value of $B$, there always exists
912: some crossover $N$.
913:
914: %
915: \begin{table}
916: \begin{center}\begin{tabular}{|c|c|c|c|}
917: \hline
918: N &
919: INFFT&
920: NERA$\ell$SFA &
921: NERA$\ell$SFA \tabularnewline
922: &
923: &
924: (+sampling)&
925: (w/o sampling)\tabularnewline
926: \hline
927: $2^{9}$=512&
928: 0.01&
929: 0.63&
930: 0.31\tabularnewline
931: $2^{11}$=2048&
932: 0.03&
933: 0.77&
934: 0.37\tabularnewline
935: $2^{13}$=8192&
936: 0.17&
937: 0.90 &
938: 0.46\tabularnewline
939: $2^{15}$=32768&
940: 0.83&
941: 0.93 &
942: 0.49\tabularnewline
943: $2^{17}$=131072&
944: 4.30&
945: 1.03&
946: 0.51\tabularnewline
947: $2^{19}$=524288&
948: 19.94&
949: 1.20&
950: 0.61 \tabularnewline
951: \hline
952: \end{tabular}\end{center}
953:
954:
955: \caption{\label{tab:B13}Experiments with fixed $B=8$, $p=0.7$, $d=1$ (one
956: dimension), and varying length $N$ of signals; an i.i.d. white noise
957: is added with $\sigma=0.5$, or $SNR\simeq30dB$ (see text). For each
958: length of the signal, 10 different runs were carried out; the average
959: result is shown. We did all the tests for NERA$\ell$SFA with Lagrange
960: interpolation, as explained in the text. Two kinds of time costs for
961: NERA$\ell$SFA are provided. One is the total running time and another
962: is the running time excluding the sampling time. The time of INFFT
963: does not include the precomputation time for samples. }
964: \end{table}
965:
966:
967: \addtocounter{figure}{+1}
968:
969: %
970: \begin{figure}
971: \begin{center}\includegraphics[%
972: width=10cm]{figure1.eps}\end{center}
973:
974:
975: \caption{Time Comparison between INFFT and NERA$\ell$SFA for different $N$
976: with $B=8$, $p=0.7$, $d=1$. The result in Table \ref{tab:B13}
977: is shown in the form of a graph here. The $x$ coordinate is the $\log_{2}(N)$,
978: the $y$ coordinate presents the running time for each algorithm.
979: NERA$\ell$SFA without sampling surpasses INFFT at $N=2^{14}=16384$.
980: \label{fig:diffN1d}}
981: \end{figure}
982:
983:
984: \addtocounter{table}{+1}
985:
986: In two dimensions, we test a noisy 6-mode superposition signal $S(t)=\sum_{i=1}^{B}\phi_{\omega_{xi}}\phi_{\omega_{yi}}+\nu$,
987: with $B=6$, $\epsilon=0.02$, $\delta=0.01$, $p=80\%$, and $\sigma=0.1$.
988: Missing data are randomly and uniformly distributed. As the number
989: of grid points $N$ in each dimension grows, two dimensional NERA$\ell$SFA
990: outperforms two dimensional INFFT at $N\geq512$, as Table \ref{tab:2dB13}
and Figure \ref{fig:diffN2d} show. The crossover point becomes much
smaller in higher-dimensional situations. It would not be surprising if,
for recovering a 6-mode three-dimensional signal, NERA$\ell$SFA surpassed
INFFT at a hundred sampling grid points in each dimension.
995:
996: %
997: \begin{table}
998: \begin{center}\begin{tabular}{|c|c|c|c|}
999: \hline
1000: N &
1001: INFFT&
1002: NERA$\ell$SFA &
1003: NERA$\ell$SFA \tabularnewline
1004: &
1005: &
1006: (+sampling)&
1007: (w/o sampling)\tabularnewline
1008: \hline
1009: $128$&
1010: 0.13&
1011: 2.86 &
1012: 1.57\tabularnewline
1013: 256&
1014: 0.73&
1015: 2.60&
1016: 1.46\tabularnewline
1017: 512&
1018: 3.00&
1019: 3.70&
1020: 2.13\tabularnewline
1021: 1024&
1022: 11.59&
1023: 4.31&
1024: 2.94\tabularnewline
1025: $2048$&
1026: 54.94&
1027: 6.56&
1028: 4.90 \tabularnewline
1029: \hline
1030: \end{tabular}\end{center}
1031:
1032:
1033: \caption{\label{tab:2dB13}Experiments with fixed $B=6$, $p=0.8$, $d=2$
(two dimensions), and varying length $N$ of signals; an i.i.d. white
noise is added with $\sigma=0.1$, or $SNR\simeq56dB$ (see text).
For each length of the signal, 10 different runs were carried out;
the average result is shown. We did all the tests for NERA$\ell$SFA
with two dimensional interpolation techniques as shown in the appendix.
Again, two kinds of time costs for NERA$\ell$SFA, with and
without sampling time, are provided. The time of INFFT excludes the
sampling time. }
1042: \end{table}
1043:
1044:
1045: \addtocounter{figure}{+1}
1046:
1047: %
1048: \begin{figure}
1049: \begin{center}\includegraphics[%
1050: width=10cm]{figure2.eps}\end{center}
1051:
1052:
1053: \caption{\label{fig:diffN2d}Time comparison between INFFT and NERA$\ell$SFA
1054: for different $N$ with fixed $B=6$, $p=0.8$, $d=2$. The $x$ coordinate
1055: is the logarithm of length $N$ of signal in each dimension. INFFT
1056: is very fast when $N$ is relatively small and slows down quickly
as $N$ increases. In contrast, NERA$\ell$SFA takes similar
time to process small and large $N$ problems. NERA$\ell$SFA without
sampling outperforms INFFT at $N=2^{8.5}\approx362$. }
1060: \end{figure}
1061:
1062:
1063: \addtocounter{table}{+1}
1064:
1065:
1066: \subsection{Experiments with Different Number of Modes}
1067:
1068: The number of modes has an important influence on the running time
1069: since the crossover point varies for signals with different $B$.
1070: To investigate this, we did the experiments with fixed $N=2^{18}=262144$,
1071: $p=0.6$ and varying $B$. As before, we take $S$ to be a superposition
1072: of exactly $B$ modes with white noise, i.e. $S(t)=\sum_{i=1}^{B}c_{i}\phi_{\omega_{i}}+\nu$,
1073: with standard deviation of noise $\sigma=0.05$. Available data are
1074: uniformly and randomly distributed. Table \ref{tab:diffB} and Figure
1075: \ref{fig:diffB1d} compare the running time for different $B$ using
1076: INFFT and NERA$\ell$SFA. At first, NERA$\ell$SFA takes less time
because $N$ is so large. However, the execution time of INFFT stays
constant for different numbers of modes $B$, while that of modified
RA$\ell$SFA is polynomial of higher order. INFFT is faster than NERA$\ell$SFA
when $B\geq10$. Regression shows empirically that
the order of $B$ in NERA$\ell$SFA is greater than quadratic. This
is one of the characteristics of this version of the RA$\ell$SFA
algorithms and is unrelated to the non-equispaced nature of the data. (A
different version of RA$\ell$SFA in \cite{GMS} is linear in $B$,
but may be less easily used when not all equispaced data are available.)
1087:
1088: %
1089: \begin{table}
1090: \begin{center}\begin{tabular}{|c|c|c|c|c|}
1091: \hline
1092: number of modes&
1093: SNR&
1094: NERA$\ell$SFA &
1095: NERA$\ell$SFA &
1096: INFFT \tabularnewline
1097: $B$&
1098: (dB)&
1099: (+sampling) &
1100: (w/o sampling)&
1101: \tabularnewline
1102: \hline
1103: 2&
1104: 58&
1105: 0.06&
1106: 0.01&
1107: 1.35\tabularnewline
1108: \hline
1109: 4&
1110: 64&
1111: 0.24&
1112: 0.06&
1113: 1.35\tabularnewline
1114: \hline
1115: 6&
1116: 68&
1117: 0.61&
1118: 0.23&
1119: 1.35\tabularnewline
1120: \hline
1121: 8&
1122: 70&
1123: 1.44&
1124: 0.69&
1125: 1.35\tabularnewline
1126: \hline
1127: 10&
1128: 72&
1129: 2.45&
1130: 1.39&
1131: 1.35\tabularnewline
1132: \hline
1133: 13&
1134: 74&
1135: 5.78&
1136: 3.64&
1137: 1.35\tabularnewline
1138: \hline
1139: 16&
1140: 76&
1141: 10.03&
1142: 7.17&
1143: 1.35 \tabularnewline
1144: \hline
1145: \end{tabular}\end{center}
1146:
1147:
1148: \caption{\label{tab:diffB}Experiments with fixed $N=2^{18}$, $p=0.6$, $d=1$
1149: (one dimension), $\sigma=0.05$, and varying number of modes $B$
of signals. For each number of modes, 10 different runs were
carried out; the average result is shown. We did all the tests for
1152: NERA$\ell$SFA with interpolation techniques. We present two different
1153: time costs of NERA$\ell$SFA, with and without sampling. }
1154: \end{table}
1155:
1156:
1157: \addtocounter{figure}{+1}
1158:
1159: %
1160: \begin{figure}
1161: \begin{center}\includegraphics[%
1162: width=10cm]{figure3.eps}\end{center}
1163:
1164:
1165: \caption{\label{fig:diffB1d}Time Comparison between INFFT and NERA$\ell$SFA
for different $B$ with fixed $N=2^{18}$, $p=0.6$, $d=1$ (one
dimension), $\sigma=0.05$, a graph of the result in Table \ref{tab:diffB}.
The $x$ coordinate is the number of modes $B$, the $y$ coordinate
presents running time. The running time of NERA$\ell$SFA is polynomial
in $B$. In contrast, the time of INFFT stays constant for different
1171: $B$, excluding precomputation for the samples. NERA$\ell$SFA without
1172: sampling begins to be slower than INFFT at $B=10$ for $N=2^{18}$.}
1173: \end{figure}
1174:
1175:
1176: \addtocounter{table}{+1}
1177:
1178:
1179: \subsection{Experiments for Different Percentage of Missing Data}
1180:
1181: The advantage of interpolation techniques is to recover a signal even
1182: when a large percentage of data is missing. Table \ref{tab:B2rec}
1183: shows the recovery effect for a two-mode pure signal $c_{1}\phi_{\omega_{1}}+c_{2}\phi_{\omega_{2}}$,
1184: $N=10^{6}$ with all the other parameters $\epsilon$ and $\delta$
1185: the same as before. When the percentage of available data is large,
1186: both algorithms recover the signal well with similar running time.
1187:
1188: %
1189: \begin{table}
1190: \begin{center}\begin{tabular}{|c|c|c|c|c|}
1191: \hline
1192: p &
1193: Time of NERA$\ell$SFA&
1194: success&
1195: Time of NERA$\ell$SFA &
1196: success\tabularnewline
1197: &
1198: (with interpolation)&
1199: probability&
1200: (w/o interpolation)&
1201: probability \tabularnewline
1202: \hline
1203: 1 &
1204: 0.03 &
1205: 100 $\%$&
1206: 0.03 &
1207: 100 $\%$\tabularnewline
1208: \hline
1209: 0.8 &
1210: 0.04&
1211: 100 $\%$&
1212: 0.06 &
1213: 100 $\%$\tabularnewline
1214: \hline
1215: 0.6 &
1216: 0.05 &
1217: 100 $\%$&
1218: 0.49 &
1219: 100 $\%$\tabularnewline
1220: \hline
1221: 0.4 &
1222: 0.05&
1223: 100 $\%$&
1224: 0.45&
1225: 100 $\%$\tabularnewline
1226: \hline
1227: 0.3 &
1228: 0.06 &
1229: 100 $\%$&
1230: - &
1231: 0 $\%$\tabularnewline
1232: \hline
1233: 0.2 &
1234: 0.06 &
1235: 100 $\%$&
1236: - &
1237: 0 $\%$\tabularnewline
1238: \hline
1239: 0.1 &
1240: 0.07 &
1241: 100 $\%$&
1242: - &
1243: 0 $\%$\tabularnewline
1244: \hline
1245: $10^{-2}$&
1246: 0.11 &
1247: 100 $\%$&
1248: - &
1249: 0 $\%$\tabularnewline
1250: \hline
1251: $10^{-3}$&
1252: 0.51 &
1253: 100 $\%$&
1254: - &
1255: 0 $\%$\tabularnewline
1256: \hline
1257: $10^{-4}$&
1258: 4.58 &
1259: 100 $\%$&
1260: - &
1261: 0 $\%$\tabularnewline
1262: \hline
1263: $0.00002$&
1264: 758.22 &
1265: 97 $\%$&
1266: - &
1267: 0 $\%$ \tabularnewline
1268: \hline
1269: \end{tabular}\end{center}
1270:
1271:
1272: \caption{\label{tab:B2rec}Experiments with fixed $B=2$, $N=10^{6}$, no
1273: noise, and varying percentage of available data. Each entry is based
1274: on the average of 10 different runs. In each run, the number of iterations
1275: is limited to 200 (this also corresponds to a fixed limit on the
1276: number of samples taken). The success probability indicates the fraction
1277: of runs in which both modes were found. When only $30\%$ of the data
1278: is available, NERA$\ell$SFA without interpolation cannot find
1279: both significant modes within 200 iterations.}
1280: \end{table}
1281:
1282:
1283: \addtocounter{figure}{+2}
1284:
1285: We tried another example, a signal with $N=100$. NERA$\ell$SFA without
1286: interpolation techniques fails with high probability to recover the signal
1287: if more than $45\%$ of the data are unavailable. In contrast, with the help
1288: of the interpolation technique, NERA$\ell$SFA can always recover
1289: the signal with only $25\%$ of the data available.
1290:
1291: Experiments also show that for NERA$\ell$SFA with the interpolation technique,
1292: the total number of available data points, rather than the percentage of available
1293: data, determines the success probability. In contrast, the success
1294: of NERA$\ell$SFA without interpolation is determined by the percentage.
1295:
1296:
1297: \subsection{Experiments to Recover Noisy Signals}
1298:
1299: Recovering a signal from very noisy data is a challenging problem.
1300: The following tests are done for $S(t)=\sum_{i=1}^{B}c_{i}\phi_{\omega_{i}}+\nu$,
1301: $B=6$, $\epsilon=0.02$, $N=2^{17}$, $p=0.6$, and different standard
1302: deviations $\sigma$ of the noise. The amplitude of the noise is still multiplied
1303: by a factor of $1/\sqrt{N}$. As Table~\ref{tab:noise} shows, NERA$\ell$SFA
1304: excels at extracting information from noisy data even in the case
1305: of a small signal-to-noise ratio.
1306:
1307: %
1308: \begin{table}
1309: \begin{center}\begin{tabular}{|c|c|c|c|c|c|}
1310: \hline
1311: $\sigma$&
1312: SNR&
1313: Time of NERA$\ell$SFA &
1314: Time of NERA$\ell$SFA &
1315: Relative Error &
1316: Success \tabularnewline
1317: &
1318: (dB)&
1319: (+sampling)&
1320: ( w/o sampling)&
1321: ($\%$)&
1322: probability \tabularnewline
1323: \hline
1324: 0 &
1325: -&
1326: 0.48&
1327: 0.21&
1328: 0.02&
1329: 100\%\tabularnewline
1330: \hline
1331: 0.5 &
1332: 27.60&
1333: 0.56&
1334: 0.22&
1335: 2.00&
1336: 100\%\tabularnewline
1337: \hline
1338: 1.0&
1339: 15.56&
1340: 0.87&
1341: 0.32&
1342: 4.50&
1343: 90\%\tabularnewline
1344: \hline
1345: 1.5&
1346: 8.53&
1347: 3.94&
1348: 1.59&
1349: 5.83&
1350: 80\%\tabularnewline
1351: \hline
1352: 2.0&
1353: 3.52&
1354: 4.78&
1355: 1.86&
1356: 7.67&
1357: 50\%\tabularnewline
1358: \hline
1359: 2.5&
1360: -0.35&
1361: 7.96&
1362: 2.14&
1363: 8.50&
1364: 30\% \tabularnewline
1365: \hline
1366: \end{tabular}\end{center}
1367:
1368:
1369: \caption{\label{tab:noise}Experiments with fixed $B=6$, $N=2^{17}$, $p=0.6$,
1370: and varying noise levels. For each noise level, 10 different runs
1371: were carried out; the average result is shown. In each run, the number
1372: of iterations is limited to 200 (this also corresponds to a fixed
1373: limit on the number of samples taken). The success probability indicates
1374: the fraction of runs in which all 6 modes were found. The average relative
1375: error is the error of the reconstructed signal with respect to the original
1376: signal.}
1377: \end{table}
1378:
1379:
1380: \addtocounter{table}{+1}
1381:
1382:
1383: \section{Conclusion}
1384:
1385: We provide a sublinear sampling algorithm that recovers, with high
1386: probability, a $B$-term Fourier representation for an unevenly spaced
1387: signal. It is faster than any existing method for processing sparse
1388: signals of large size. Moreover, it recovers the signal even when
1389: a large percentage of the data is missing or the signal-to-noise ratio is small.
1390:
1391:
1392: \section{Acknowledgments}
1393:
1394: For many helpful suggestions and discussions, I would like to thank my adviser
1395: Ingrid Daubechies. In addition, I thank Weinan E, Anna Gilbert, and Martin
1396: Strauss for their suggestions.
1397:
1398:
1399: \section*{Appendix}
1400:
1401:
1402: \subsection*{How to interpolate the two dimensional data to get values for missing
1403: points}
1404:
1405: In one dimension, the value at a missing point can be interpolated from
1406: its few nearest available neighbors to the left and right. The idea can be
1407: extended to higher-dimensional cases with some additional techniques.
1408:
1409: For instance, in two dimensions, we first find the nearest available
1410: neighbor of a missing point in each of the four quadrants. Suppose the missing point
1411: is $(x,y)$ and its four neighbors are $(x_{1},y_{1})$, $(x_{2},y_{2})$,
1412: $(x_{3},y_{3})$, $(x_{4},y_{4})$. The weights of the neighbors can be
1413: derived by solving the following linear system of equations:
1414:
1415: \begin{equation}
1416: \left(\begin{array}{cccc}
1417: x_{1} & x_{2} & x_{3} & x_{4}\\
1418: y_{1} & y_{2} & y_{3} & y_{4}\\
1419: x_{1}y_{1} & x_{2}y_{2} & x_{3}y_{3} & x_{4}y_{4}\\
1420: 1 & 1 & 1 & 1\end{array}\right)\left(\begin{array}{c}
1421: w_{1}\\
1422: w_{2}\\
1423: w_{3}\\
1424: w_{4}\end{array}\right)=\left(\begin{array}{c}
1425: x\\
1426: y\\
1427: xy\\
1428: 1\end{array}\right)\label{array1}\end{equation}
1429:
1430:
1431: However, the matrix in (\ref{array1}) could be singular. In this
1432: case we choose the three nearest neighbors in different quadrants
1433: and use the following equations:
1434:
1435: \begin{equation}
1436: \left(\begin{array}{ccc}
1437: x_{1} & x_{2} & x_{3}\\
1438: y_{1} & y_{2} & y_{3}\\
1439: 1 & 1 & 1\end{array}\right)\left(\begin{array}{c}
1440: w_{1}\\
1441: w_{2}\\
1442: w_{3}\end{array}\right)=\left(\begin{array}{c}
1443: x\\
1444: y\\
1445: 1\end{array}\right)
1446: \label{array2}\end{equation}
1447:
1448: The time to locate those nearest neighbors and compute corresponding
1449: weights is considered a part of precomputation and excluded from total
1450: running time.
1451:
1452: Note that we can use geometrical arguments to simplify the precomputation
1453: of the weights. One easily sees that the system of equations (\ref{array1})
1454: is translation invariant: the two linear systems of equations
1455:
1456: \[
1457: \left(\begin{array}{cccc}
1458: x_{1}+l & x_{2}+l & x_{3}+l & x_{4}+l\\
1459: y_{1}+p & y_{2}+p & y_{3}+p & y_{4}+p\\
1460: (x_{1}+l)(y_{1}+p) & (x_{2}+l)(y_{2}+p) & (x_{3}+l)(y_{3}+p) & (x_{4}+l)(y_{4}+p)\\
1461: 1 & 1 & 1 & 1\end{array}\right)\left(\begin{array}{c}
1462: w_{1}\\
1463: w_{2}\\
1464: w_{3}\\
1465: w_{4}\end{array}\right)=\left(\begin{array}{c}
1466: l\\
1467: p\\
1468: lp\\
1469: 1\end{array}\right)\]
1470: and \[
1471: \left(\begin{array}{cccc}
1472: x_{1} & x_{2} & x_{3} & x_{4}\\
1473: y_{1} & y_{2} & y_{3} & y_{4}\\
1474: x_{1}y_{1} & x_{2}y_{2} & x_{3}y_{3} & x_{4}y_{4}\\
1475: 1 & 1 & 1 & 1\end{array}\right)\left(\begin{array}{c}
1476: w_{1}\\
1477: w_{2}\\
1478: w_{3}\\
1479: w_{4}\end{array}\right)=\left(\begin{array}{c}
1480: 0\\
1481: 0\\
1482: 0\\
1483: 1\end{array}\right)\]
1484: have the same solutions for any $l$ and $p$. That means the location
1485: of the missing point does not influence the weights. Only the geometrical
1486: shape and relative distances of the available neighbors of a missing
1487: point matter.
1488:
1489: Thus, we compute weights for the geometrical shapes of available neighboring
1490: points which occur most often. As we go through every missing point,
1491: we check if the shape of its neighboring available points matches
1492: those popular ones; if it does, we can directly get the weights without
1493: computation. This saves a huge amount of work, especially when $p$
1494: is large.
1495:
1496: %
1497: \begin{figure}
1498: \includegraphics[%
1499: width=7cm]{figure4.ps}\includegraphics[%
1500: width=7cm]{figure5.ps}
1501:
1502:
1503: \caption{\label{fig:shape}Some geometrical shapes of available neighboring
1504: points that occur most often. A missing point (denoted by a small
1505: cross) is at the center of the cross. Available points are denoted
1506: by dots. Left: the four available neighbors are located in the shape
1507: of cross. The distances of each neighbor to the missing point are
1508: equal. Right: almost the same as configuration in the left side, except
1509: one point moved off to the diagonal. }
1510: \end{figure}
1511:
1512:
1513: For example, if the four neighboring points are located in the shape
1514: of a cross with the missing point as their center, as the left side
1515: of Figure~\ref{fig:shape} shows, then all of the weights are equal
1516: to one quarter. This situation happens with probability $p^{4}$,
1517: which is almost $2/3$ when $p=0.9$. Another frequently occurring case
1518: has one of the four neighbors of the previous configuration
1519: moved off to the diagonal (see the right side of Figure~\ref{fig:shape});
1520: this happens with probability $4p^{4}(1-p)(2-p)$, i.e.\ about $29\%$
1521: when $p=0.9$. In this case, the two neighbors on the same line, mirroring
1522: each other about the missing point, each have weight 0.5; the other two
1523: points have weight zero. Table~\ref{tab:shape} shows the probabilities
1524: of these two situations as $p$ varies.
1525:
1526: %
1527: \begin{table}
1528: \begin{center}\begin{tabular}{|c|c|c|c|}
1529: \hline
1530: $p$&
1531: $p^{4}$&
1532: $4p^{4}(1-p)(2-p)$&
1533: sum:$p^{4}+4p^{4}(1-p)(2-p)$\tabularnewline
1534: \hline
1535: 1&
1536: $100\%$&
1537: 0&
1538: $100\%$\tabularnewline
1539: \hline
1540: 0.9&
1541: $65\%$&
1542: $29\%$&
1543: $94\%$\tabularnewline
1544: \hline
1545: 0.8&
1546: $41\%$&
1547: $39\%$&
1548: $80\%$\tabularnewline
1549: \hline
1550: 0.7&
1551: $24\%$&
1552: $37\%$&
1553: $61\%$\tabularnewline
1554: \hline
1555: 0.6&
1556: $13\%$&
1557: $29\%$&
1558: $42\%$\tabularnewline
1559: \hline
1560: 0.5&
1561: $6\%$&
1562: $19\%$&
1563: $25\%$ \tabularnewline
1564: \hline
1565: \end{tabular}\end{center}
1566:
1567:
1568: \caption{\label{tab:shape}The two possibilities corresponding to the geometrical
1569: shapes in Figure~\ref{fig:shape}. The parameter $p$ is the percentage of available
1570: data. The left side of Figure~\ref{fig:shape} happens with probability $p^{4}$;
1571: the right side appears with probability $4p^{4}(1-p)(2-p)$.}
1572: \end{table}
1573:
1574:
1575: \begin{thebibliography}{10}
1576: \bibitem{Bass}\textsc{R. Bass} \textsc{and} \textsc{K. Gr}\noun{\"{o}}\textsc{chenig},
1577: \emph{Random sampling of multivariate trigonometric polynomials},
1578: SIAM J. Math. Anal., Vol. 36 (2004), pp. 773-795.
1579: \bibitem{Bjork}A. B\noun{j\"{o}rck}. \emph{Numerical Methods for Least Squares
1580: Problems}. SIAM, Philadelphia, 1996.
1581: \bibitem{Boyd}\textsc{J.P. Boyd}, \emph{A fast algorithm for Chebyshev, Fourier
1582: and Sinc interpolation onto an irregular grid}, J. Comput. Phys.,
1583: 103 (1992), pp. 243-257.
1584: \bibitem{CRT}E. C\noun{andes}, J. R\noun{omberg}, and T. T\noun{ao},
1585: \emph{Robust Uncertainty Principles: Exact Signal Reconstruction from Highly
1586: Incomplete Frequency Information}, http://arxiv.org/pdf/math.CA/0411273.
1587: \bibitem{Fa}H. F\noun{assbender}, \emph{On numerical methods for discrete least-squares
1588: approximation by trigonometric polynomials}, Math. Comput., 66 (1997),
1589: pp. 719--741.
1590: \bibitem{FGS}H. \noun{Feichtinger, K}\emph{\noun{.}} \noun{Gr\"{o}chenig}
1591: \textsc{\noun{and}} \noun{T. Strohmer}, \emph{Efficient numerical
1592: methods in non-uniform sampling theory}, Numer. Math., 69 (1995),
1593: pp. 423--440.
1594: \bibitem{GGIKMS}A. C. G\noun{ilbert}, S. G\noun{uha}, P. I\noun{ndyk}, Y. K\noun{otidis},
1595: S. M\noun{uthukrishnan}, M. S\noun{trauss}, \emph{Fast, small-space
1596: algorithms for approximate histogram maintenance}. STOC 2002: 389-398.
1597: \bibitem{GGIMS}\textsc{A.C. Gilbert, S. Guha, P. Indyk, S. Muthukrishnan and M. Strauss},
1598: \emph{Near-Optimal Sparse Fourier Representations via Sampling}, STOC,
1599: 2002.
1600: \bibitem[9]{GMS}\textsc{A.C. Gilbert, S. Muthukrishnan and M. Strauss}, Improved Time
1601: Bounds for Near-Optimal Sparse Fourier Representation, to appear.
1602: \bibitem{GL}\textsc{L. Greengard} \textsc{and J. Lee}. Accelerating the Nonuniform
1603: Fast Fourier Transform, SIAM Review, 46 (2004), pp. 443-454.
1604: \bibitem{GS}\textsc{G. Grimmett and D. Stirzaker}. \emph{Probability and Random
1605: Processes}. Oxford University Press, 2001.
1606: \bibitem{Hanke}M. H\noun{anke}. Conjugate gradient type method for ill-posed problems.
1607: Wiley, New York, 1995.
1608: \bibitem{KP}\textsc{S. Kunis and D. Potts}, \emph{Stability results for scattered
1609: data interpolation by trigonometric polynomials}, preprint.
1610: \bibitem{key-2}\textsc{S. Kunis, D. Potts}, \emph{NFFT, Software, C subroutine library,}
1611: http://www.math.uni-luebeck.de/potts/nfft, 2002-2004.
1612: \bibitem{Kunis}\textsc{S. Kunis, D. Potts, G. Steidl}, \emph{Fast Fourier transform
1613: at nonequispaced knots: A user's guide to a C-library}, Manual of
1614: NFFT 2.0 software.
1615: \bibitem{Mansour}\textsc{Y. Mansour}, \emph{Randomized interpolation and approximation
1616: of sparse polynomials}, SIAM Journal on Computing 24:2 (1995).
1617: \bibitem{OWN}\textsc{A. Oppenheim, A. Willsky with S. Nowab}. \emph{Signals and
1618: Systems}. Prentice Hall, 1998.
1619: \bibitem{PTVF}\textsc{W. Press, S. Teukolsky, W. Vetterling and B. Flannery}. \emph{Numerical
1620: Recipes in C: the art of scientific computing}. Cambridge University
1621: Press, 1992.
1622: \bibitem{Reichel}\textsc{L. Reichel, G. S. Ammar, and} \textsc{W. B. Gragg}. \emph{Discrete
1623: least squares approximation by trigonometric polynomials.} Math. Comput.,
1624: 57(1991), pp. 273-289.
1625: \bibitem{Ware}\textsc{A. F. Ware}, \emph{Fast Approximate Fourier Transforms for
1626: Irregularly Spaced Data}, SIAM Rev., 40 (1998), pp.~838--856.
1627: \bibitem{Zou}\textsc{J. Zou, A.C. Gilbert, M. Strauss and I. Daubechies}, \emph{Theoretical
1628: and Experimental Analysis of a Randomized Algorithm for Sparse Fourier
1629: Transform Analysis}, submitted to Journal of Computational Physics.
1630: \end{thebibliography}
1631:
1632: \end{document}
1633: