0310:nlin0310033/neurus.tex

1: \documentclass[epj,final,floatfix]{svjour}

2: %\documentclass[epj,referee,floatfix]{svjour}

3: \usepackage{graphicx}

4:

5: \begin{document}

6: \title{Associative memory on a small-world neural network}

7:

8: \date{\today}

9:

10: %\author{Luis G. Morelli}

11: %\email{morelli@ictp.trieste.it}

12: %\affiliation{Abdus Salam International Center for Theoretical Physics, P.O. Box 586, 34100 Trieste, Italy}

13: %\author{Guillermo Abramson}

14: %\email{abramson@cab.cnea.gov.ar}

15: %\affiliation{Centro At\'{o}mico Bariloche, CONICET and Instituto Balseiro, 8400 S. C. de Bariloche, Argentina}

16: %\author{Marcelo N. Kuperman}

17: %\email{kuperman@cab.cnea.gov.ar}

18: %\affiliation{Centro At\'{o}mico Bariloche, CONICET and Instituto Balseiro, 8400 S. C. de Bariloche, Argentina}

19:

20: \author

21: {

22: Luis G. Morelli\inst{1,}\thanks{E-mail address: morelli@ictp.trieste.it}

23: \and Guillermo Abramson\inst{2,}\thanks{E-mail address: abramson@cab.cnea.gov.ar}

24: \and Marcelo N. Kuperman\inst{2,}\thanks{E-mail address: kuperman@cab.cnea.gov.ar}

25: }

26:

27: \institute

28: {Abdus Salam International Center for Theoretical Physics,

29: P.O. Box 586, 34100 Trieste, Italy

30: \and

31: Centro At\'{o}mico Bariloche, CONICET and Instituto Balseiro,

32: 8400 S. C. de Bariloche, Argentina

33: }

34:

35: \abstract{We study a model of associative memory based on a neural

36: network with small-world structure. The efficacy of the network to

37: retrieve one of the stored patterns exhibits a phase transition at

38: a finite value of the disorder. The more ordered networks are unable to

39: recover the patterns, and are always attracted to mixture states.

40: Besides, for a range of the number

41: of stored patterns, the efficacy has a maximum at an intermediate

42: value of the disorder. We also give a statistical characterization

43: of the attractors for all values of the disorder of the network.

44: \PACS { {84.35.+i}{Neural networks}\and

45: {89.75.Hc}{Networks and genealogical trees}\and {87.18.Sn}{Neural

46: networks} } }

47:

48: \authorrunning{L. G. Morelli, G. Abramson and M. N. Kuperman}

49:

50: \maketitle

51:

52: \section{Small-world neural networks}

53:

54: Artificial neural networks have been used as a model for

55: associative memory since the 80's, and a considerable a\-mount of work

56: has been done in the field \cite{amit,peretto}. Most of this work

57: regards both the simulation and the theory of completely connected

58: networks, as well as networks with a random dilution of the

59: connectivity. It is known that particular prescriptions for the

60: determination of the synaptic weights enable these systems to

61: successfully retrieve a pattern out of a set of memorized ones.

62: This behavior is observed in the system up to a certain value of

63: the number of stored patterns, beyond which the network becomes

64: unable to retrieve any of them. For reasons of simplicity of the

65: models and their analytical tractability, complex architectures of

66: the networks, more akin to those found in biological neural

67: systems, have been largely left out of the theoretical analysis.

68: Fortunately, since a few years ago, a class of models that has

69: come to be known as ``complex networks'' began to be thoroughly

70: studied. Complex networks seem more compatible with the

71: geometrical properties of many biological and social phenomena

72: than regular lattices, random networks, or completely connected

73: systems \cite{watts1998,barabasi99,newman00,watts}.

74: Already in the seminal work of Watts and

75: Strogatz \cite{watts1998}, whose small-world model combines

76: properties of regular and random networks, it was observed

77: that the neural system of the nematode \textit{C. elegans} shares

78: topological properties with these model networks.

79:

80: In this paper we study a neural network built upon the

81: Watts-Strogatz model for small worlds. The model interpolates

82: between regular and random networks by means of a parameter $p$,

83: which characterizes the disorder of the network. The construction,

84: as formulated in Ref. \cite{watts1998}, begins with a

85: one-dimensional regular lattice of $N$ nodes, each one linked to

86: its $K$ nearest neighbors to the right and to the left, and with

87: periodic boundary conditions. With probability $p$, each one of

88: the right-pointing links, of every node, is rewired to a randomly

89: chosen node in the network. Self connections and repeated

90: connections are not allowed. The result is a disordered network,

91: defined by the set $N$, $K$, $p$, that lies between a regular

92: lattice ($p=0$) and a random graph ($p=1$). A wide range of these

93: networks displays high local clusterization and short average

94: distance between nodes, as do many real complex networks. They can be

95: defined by the \textit{connectivity matrix} $c_{ij}$, where

96: $c_{ij} = 1$ if there is a link between nodes $i$ and $j$, and

97: $c_{ij} = 0$ otherwise.

98: We use this matrix to establish the synaptic

99: connections between neurons, at variance with the traditional

100: Hopfield model, where the network is completely connected and the

101: connectivity matrix is $c_{ij}=1$, $\forall i,j$.

102: At $p=1$ it coincides with

103: the standard diluted disordered networks, that have also

104: been considered in the literature, in which randomly

105: chosen elements in the connectivity matrix are set to zero.

106:

107: The biological neuron carries out an operation on the inputs

108: provided by other neurons, and it produces an output. A

109: transformation of this continuous output into a binary variable

110: makes it possible to formulate a simplified model in which the

111: neurons are logical elements (Ref. \cite{amit}, chapter 2).

112: In this binary representation, the

113: state of each neuron is characterized by a single variable $s_i$.

114: This variable can take two values representing the active and the

115: inactive states,

116: \begin{equation}

117: \label{eqstate}

118: s_i = \left\{

119: \begin{array} {rl}

120: 1  & \, \mbox{ if the neuron is active}, \\

121: -1 & \, \mbox{ if the neuron is inactive}.

122: \end{array}

123:  \right.

124: \end{equation}

125:

126:

127: The purpose of an associative memory model is to retrieve some patterns

128: that have been stored in the network by an unspecified learning process.

129: The stored---or \textit{memorized}---patterns are represented by network states

130: $\xi^{\mu}$, where $\mu = 1,\dots,M$ labels the different

131: patterns and $M$ is their number. As usual, the patterns are

132: generated at random, assigning with equal probability $1/2$ the

133: values $\xi^{\mu}_i = \pm 1$. The patterns are uncorrelated and

134: thus orthogonal in large networks:

135: \begin{equation}

136: \frac{1}{N} \sum_{i=1}^{N} \xi^{\mu}_i \xi^{\nu}_i =

137: \delta_{\mu\nu}.

138: \end{equation}

139:

140: The state of the neurons is updated asynchronously, as in Glauber

141: dynamics. At each simulation step, a neuron is chosen at random,

142: and its new state is determined by the local field:

143: \begin{equation} \label{eqhi}

144: h_i = \sum_{j=1}^{N}{\omega_{ij} \, s_j},

145: \end{equation}

146: according to:

147: \begin{equation} \label{eqsi}

148: s_i = \mbox{sign} \, ({h_i}).

149: \end{equation}

150:

151: The synaptic weights $\omega_{ij}$ of the connections are given by

152: Hebb's rule, restricted to the synapses actually present in the

153: network, as given by the connectivity matrix:

154: \begin{equation} \label{eqwij}

155: \omega_{ij} = \frac{1}{N} \sum_{\mu = 1}^{M}{c_{ij}

156: \xi^{\mu}_{i}\xi^{\mu}_{j}}

157: \end{equation}

158: for $i,j=1,\dots,N$. Note that as the network model does not allow

159: self connections the diagonal matrix elements are null. By

160: definition, the synaptic matrix is symmetric.

161:

162: The dynamics prescribed by Eqs.~(\ref{eqhi}) and~(\ref{eqsi}) is

163: deterministic, and the network is not subject to thermal

164: fluctuations. We will only consider the effects of a small amount

165: of additive noise to verify the robustness of our results. A full

166: discussion of the effect of a finite temperature in the dynamics

167: will be left for future work. The stochastic asynchronous update,

168: though, prevents the system from having limit cycles, and the

169: only attractors are fixed points. The stored patterns $\xi^{\mu}$

170: are, by construction of the synaptic weights~(\ref{eqwij}), fixed

171: points of the dynamics due to the orthogonality condition. In the

172: model, ``memory''  is the capacity of the network to retrieve one

173: of the stored patterns from an arbitrary initial condition. As

174: in traditional models, the reversed patterns $(-\xi^{\mu})$, as well

175: as a wealth of symmetric and asymmetric mixtures of patterns, are

176: also equilibria of the system and play a significant role in its

177: behavior as a memory device.

178:

179: \section{Effect of the disordered topology}

180:

181: We have performed extensive numerical simulations of the system,

182: starting from a random unbiased initial condition. After a

183: transient, a fixed point is reached, whence no further changes

184: occur to any neuron. In order to measure the efficacy of the

185: network to recall a number $M$ of stored random patterns, we

186: define an \textit{efficacy} $\varphi$ as the fraction of

187: realizations in which one of the stored patterns is retrieved. In

188: Fig.~\ref{npat} we plot the order parameter $\varphi$ as a

189: function of the disorder parameter $p$.  The different curves

190: correspond to different numbers of stored patterns, $M=1$, $2$,

191: $5$, $10$, and $20$. For this plot we have used $N=5 \times 10^3$ and

192: $K=100$. Averages have been taken over $10^4$ realizations. For

193: each realization we use different patterns, as well as different

194: initial conditions. Figure~\ref{npat} shows that on highly

195: ordered networks the system does not retrieve any stored pattern.

196: Then there is a transition as the disorder parameter $p$ grows,

197: and above some critical value of $p$, patterns are retrieved as

198: fixed points yielding $\varphi >0$. For $M=1$ and $M=2$, $\varphi

199: \equiv 1$ above $p\approx 0.4$. But for $M>2$ we find that

200: $\varphi$ does not grow monotonically with $p$. Instead, it

201: decays as $p$ grows after reaching a maximum value. This

202: surprising non monotonic behavior with the disorder parameter $p$

203: has been observed before in a problem of biased

204: diffusion~\cite{zanette02}, and in an Ising

205: model~\cite{sanchez2002}, both with asymmetric interactions.

206:

207: \begin{figure}

208: \centering \resizebox{\columnwidth}{!}{\rotatebox{-90}

209: {\includegraphics{npat.eps}}} \caption{Efficacy to retrieve a

210: memorized pattern, $\varphi$, as a function of the disorder $p$.

211: The curves correspond to different numbers of stored patterns:

212: (squares) $M=1$, (circles) $M=2$, (up triangles) $M=5$, (down

213: triangles) $M=10$,  (diamonds) $M=20$. Inset: The efficacy as a

214: function of the number of stored patterns, at $p=1$. Simulation

215: parameters: $N=5 \times 10^3$, $K=100$, $10^4$ realizations per point.}

216: \label{npat}

217: \end{figure}

218:

219: In the inset of Fig.~\ref{npat} we plot $\varphi$ vs. $M$ for a

220: disordered network with $p=1$. As the number of stored patterns

221: $M$ grows, the network becomes unable to retrieve them. The curve

222: also shows a non monotonic behavior with $M$. The transition as

223: the number of stored patterns grows has already been studied in

224: diluted disordered networks (Ref. \cite{amit}, chapter 7).  It is

225: known that random dilution reduces the capacity of a neural network

226: in a way which is proportional to the fraction of available

227: connections. For our system (which is very diluted) the

228: transition, then, takes place at $M_c\approx 0.15 (K/N) N= 15$,

229: as observed. Nevertheless, we are mostly interested in the

230: behavior of the system regarding the different topologies

231: characterized by $p$. The fact that the transition between the

232: remembering and the non-remembering phases occurs at a finite

233: value of the disorder parameter is very interesting, since a few

234: dynamical systems based on small-world architectures show it

235: \cite{kuperman2001,zanette2002,szabo2003}. This occurs in spite

236: of the fact that the average distance between nodes, the main

237: geometrical property of the Watts-Strogatz model, has a

238: transition at $p=0$ \cite{barrat}. Indeed, for several Ising-like

239: systems, which bear some similarities with artificial neural

240: networks, a phase transition occurs at $p=0$~\cite{barrat,zhu03,herrero02,kim01}.

241:

242: In order to understand the finite size effects in the system, and

243: the behavior of the transition in the limit of an infinite system,

244: we have made simulations on systems of different sizes. We have

245: chosen to keep the connectivity parameter of the model constant

246: through all the results we show, $K=100$. In this regard, our

247: results correspond to a neural network characterized by certain

248: properties at the local level, for example the average

249: connectivity of each neuron ($2K$ in our systems). Our finite size

250: analysis shows the behavior of these networks in systems of

251: increasing size $N$ and in the limit $N\to\infty$.

252:

253: \begin{figure}

254: \centering \resizebox{\columnwidth}{!}{\rotatebox{-90}

255: {\includegraphics{fidepe.eps}}} \caption{Efficacy $\varphi$ as a

256: function of the disorder parameter $p$, for systems of different

257: sizes (as shown in the legend), and $K=100$. The number of stored

258: patterns is $M=5$, with $10^4$ realizations per point. Inset: The

259: same curves, scaled with the system size according to

260: Eq.~(\ref{eqscaling}), collapse to a single curve $\Phi$, with

261: $p_c=0.333$ and $\alpha=0.2$.} \label{fidepe}

262: \end{figure}

263:

264: The plot of $\varphi$ vs. $p$ for different values of $N$ is shown

265: in Fig.~\ref{fidepe}. For these curves we have set $K=100$ and

266: $M=5$, averaging over $10^4$ independent realizations. As seen in

267: the figure, all the curves seem to cross for the same value of the

268: disorder parameter $p=p_c\approx 0.333$.

269:

270: Based on numerical evidence, we find that the dependence of the

271: efficacy on the system size can be built into a scaling function:

272: \begin{equation}

273: \varphi \left( p,N \right) = \Phi \left[ (p-p_c) N^{\alpha}

274: \right].

275: \label{eqscaling}

276: \end{equation}

277: At the point of crossing of the curves,  $\varphi$ becomes

278: independent of $N$.

279:

280: Since the order parameter is not singular at the transition,

281: we can expand $\Phi$ as a Taylor series around the critical

282: control parameter $p_c$:

283: \begin{equation}

284: \varphi \left( p,N \right) = \Phi (0) + \Phi' (0) \, (p-p_c) \,

285: N^{\alpha},

286: \end{equation}

287: to first order in $(p-p_c)$. Defining $\tilde \varphi = \varphi -

288: \varphi (p_c)$ and $\tilde p = p - p_c$ we can write:

289: \begin{equation}

290: \left. \frac{\partial \tilde \varphi}{\partial \tilde p}(N)

291: \right|_{\tilde p = 0} = \Phi' (0) \, N^{\alpha}.

292: \end{equation}

293:

294: Plotting on a log-log scale the derivative ${\partial \tilde

295: \varphi} / {\partial \tilde p} |_0$ vs. $N$, we obtain the exponent

296: $\alpha$ as the slope of the line.

297: Using data from $N=2 \times 10^3$ to $N=10^5$,

298: we find $\alpha = 0.23 \pm 0.04$, and $\Phi'(0) = 0.096 \pm 0.016$.

299: In the inset of Fig.~\ref{fidepe}, we plot the re-scaled curves for different $N$.

300: The best data collapse is obtained with $\alpha = 0.2$, compatible with the above result.

301: Observe that the data corresponding to $N=10^3$ (squares) fail to match the scaling curve,

302: indicating a lower bound of what can be considered a ``large'' system for this model.

303:

304: Except in the relatively narrow range of $p$ where $\varphi \approx

305: 1$, the system fails to retrieve any stored pattern in a

306: significant fraction of the realizations: almost always when the

307: network is very ordered (down to $p=0$), and about 12\% of the

308: times when the network is very disordered (up to $p=1$). What

309: happens in the phase space as the network architecture changes?

310: What happens to the trajectories, and why are the patterns

311: missed? It seems natural to expect that the energy landscape is

312: different for $p=0$ than for $p=1$. To address this problem we

313: turn our attention to the properties of the overlaps of the

314: equilibrium state with the memorized patterns. Suppose that after

315: a transient the network has reached a fixed point $\zeta$. We

316: define the overlap of this fixed point with the patterns as

317: \begin{equation}

318: \theta^{\mu} = \frac{1}{N} \left|\sum_{i = 1}^{N}{\xi^{\mu}_i

319: \zeta_i}\right|.

320: \end{equation}

321:

322: \begin{figure}[b]

323: \centering

324: \resizebox{\columnwidth}{!}{\rotatebox{-90}{\includegraphics{ovldis.eps}}}

325: \caption{Distribution of overlaps $P(\theta)$ after a fixed point has been

326: achieved, between the state of the system and all stored patterns. Each curve

327: corresponds to a value of $p$, as shown in the legend, typical of the different

328: memory behaviors observed. A large peak at $\theta=1$ (perfect retrieval of a

329: pattern) is not shown for reasons of scale (see discussion in the text).}

330: \label{ovldis}

331: \end{figure}

332:

333: Note that if the fixed point is a stored pattern, $\zeta =

334: \xi^{\nu}$, then $\theta^{\nu}=1$. In order to determine the type

335: of fixed points that are reached when the network misses the

336: patterns, we measure the overlap $\theta ^{\mu}$ of the fixed

337: point with the stored patterns $\xi^{\mu}$. The probability

338: distribution $P(\theta)$ of these overlaps gives information on

339: the kind of mixture that the fixed point is. Figure~\ref{ovldis}

340: shows the overlap distributions for several levels of disorder in

341: the network. In these plots, $N=2 \times 10^3$, $K=100$, $M=5$ and $10^6$

342: realizations are used per curve. For the three higher values of

343: $p$, the distributions have a high peak at $\theta = 1$, which is

344: not shown for reasons of scale. This peak corresponds to the

345: realizations that end up in a pattern, which happens frequently

346: whenever $p>p_c$, as seen in Fig.~\ref{fidepe}. The somewhat

347: broader peak that these distributions have at low values of

348: $\theta$ has the same origin, since the overlaps with the other

349: $M-1$ patterns have a low value whenever a pattern is reached.

350: Indeed, the overlap of two uncorrelated states has a mean value

351: $\theta_0=0.022$. In the intermediate range of $\theta$, the

352: distribution presents a broad bump around $\theta = 1/2$. This

353: corresponds to symmetric mixtures of the patterns, although the

354: width of this bump suggests that asymmetric mixtures are present

355: as well. In particular, the smaller peak present around

356: $\theta\approx 0.35$ for the completely random network,

357: corresponds to asymmetric mixtures. In contrast with these three

358: cases---at and above the critical point---for ordered networks

359: with $p=0$ the overlap distribution is broad and does not have

360: a peak at $\theta =1$. It has a maximum at $\theta = 0$ and decays

361: as $\theta$ grows, but large overlaps are observed in some

362: realizations as the distribution shows. This is the only curve for

363: which the complete distribution is shown. As the distribution

364: suggests, the fixed points of these systems consist of very

365: asymmetric mixtures.

366:

367: The previous analysis unveiled the structure of the phase space

368: and the difference between the low and the high $p$ regimes.

369: Still, what is the reason for the catastrophic loss of memory

370: below the critical value of disorder? We have found that, for low

371: values of disorder, the fixed points retrieve scattered pieces of

372: several stored patterns. These fixed points consist of localized

373: regions that overlap with different patterns.  Indeed, at $p=0$,

374: the network is topologically very clusterized, and there exist

375: local neighborhoods relatively isolated from each other. These

376: neighborhoods begin to disappear by the action of the shortcuts

377: provided by the random rewiring at higher values of $p$, until the

378: whole system becomes essentially a single neighborhood. Then, at

379: $p=0$, from an arbitrary initial condition, different regions of

380: the network eventually align themselves with different patterns.

381: The final result is a completely asymmetric mixture, impossible to

382: classify due to the arbitrariness of its origin and nature. These

383: are the states that the broad distribution of overlaps describes,

384: in Fig.~\ref{ovldis}, for $p=0$. The existence of asymmetric

385: mixtures as attractors in this kind of associative memory model

386: has been observed before (see for example \cite{amit}, chapter

387: 4). But since they are very rare in the completely random or in

388: the completely connected networks, they are very difficult to

389: observe. In the present context, however, they play an essential

390: role in the destruction of the ability of the system to retrieve

391: the patterns.

392:

393: In order to quantify this, we proceed to define a correlation

394: measure that provides a clear picture of the situation. We

395: introduce the difference of the fixed point $\zeta$ with a given

396: pattern:

397: \begin{equation}

398: d^{\mu}_i =  \xi^{\mu}_i \zeta_i =

399: \left\{

400: \begin{array} {rl}

401: 1  & \, \mbox{ if } \quad \xi^{\mu}_i =    \,\zeta_i,\\

402: -1 & \, \mbox{ if } \quad \xi^{\mu}_i \neq \,\zeta_i.

403: \end{array}

404:  \right.

405: \end{equation}

406: Then we define a local magnetization for the difference vector

407: $d^{\mu}$, for every node $i$:

408: \begin{equation}

409: m^{\mu}_i = \frac{1}{1+k_i} \left| d^{\mu}_i + \sum_{j \in {\mathcal V}_i}

410: {d^{\mu}_j} \right|,

411: \end{equation}

412: where ${\mathcal V}_i$ is the set of neighbors of node $i$ and $k_i$ is the number of these neighbors. The

413: local magnetization $m^{\mu}_i$ measures the local alignment with

414: the $\mu$ pattern or its reversed companion. The maximum value

415: $m^{\mu}_i = 1$ arises when $d^{\mu}_j = d^{\mu}_i$ $\forall j \in

416: {\mathcal V}_i$. The presence of connected domains where the fixed

417: point $\zeta$ overlaps with the $\xi^\mu$ pattern should be detected

418: as short range correlations between the local magnetizations. The

419: correlation between the local magnetizations of the difference

420: vector with the $\mu$ pattern is then defined as:

421: \begin{equation}

422: C^{\mu} = \frac{1}{N} \sum_{i=1}^{N} {\,\,\, \frac{1}{k_i}

423: \sum_{j \in {\mathcal V}_i} {m^{\mu}_i m^{\mu}_j}}.

424: \end{equation}

425: As we intend to capture the existence of correlations in the

426: difference with patterns that appear in the mixture that makes up the fixed

427: point $\zeta$, we define the maximum correlation

428: \begin{equation}

429: C = \max_{\mu} \left\{ C^{\mu} \right\}.

430: \label{eqmaxc}

431: \end{equation}

432:

433: \begin{figure}

434: \centering

435: \resizebox{\columnwidth}{!}{\rotatebox{-90} {\includegraphics{cordis.eps}}}

436: \caption{Distribution of the local correlation that characterizes the level of

437: alignment with a stored pattern [Eq.~(\ref{eqmaxc})]. System parameters:

438: $N=2 \times 10^3$, $K=100$, $10^6$ realizations per curve. A peak at $C=1$, shared by the

439: three curves with the higher values of $p$, is not shown for reasons of scale

440: (see

441: discussion in the text).}

442: \label{cordis}

443: \end{figure}

444:

445: Figure~\ref{cordis} presents the probability distribution $P(C)$

446: for different levels of network disorder. Each distribution is

447: constructed over $10^6$ realizations of $N=2 \times 10^3$ networks, with

448: connectivity $K=100$. For $p=0$ we observe a broad peak centered

449: around $C \approx 0.3$. This is a quantitative measure of the

450: occurrence of correlations on ordered networks, as we pointed out.

451: For the other values of $p$ considered in the figure, the

452: distribution has a sharp peak at $C=1$ which we have not shown for

453: reasons of scale, corresponding to the fixed points that coincide

454: with a pattern, and consequently give the highest possible value

455: of the correlation. Besides this peak, the most disordered systems

456: show a narrow peak at $C\approx 0.25$, and the curve for $p=1$

457: also a smaller one at $C\approx 0.15$. These two peaks correspond

458: to symmetric and asymmetric mixtures, respectively. For $p=0.333$,

459: very close to the critical point, the distribution presents a very

460: small bump at $C \approx 0.3$. It is easy to see, from the

461: extended region of $P(C)$ in the curve for $p=0$, that the

462: mixtures are characterized by higher local correlation in the

463: ordered system than in the disordered ones.

464:

465: \section{Discussion}

466:

467: We have studied a model of associative memory based on neural

468: networks with a complex topology. This kind of connectivity can

469: be considered as more similar to the biological networks than the

470: completely connected or randomly diluted networks. Many of the

471: general features of these systems are preserved: the network is

472: able to retrieve a memorized pattern, up to a saturation.

473: Besides, we have found a critical dependence of the efficacy of

474: retrieval on the disorder parameter of the network: a collapse of

475: the memory capability takes place at a finite value of the

476: disorder parameter. The optimal performance of the system occurs

477: at an intermediate value of the disorder, just above the critical

478: value. This enhanced performance occurs far away from the region

479: of $p=1$, which is equivalent to the well known models of

480: completely connected or randomly connected neural networks. We

481: have characterized the different phases by the properties of the

482: mixture states, that prevent the system from reaching one of the

483: memorized states.

484:

485: We have understood the failure of the more ordered networks to

486: retrieve a stored pattern due to the partition of the system into

487: arbitrary neighborhoods aligned with more than one pattern. This

488: is something that the disordered networks cannot do, and in fact

489: the distributions of the overlaps and of the correlations

490: quantify this effect. It does not escape us that we cannot, at

491: this stage, provide an explanation of the enhanced performance of

492: the intermediate region.

493:

494: We have checked the robustness of our results with respect to a

495: small amount of noise in the dynamics. This has been implemented

496: by flipping, with probability $\epsilon$, one neuron at random

497: after each deterministic step. For values of $\epsilon$ up to

498: $0.01$, the results are indistinguishable from the noiseless

499: system. For greater values of $\epsilon$ the system becomes more

500: and more ineffective to retrieve a pattern, but the general form

501: of the curves $\varphi (p)$ is preserved for the whole range of

502: $p$. A systematic analysis of the problem of a truly noisy

503: network, characterized by a temperature, remains to be done.

504:

505: \begin{acknowledgement}

506: The authors acknowledge fruitful discussions with D. H. Zanette.

507: G.A. thanks the Abdus Salam ICTP for its hospitality, and

508: Fundaci{\'o}n Antorchas for financial support.

509: \end{acknowledgement}

510:

511:

512: \begin{thebibliography}{99}

513: \bibitem {amit} D. J. Amit, \textit{Modeling brain function: the world of

514: attractor neural networks}, (Cambridge University Press,

515: Cambridge, 1989).

516:

517: \bibitem {peretto} P. Peretto, \textit{An introduction to the modeling of

518: neural networks}, (Cambridge University Press, Cambridge, 1992).

519:

520: \bibitem {watts1998} D. J. Watts and S. H. Strogatz, Nature \textbf{393}, 440 (1998).

521:

522: \bibitem {barabasi99} A-L. Barab\'asi and R. Albert, Science {\bf 286}, 509 (1999).

523:

524: \bibitem {newman00} M. E. J. Newman, J. Stat. Phys. {\bf 101}, 819 (2000).

525:

526: \bibitem {watts} D. J. Watts, {\it Small Worlds} (Princeton University Press, Princeton, 1999).

527:

528: \bibitem {zanette02} D. H. Zanette, Europhys. Lett. {\bf 60}, 945 (2002).

529: % Non-monotonic dependence on disorder in biased diffusion on small-world networks

530:

531: \bibitem {sanchez2002} A. S{\'a}nchez, J. M. L{\'o}pez and M. A.

532: Rodr{\'\i}guez, Phys. Rev. Lett. \textbf{88}, 048701 (2002).

533: % Nonequilibrium Phase Transitions in Directed Small-World Networks

534:

535: \bibitem {kuperman2001} M. Kuperman and G. Abramson, Phys. Rev. Lett. \textbf{86},

536: 2909 (2001).

537: % Small World Effect in an Epidemiological Model

538:

539: \bibitem {zanette2002} D. H. Zanette, Phys. Rev. E \textbf{65}, 041908 (2002).

540: % Dynamics of rumor propagation on small-world networks

541:

542: \bibitem {szabo2003}  G. Szab\'o and A. Szolnoki, preprint cond-mat/0305133 (2003).

543: % Rock-scissors-paper game on regular small-world networks

544:

545: \bibitem {barrat} A. Barrat and M. Weigt, Eur. Phys. J. B \textbf{13}, 547 (2000).

546: % On the properties of small-world network models

547:

548: \bibitem {zhu03} J. Y. Zhu and H. Zhu, Phys. Rev. E {\bf 67}, 026125 (2003).

549: % Introducing small-world network effects to critical dynamics

550:

551: \bibitem {herrero02} C. P. Herrero, Phys. Rev. E {\bf 65}, 066110 (2002).

552: % Ising model in small-world networks

553:

554: \bibitem {kim01} B. J. Kim, H. Hong, P. Holme, G. S. Jeong, P. Minnhagen, and M. Y. Choi, Phys. Rev. E {\bf 64}, 056135 (2001).

555: % XY models in small-world networks

556:

557: \end{thebibliography}

558:

559: \end{document}

560: