cs0501021/main.tex
1: \documentclass[12pt]{rspublic}
2: \usepackage[dvips,final]{graphicx}
3: \usepackage{epsfig}
4: \usepackage{epic}
5: \usepackage{eepic}
6: \usepackage{amssymb,amsmath}
7: \usepackage{times}
8: \usepackage{astron-jh}
9: \usepackage{subfigure}
10: \setlength {\textwidth}{150mm}
11: \setlength {\topmargin}{0mm}
12: \setlength {\textheight}{200mm}
13: \oddsidemargin 0mm
14: 
15: \begin{document}
16: 
17: \hyphenation{di-men-sio-nal}
18: %% LBE macros
19: \newcommand{\vect}[1]{{ {\bf #1  }}} 
20: \newcommand{\uvect}[1]{{ \hat{\bf #1  }} }
21: \newcommand{\ci}{
22: 	{
23: 		{ {\bf c}}_i
24: 	}
25: }
26: \newcommand{\fixme}[1]{
27: 
28: { \bf{ ***FIXME: #1 }}
29: 
30: }
31: \newcommand{\half}{\frac{1}{2}}
32: %collision operator
33: \newcommand{\cop}{\Omega_i^\sigma}
34: \newcommand{\copbgk}{{\Omega_i^\sigma}_{\mathrm{BGK}}}
35: \newcommand{\Dop}{\mathbb D}
36: % LBGK variables
37: \newcommand{\tausig}{\tau^\sigma}
38: \newcommand{\xsig}{x^\sigma}
39: \newcommand{\tausigb}{\tau^{\bar{\sigma}}} % relaxation time with barred species index; the accented superscript must be braced (same form as \psisigb)
40: \newcommand{\psisig}{\psi^\sigma}
41: \newcommand{\psisigb}{\psi^{\bar{\sigma}}}
42: \newcommand{\nusig}{\nu^\sigma}
43: \newcommand{\msig}{m^\sigma}
44: \newcommand{\nsig}{n^\sigma}
45: \newcommand{\usig}{u^\sigma}
46: \newcommand{\usiga}{u^\sigma_\alpha}
47: \newcommand{\Fsig}{F^\sigma}
48: \newcommand{\Fsiga}{F^\sigma_\alpha}
49: \newcommand{\upr}{u'}
50: \newcommand{\upra}{{u'}_\alpha}
51: \newcommand{\vsig}{v^\sigma}
52: \newcommand{\vsiga}{v^\sigma_\alpha}
53: \newcommand{\sumsig}{\sum_\sigma}
54: \newcommand{\sumsigb}{\sum_{\bar{\sigma}}}
55: \newcommand{\sumsigsigb}{\sum_{\sigma\bar{\sigma}}}
56: \newcommand{\sumi}{\sum_i}
57: \newcommand{\msi}{\msig\sum_i}
58: \newcommand{\ciao}{c_{i{\alpha_1}}}
59: \newcommand{\cian}{c_{i{\alpha_n}}}
60: \newcommand{\cia}{c_{i\alpha}}
61: \newcommand{\cib}{c_{i\beta}}
62: \newcommand{\cig}{c_{i\gamma}}
63: \newcommand{\cid}{c_{i\delta}}
64: \newcommand{\cs}{c_{\mathrm{s}}}
65: \newcommand{\rhosig}{\rho^\sigma}
66: \newcommand{\frt}{\frac{\rhosig}{\tausig}}
67: \newcommand{\xt}{(\vect{x},t)}
68: \newcommand{\xpct}{(\vect{x}+\ci,t)}
69: \newcommand{\xpc}{(\vect{x}+\ci)}
70: \newcommand{\Ua}{U_\alpha}
71: \newcommand{\fis}{f_i^\sigma}
72: \newcommand{\fisb}{\bar{f}_i^\sigma}
73: \newcommand{\Nis}{N_i^\sigma}
74: \newcommand{\NiU}{N_i^\sigma({\bf U})}
75: \newcommand{\Niu}{N_i^\sigma({\bf u})}
76: \newcommand{\Nivs}{N_i^\sigma({\bf v}^\sigma)}
77: % Lattice tensors
78: \newcommand{\Ti}{T_i}
79: \newcommand{\Ta}{{T}^{(1)}_{\alpha}}
80: \newcommand{\Tab}{{T}^{(2)}_{\alpha\beta}}
81: \newcommand{\Tabg}{{T}^{(3)}_{\alpha\beta\gamma}}
82: % Kronecker deltas
83: \newcommand{\kronab}{\delta_{\alpha\beta}}
84: \newcommand{\kronag}{\delta_{\alpha\gamma}}
85: \newcommand{\kronbg}{\delta_{\beta\gamma}}
86: % Order zero, one two, etc: to produce f^(0), f^(1) etc.
87: \newcommand{\oz}[1]{{#1}^{(0)}}
88: \newcommand{\oo}[1]{{#1}^{(1)}}
89: \newcommand{\ot}[1]{{#1}^{(2)}}
90: \newcommand{\ordn}[1]{{#1}^{(n)}}
91: % Partial derivatives
92: \newcommand{\partiald}[2]{
93: 	\frac { \partial #1 } { \partial #2 }
94: }
95: \newcommand{\partialdd}[2]{
96: 	\frac { \partial^2 #1 } { \partial {#2}^2 }
97: }
98: 
99: \newcommand{\partialop}[1]{
100: 	\frac { \partial } { \partial #1 }
101: }
102: \newcommand{\partialopop}[1]{
103: 	\frac { \partial^2 } { \partial {#1}^2 }
104: }
105: \newcommand{\dal}{\partial_\alpha}
106: \newcommand{\dbe}{\partial_\beta}
107: \newcommand{\dga}{\partial_\gamma}
108: \newcommand{\dt}{\partial_{t}}
109: \newcommand{\dit}{\partial_{1t}}
110: \newcommand{\dtt}{\partial_{2t}}
111: % Momentum tensors
112: \newcommand{\Psa}{\Pi^\sigma_\alpha}
113: \newcommand{\Psab}{\Pi^\sigma_{\alpha\beta}}
114: \newcommand{\Psabg}{\Pi^\sigma_{\alpha\beta\gamma}}
115: \newcommand{\Pa}{\Pi_\alpha}
116: \newcommand{\Pab}{\Pi_{\alpha\beta}}
117: \newcommand{\Pabg}{\Pi_{\alpha\beta\gamma}}
118: \newcommand{\ep}{\epsilon}
119: %% End LBE macros
120: 
121: 
122: 
123: \title{Large-scale lattice Boltzmann simulations of complex fluids: advances
124: through the advent of computational grids}
125: 
126: \author[J.~Harting, J.~Chin, M.~Venturoli, and P.V.~Coveney]{Jens Harting$^1$, Jonathan
127: Chin$^{2}$, Maddalena Venturoli$^{2,3}$, and Peter V. Coveney$^2$}
128: 
129: \affiliation{$^1$Institute for Computational Physics, University of
130: Stuttgart, Pfaffenwaldring 27, D-70569 Stuttgart, Germany\\ $^2$Centre for
131: Computational Science, Christopher Ingold Laboratories, University College
132: London, 20 Gordon Street, London WC1H 0AJ, UK\\ $^3$Schlumberger Cambridge
133: Research, High Cross, Madingley Road, Cambridge CB3 0EL, UK
134: }
135: \label{firstpage}
136: 
137: \maketitle
138: 
139: \begin{center}
140: \begin{abstract}{Lattice-Boltzmann, complex fluids, grid
141: computing, computational steering}
142: 
143: During the last two years the RealityGrid project has allowed us to be
144: one of the few {\it scientific} groups involved in the development of
145: computational grids. Since smoothly working production grids are not yet
146: available, we have been able to substantially influence the direction of
147: software development and grid deployment within the project.
148: In this paper we review our results from large-scale three-dimensional
149: lattice Boltzmann simulations performed over the last two years. We
150: describe how the proactive use of computational steering and advanced
151: job migration and visualization techniques enabled us to do our
152: scientific work more efficiently. 
153: The projects reported on in this paper are studies of complex fluid
154: flows under shear or in porous media, as well as large-scale parameter
155: searches, and  studies of the self-organisation of liquid cubic
156: mesophases.
157: 
158: \end{abstract}
159: \end{center}
160: 
161: \section{Introduction}
162: 
163: In recent years there has emerged a class of fluid dynamical problems,
164: called ``complex fluids'',
165: which involve both hydrodynamic flow effects and complex interactions
166: between fluid particles. Computationally, such problems are too large
167: and expensive to tackle with atomistic methods such as molecular
168: dynamics, yet they require too much molecular detail for continuum
169: Navier-Stokes approaches.
170: 
171: 
172: Algorithms which work at an intermediate or ``mesoscale'' level of
173: description in order to solve these problems have been developed in
174: response, including Dissipative Particle
175: Dynamics\cite{bib:espanol-warren,bib:jury-bladon-cates,bib:flekkoy-coveney-defabritiis},
176: Lattice Gas Cellular Automata\cite{bib:rivet-boon}, the Stochastic
177: Rotation Dynamics of Malevanets and
178: Kapral\cite{bib:malevanets-kapral,bib:hashimoto-chen-ohashi,bib:sakai-chen-ohashi},
179: and the Lattice Boltzmann
180: Equation\cite{bib:succi,bib:benzi-succi-vergassola,bib:love-nekovee-coveney-chin-gonzalez-martin}.
181: In particular, the Lattice Boltzmann method has been found highly useful
182: for simulation of complex fluid flows in a wide variety of systems. This
183: algorithm, described in more detail below, is extremely well suited to
184: implementation on parallel computers, which permits very large systems
185: to be simulated, reaching hitherto inaccessible physical regimes. We
186: describe some of these calculations, and also attempts to take parallel
187: computing to a new scale, by coupling several supercomputers together
188: into a computational grid, which in turn permits easy use of techniques
189: such as computational steering, code migration, and real-time
190: visualization.
191: 
192: %\subsection{Complex Fluids}
193: The term ``simple fluid'' usually refers to a fluid which can be described
194: to a good degree of approximation by macroscopic quantities only, such
195: as the density field $\rho(\mathbf{x})$, velocity field $\mathbf{v}(\mathbf{x})$,
196: and perhaps temperature $T(\mathbf{x})$. Such fluids are governed by the
197: well-known Navier-Stokes equations\cite{bib:faber}, which, being
198: nonlinear, are difficult to solve in the most general case, with the
199: result that numerical solution of the equations has become a common
200: tool for understanding the behaviour of ``simple'' fluids, such as water
201: or air.
202: Conversely, a ``complex fluid'' is one whose macroscopic flow is affected
203: by its microscopic properties. A good example of such a fluid is blood:
204: as it flows through vessels (of order millimetres wide and centimetres
205: long), it is subjected to shear forces, which cause red blood cells (of
206: order micrometres wide) to align with the flow so that they can slide
207: over one another more easily, causing the fluid to become less viscous;
208: this change in viscosity in turn affects the flow profile. Hence, the
209: macroscopic blood flow is affected by the microscopic alignment of its
210: constituent cells.  Other examples of complex fluids include
211: biological fluids such as milk, cell organelles and cytoplasm, as
212: well as polymers and liquid crystals. In all of these cases, the density
213: and velocity fields are insufficient to describe the fluid behaviour,
214: and in order to understand this behaviour, it is necessary to treat
215: effects which occur over a very wide range of length and time scales.
216: This length and time scale gap makes complex fluids even more difficult
217: to model than ``simple'' fluids. While numerical solutions of the
218: macroscopic equations are possible for many simple fluids, such a level
219: of description may not exist for complex fluids, yet simulation of every
220: single molecule involved is computationally infeasible.
221: 
222: %\subsection{Amphiphile mesophases}
223: In a mixture containing many different fluid components, an amphiphile
224: is a kind of molecule which is composed of two parts, each part being
225: attracted towards a different fluid component. For example, soap
226: molecules are amphiphiles, containing a head group which is attracted
227: towards water, and a tail which is attracted towards oil and grease;
228: analogous molecules can also be formed from polymers. If many amphiphile
229: molecules are collected together in solution, they can exhibit highly
230: varied and complicated behaviour, often assembling to form amphiphile
231: mesophases, which are complex fluids of significant theoretical and
232: industrial importance. Some of these phases have long-range order, yet
233: remain able to flow, and are called liquid crystal mesophases. Of
234: particular interest to us are those with cubic symmetry, whose
235: properties have been studied experimentally
236: \cite{bib:seddon-templer,bib:seddon-templer-2,bib:czeslik-winter} in
237: lipid-water mixtures\cite{bib:seddon-templer}, diblock
238: copolymers\cite{bib:shefelbine-vigild-etal}, and in many biological
239: systems\cite{bib:landh}.
240: 
241: %\subsection{Mesoscale modelling}
242: Over the last decade, significant effort has been invested in
243: understanding complex fluids through computational mesoscale modelling
244: techniques. These techniques do not attempt to keep track of the state
245: of every single constituent element of a system, nor do they use an
246: entirely macroscopic description; instead, an intermediate, {\it
247: mesoscale} model of the fluid is developed, coarse-graining microscopic
248: interactions enough that they are rendered amenable to simulation and
249: analysis, but not so much that the important details are lost. Such
250: approaches include Lattice Gas
251: Automata\cite{bib:rivet-boon,bib:fhp,bib:rothman-keller,bib:love}, the
252: Lattice Boltzmann
253: equation\cite{bib:succi,bib:benzi-succi-vergassola,bib:mcnamara-zanetti,bib:higuera-jimenez,bib:higuera-succi-benzi,bib:shan-chen,bib:lamura-gonnella-yeomans,bib:chen-boghosian-coveney,bib:chin-coveney},
254: Dissipative Particle
255: Dynamics\cite{bib:hoogerbrugge-koelman,bib:espanol-warren,bib:jury-bladon-cates},
256: or the Malevanets-Kapral Real-coded Lattice
257: Gas\cite{bib:malevanets-kapral,bib:malevanets-yeomans,bib:hashimoto-chen-ohashi,bib:sakai-chen-ohashi}.
258: Recently-developed
259: techniques\cite{bib:garcia-bell-crutchfield-alder,bib:delgado-coveney}
260: which use hybrid algorithms have shown much promise.
261: %LB intro
262: \input{theory.tex}
263: 
264: \section{Technical projects}
265: Our three-dimensional lattice Boltzmann code, LB3D, is written in
266: Fortran 90 and designed to run on distributed-memory parallel computers,
267: using MPI for communication. In each simulation, the fluid is
268: discretized onto a cuboidal lattice, each lattice point containing
269: information about the fluid in the corresponding region of space. Each
270: lattice site requires about a kilobyte of memory, so
271: that, for example, a simulation on a $128^3$ lattice would require
272: around $2.2\,{\mathrm{GB}}$ of memory.
273: The high-performance computing machines on which most of the simulation
274: work is performed are typically rather heavily used.
275: The situation frequently arises that while a simulation is running on one machine, CPU time
276: becomes available on another machine which may be able to run the job
277: faster or cheaper. The LB3D program has the ability to ``checkpoint''
278: its entire state to a file. This file can then be moved to another
279: machine, and the simulation restarted there, even if the new machine has
280: a different number of CPUs or even a completely different architecture.
281: It has been verified that the simulation results are independent of the
282: machine on which the calculation runs, so that a single simulation may
283: be migrated between different machines as necessary without affecting
284: its output.
285: As a conservative rule of thumb, the code runs at over $10^4$ lattice site
286: updates per second per CPU on a fairly recent machine, and has been
287: observed to have roughly linear scaling up to order $10^3$ compute nodes.
288: A $128^3$ simulation contains around $2.1 \times 10^6$ lattice sites;
289: running it for 1000 timesteps requires  about an hour of real time, split
290: across $64$ CPUs. The largest simulation we performed used a $1024^3$
291: lattice.
292: The output from a simulation usually takes the form of a single
293: floating-point number for each lattice site, representing, for example,
294: the density of a particular fluid component at that site. Therefore, a
295: density field snapshot from a $128^3$ system would produce output files of
296: around $8{\mathrm{MB}}$. Writing data to disk is one of the bottlenecks in
297: large scale simulations. If one simulates a 1024$^3$ system, each data
298: file is $4{\mathrm{GB}}$ in size. LB3D is able to benefit from the
299: parallel filesystems available on many large machines today, by using the
300: MPI-IO based parallel HDF5 data format \cite{bib:hdf5}.
301: Our code is very robust regarding different platforms or cluster
302: interconnects: even with moderate inter-node bandwidths it achieves
303: almost linear scaling for large processor counts with the only
304: limitation being the available memory per node. The platforms our code
305: has been successfully used on include various supercomputers like the
306: IBM pSeries, SGI Altix and Origin, Cray T3E, Compaq Alpha clusters, NEC
307: SX6, as well as low cost 32- and 64-bit Linux clusters.
308: However, due to compiler or machine peculiarities it is a time-consuming
309: task to achieve optimum performance on many different platforms. Porting a
310: complex Fortran code like LB3D to new platforms is often very difficult
311: and time-consuming without the assistance of well trained staff at the
312: corresponding computer centres. Some of these problems are due to
313: portability issues with the Fortran language. Also, tuning a code to take
314: full advantage of the machine on which it runs requires considerable
315: knowledge of the local system's quirks. It is hoped that some of the
316: portability issues could be solved in future by well-designed middleware.
317: Such issues include the fact that location, size, and duration of
318: temporary filespace change from machine to machine, as do the methods for
319: invoking compilers and batch queues.
320: 
321: %\subsection{Computational Steering}
322: LB3D has successfully been used to study various problems like spinodal
323: decomposition with and without shear
324: \cite{bib:gonzalez-nekovee-coveney,bib:harting-venturoli-coveney}, flow in
325: porous media \cite{bib:harting-venturoli-coveney}, the self-assembly of
326: cubic mesophases such as the ``P''-phase \cite{Maziar:2001} in binary
327: water-surfactant systems, or the cubic gyroid phase in ternary amphiphilic
328: systems \cite{bib:gonzalez-coveney,bib:gonzalez-coveney-2}.  Before we
329: were able to take advantage of computational steering techniques, our work
330: usually involved large scale parameter searches organised as taskfarming
331: jobs in order to find the areas of interest of the available parameter
332: space. 
333: The technique of computational
334: steering\cite{bib:chin-harting-jha,bib:brooke-coveney-harting,bib:love-nekovee-coveney-chin-gonzalez-martin}
335: has been used successfully in smaller-scale simulations to optimize
336: resource usage. Typically, the procedure for running a simulation of the
337: self-assembly of a mesophase would be to set up the initial conditions,
338: and then submit a batch job to run for a certain, fixed number of
339: timesteps. If the timescale for structural assembly is unknown then the
340: initial number of timesteps for which the simulation runs is, at best,
341: an educated guess. It is not uncommon to examine the results of such a
342: simulation once they return from the batch queue, only to find that a
343: simulation has not been run for sufficient time (in which case it must
344: be tediously resubmitted), or that it ran for too long, and the majority
345: of the computer time was wasted on simulation of an uninteresting
346: equilibrium system showing no dynamical behaviour.
347: Another unfortunate scenario often occurs when the phase diagram of a
348: simulated system is not well known, in which case a simulation may
349: evolve away from a situation of interest, wasting further CPU time.
350: Computational steering, the ability to watch and control a calculation
351: as it runs, can be used to avoid these difficulties: a simulation which
352: has equilibrated may be spotted and terminated, preventing wastage of
353: CPU time.
354: More powerfully, a simulation may be steered through parameter space
355: until it is unambiguously seen to be producing interesting results: this
356: technique is very powerful when searching for emergent phenomena, such
357: as the formation of surfactant micelles, which are not clearly related
358: to the underlying simulation parameters.
359: Steering is performed using the RealityGrid steering library which has
360: been developed by collaborators at the University of Manchester. The
361: library was built with the intention of making it possible to add
362: steering capabilities to existing simulation codes with as few changes
363: as possible, and in as general a manner as possible. Once the
364: application has initialized the steering library and informed it which
365: parameters are to be steered, then after every timestep of the
366: simulation, it is possible to perform tasks such as checkpointing the
367: simulation, saving output data, stopping the simulation, or restarting
368: from an existing checkpoint.
369: When a steered simulation is started, a Steering Grid Service (SGS) is
370: also created, to represent the steerable simulation on the Grid. The SGS
371: publishes its location to a Registry service, so that steering clients
372: may find it. This design means that it is possible for clients to
373: dynamically attach to and detach from running simulations.
374: %The SGS code was implemented in Perl, and communication between clients,
375: %registries, and steered simulations is performed using the SOAP
376: %protocol.
377: 
378: %\subsection{Visualization}
379: Successful computational steering requires that the simulation operators
380: have a good understanding of what the simulation is doing, in real time:
381: this in turn requires good visualization capabilities. Each running
382: simulation emits output files after certain periods of simulation time
383: have elapsed. The period between output emission is initially determined
384: by guessing a timescale over which the simulation will change in a
385: substantial way; however, this period is a steerable parameter, so that
386: the output rate can be adjusted for optimum visualization without
387: producing an excessive amount of data.
388: The LB3D code itself will only emit volumetric datasets as described
389: above; these must then be rendered into a human-comprehensible form
390: through techniques including volume-rendering, isosurfacing,
391: ray-tracing, slice planes, and Fourier transforms. The process of
392: producing such comprehensible data from the raw datasets is itself
393: computationally intensive, particularly if it is to be performed in real
394: time, as required for computational steering.
395: For this reason, we use separate visualization clusters to render the
396: data. Output volumes are sent from the
397: simulation machine to the remote visualization machine, so that the
398: simulation can proceed independently of the visualization; these are
399: then rendered using the open source VTK\cite{bib:vtk} visualization
400: library into bitmap images, which can in turn be multicast over the
401: AccessGrid, so that the state of the
402: simulation can be viewed by scientists around the globe. In particular,
403: this was demonstrated by performing and interacting with a simulation in
404: front of a live worldwide audience, as part of the SCGlobal track of the
405: SuperComputing 2004 conference.
406: The RealityGrid steering architecture was designed in a sufficiently
407: general manner that visualization services can also be represented by
408: Steering Grid Services: in order to establish a connection between the
409: visualization process and the corresponding simulation, the simulation
410: SGS can be found through the Registry, and then interrogated for the
411: information required to open the link.
412: 
413: %\subsection{Coordination}
414: In order to be able to deploy the above described components as part of
415: a usable simulation Grid, a substantial amount of coordination is
416: necessary, so that the end user is able to launch an entire simulation
417: pipeline, containing migratable simulation, visualization, and steering
418: components, from a unified interface. This requires a system for keeping
419: track of which services are available and which components are running,
420: taking care of the checkpoints and data which are generated, and
421: harmonizing communication between the different components.
422: This was achieved through the development of a Registry service,
423: implemented using the {\tt{OGSI:~\!\!\!\!:Lite}} \cite{bib:ogsilite}
424: toolkit. The RealityGrid steering library\cite{bib:chin-harting-jha}
425: communicates with the rest of the Grid by exposing itself as a
426: ``Grid Service''. Through the Registry service, steering clients
427: are able to find, dynamically attach to, communicate with, and detach from
428: steering services to control a simulation or visualization process.
429: 
430: %\subsection{Using prototype computational grids today}
431: Large lattices require a highly scalable code, access to high performance
432: computing, terascale storage facilities and high performance
433: visualisation. LB3D provides the first of these, while the others are
434: being delivered by the major computing centres.  
435: We expect to be able to run our simulations in an even more efficient
436: way due to the significant worldwide effort being invested in the
437: development of reliable computational grids. A computational grid is a collection of
438: geographically distributed and dynamically varying resources, each
439: providing services such as compute cycles, visualization, storage, or
440: even experimental facilities. The major difference between computational
441: grids and traditional distributed computing is the transparent sharing
442: and collective use of resources, which would otherwise be individual and
443: isolated facilities. Perhaps at some point computational grids will
444: offer information technology what electricity grids offer for other
445: aspects of our daily life: a transparent and reliable resource that is
446: easy to use and conforms to commonly agreed standards
447: \cite{gridbook2,bib:Berman}. Robust and smart middleware will find the
448: best available resources in a transparent way without the user having to
449: care about their location.
450: Unfortunately, reliable and robust computational grids are not available
451: yet. We used various different demonstration grids which were assembled
452: especially for a given event or were intended for use as prototyping
453: platforms rather than usable production grids.
454: These mainly included grids
455: coupling major compute resources in the UK and the biggest effort took
456: place within the TeraGyroid project
457: \cite{bib:teragyroid,bib:teragyroid-epsrc} where the main
458: machines of the UK's national HPC centres were coupled with the TeraGrid
459: facilities in the US through a custom high-performance
460: network. In total, about 5000 CPUs were part of this grid.
461: Collaborative steering sessions with active participants on two
462: continents and observers worldwide were made possible through this
463: approach.
464: 
465: \section{Scientific projects}
466: \subsection{Complex fluids under shear}
467: \label{Sec:Shear}
468: In many industrial applications, complex fluids are subject to shear
469: forces. For example, axial bearings are often filled with fluid to
470: reduce friction and transport heat away from the most vulnerable parts
471: of the device. It is very important to understand how these fluids
472: behave under high shear forces, in order to be able to build reliable
473: machines and choose the proper fluid for different applications.
474: In our simulations we use Lees-Edwards boundary conditions, which were
475: originally developed for molecular dynamics simulations in 1972
476: \cite{bib:lees-edwards} and have been used in lattice Boltzmann
477: simulations by different authors before
478: \cite{bib:wagner-yeomans-shear,bib:wagner-pagonabarraga,bib:harting-venturoli-coveney}. 
479: We applied our model to study the behaviour of binary immiscible and
480: ternary amphiphilic fluids under constant and oscillatory shear. In the
481: case of spinodal decomposition under constant shear, the first results
482: have been published in \cite{bib:harting-venturoli-coveney}. The phase
483: separation of binary immiscible fluids without shear has been studied in
484: detail by different authors, and LB3D has been shown to model the
485: underlying physics successfully \cite{bib:gonzalez-nekovee-coveney}. In
486: the non-sheared studies of spinodal decomposition it has been shown that
487: lattice sizes need to be large in order to overcome finite size effects:
488:  128$^3$ was the minimum acceptable number of lattice
489: sites \cite{bib:gonzalez-nekovee-coveney}. For high shear rates, systems
490: also have to be very long because, if the system is too small, the
491: domains interconnect across the $z = 0$ and $z = \mathit{nz}$ boundaries
492: to form interconnected lamellae in the direction of shear. Such
493: artefacts need to be eliminated from our simulations. Figure
494: \ref{fig:spinodal-shear} shows an example from a simulation with lattice
495: size $128\times128\times512$. The volume rendered blue and red areas depict the
496: different fluid species and the arrows denote the direction of shear.
497: In the case of ternary amphiphilic fluid mixtures under shear we are
498: interested in the influence of the presence of surfactant molecules on the
499: phase separation. We also study the stress response and stability of
500: cubic mesophases such as the gyroid phase \cite{bib:gonzalez-coveney} or
501: the ``P''-phase \cite{bib:nekovee-coveney} under shear. Such complex fluids
502: are expected to exhibit non-Newtonian properties (see below).
503: Computational steering has turned out to be very useful for checking on
504: finite size effects during a sheared fluid simulation, since the human
505: eye is extremely good at spotting the sort of structures indicative of
506: such effects. Implementing an algorithm to automatically
507: recognize ``unphysical'' behaviour is a highly nontrivial task in
508: comparison.
509: 
510: \begin{figure}[h]
511: \begin{center}
512: \includegraphics[height=4cm]{shear2.eps}
513: \end{center}
514: \caption{Spinodal decomposition under shear. Differently coloured regions
515: denote the majority of the corresponding fluid. The arrows depict the
516: movement of the sheared boundaries (movie available in online version).
517: }
518: \label{fig:spinodal-shear}
519: \end{figure}
520: 
521: \subsection{Flow in porous media} \label{Sec:Porous}
522: Studying transport phenomena in porous media is of great interest in
523: fields ranging from oil recovery and water purification to industrial
524: processes like catalysis. In particular, the oilfield industry uses
525: complex, non-Newtonian, multicomponent fluids (containing polymers,
526: surfactants and/or colloids, brine, oil and/or gas), for processes like
527: fracturing, well stimulation and enhanced oil recovery. The rheology and
528: flow behaviour of these complex fluids in a rock is different from their
529: bulk properties. It is therefore of considerable interest to be able to
530: characterise and predict the flow of these fluids in porous media.
531: From the point of view of a modelling approach, the treatment of complex
532: fluids in three-dimensional complex geometries is an ambitious goal since
533: the lattice has to be large enough to resolve individual structures. The
534: advantage of lattice Boltzmann (or lattice gas) techniques is that
535: complex geometries can be modelled with ease.
536: \begin{figure}
537: \centerline{\includegraphics[height=4cm]{benth512.ps}}
538: \caption{
539: Rendering of $4.9\,\mu\mathrm{m}$ resolution X-ray microtomographic data of a
540: $512^3$ sample of Bentheimer sandstone.
541: The pore space is shown in red, while the rock is represented in blue.}
542: \label{f:Benth}
543: \end{figure}
544: Synchrotron based X-ray microtomography (XMT) imaging techniques provide
545: high resolution, three-dimensional digitised images of rock samples. By
546: using the lattice Boltzmann approach in combination with these high
547: resolution images of rocks, not only is it possible to compute
548: macroscopic transport coefficients, such as the permeability of the
549: medium, but information on local fields, such as velocity or fluid
550: densities, can also be obtained at the pore scale, providing a detailed
551: insight into local flow characterisation and supporting the
552: interpretation of experimental measurements \cite{bib:auzeraisGRL96}.
553: The XMT technique measures the linear attenuation coefficient from which
554: the mineral concentration and composition of the rock can be computed.
555: Morphological properties of the void space, such as pore size distribution
556: and tortuosity, can be derived from the tomographic image of the rock
557: volume, and the permeability and conductivity of the rock can be
558: computed \cite{bib:spannePRL96}. The tomographic data are represented by
559: a reflectivity greyscale value, where the linear size of each voxel is
560: defined by the imaging resolution, which is usually on the order of
561: microns. By introducing a threshold to discriminate between pore sites
562: and rock sites, these images can be reduced to a binary (0's and 1's)
563: representation of the rock geometry. Utilizing the lattice Boltzmann
564: method, single phase or multiphase flow can then be described in these
565: real porous media.
566: 
567: Lattice Boltzmann and lattice gas techniques have already been applied to
568: study single and multiphase flow through three-dimensional
569: microtomographic reconstructions of porous media. For example, Martys and
570: Chen \cite{bib:martys-chen} and Ferr{\'e}ol and Rothman
571: \cite{bib:ferreol-rothman} studied relative permeabilities of binary
572: mixtures in Fontainebleau sandstone. These studies validated the model
573: and the simulation techniques, but were limited to small lattice sizes, of
574: the order of $64^3$.
575: Simulating fluid flow in real rock samples allows us to compare
576: simulation data with experimental results obtained on the same, or
577: similar, pieces of rock. For a reasonable comparison, the size of the
578: rock used in lattice Boltzmann simulations should be of the same order
579: of magnitude as the system used in the experiments, or at least large
580: enough to capture the rock's topological features. The more
581: inhomogeneous the rock, the larger the sample size needs to be in order
582: to describe the correct pore distribution and connectivity. 
583: Another reason for needing to use large
584: lattice sizes is the influence of boundary conditions and lattice
585: resolution on the accuracy of the lattice Boltzmann method. It has been
586: shown (see for example \cite{bib:He97}, \cite{bib:chen-doolen} and
587: references therein) that the Bhatnagar-Gross-Krook (BGK) \cite{bib:bgk}
588: approximation of the lattice Boltzmann equation which is commonly used
589: causes so-called bounce-back boundaries to become inaccurate, resulting in
590: effects such as the computed permeability being a function of the viscosity.
591: This effect can be limited by lowering the viscosity and increasing the
592: lattice resolution.
593: To accurately describe hydrodynamic behaviour using lattice Boltzmann
594: simulations, the Knudsen number, which represents the ratio of the mean free
595: path of the fluid particles and the characteristic length scale of the
596: system (such as the pore diameter), has to be small. If the pores are
597: resolved with an insufficient number of lattice points, finite size effects
598: arise, leading to an inaccurate description of the flow field. In
599: practice, at least five to ten lattice sites are needed to resolve a
600: single pore. Therefore, in order
601: to be able to simulate realistic sample sizes, we need large lattices of
602: the order of 512$^3$.
603: 
604: Using LB3D, we are able to simulate drainage and imbibition processes in
605: a $512^3$ subsample of Bentheimer sandstone X-ray tomographic data. The
606: whole set of XMT data represented the image of a Bentheimer sample of
607: cylindrical shape with diameter 4\,mm and length 3\,mm. The XMT data were
608: obtained at the European Synchrotron Radiation Facility (Grenoble) at a
609: resolution of $4.9\,\mu{\rm m}$, resulting in a data set of
610: approximately $816\times816\times612$ voxels. Figure~\ref{f:Benth} shows a snapshot
611: of the $512^3$ subsystem.
612: We compare simulated velocity distributions with experimentally obtained
613: magnetic resonance imaging (MRI) data of oil and brine infiltration into
614: saturated Bentheimer rock core \cite{MRISheppard}. The rock sample used in
615: these MRI experiments had a diameter of 38 mm and was 70 mm long and was
616: imaged with a resolution of 280 microns. The system simulated was smaller,
617: but still of a similar order of magnitude and large enough to represent
618: the rock geometry. On the other hand, the higher space resolution provided
619: by the simulations allows a detailed characterisation of the flow field in
620: the pore space, hence providing a useful tool to interpret the MRI
621: experiments, for example in identifying regions of stagnant fluid.
622: Figure \ref{fig:invasion} shows an example from a binary invasion study.
623: A rock which is initially fully saturated with ``water'' (blue) is
624: being invaded by ``oil'' (red) from the right side. The lattice size is
625: $512^3$ and the forcing level is set to $g_{\rm accn}$ = 0.003. In
626: figure \ref{fig:invasion}, only the invading fluid component is shown,
627: i.e. only areas where oil is the majority component are rendered.
628: Periodic boundary conditions are applied, and fluid leaving the system
629: on the left side is converted to oil before re-entering on the opposite
630: side. After 5000 timesteps, the oil has invaded about one quarter of the
631: system already and after 25000 timesteps only small regions of the rock
632: pore space are still filled with water. After 30000 timesteps, the water
633: component has been fully pushed out of the rock.
634: This example only covers binary (oil/water) mixtures of Newtonian fluids,
635: since this is a first and necessary step in the understanding of
636: multiphase fluid flow in porous media
637: \cite{bib:harting-venturoli-coveney}. However, we are able to study the
638: flow of binary immiscible fluids with an additional amphiphilic component
639: in porous media and expect results to be presented elsewhere in the near
640: future.
641: \begin{figure}
642: \centerline{\includegraphics[width=10cm]{invasion5k-30k.eps}}
643: \caption{An originally fully fluid saturated rock is being invaded by
644: another immiscible fluid using a body force $g_{\rm accn}$ = 0.003. The
645: oil slowly pushes the other fluid component out of the rock pores
646: until the rock is fully saturated by oil at $t$ = 30000. For
647: better visibility only the invading fluid is shown (movie available in
648: online version).}
649: \label{fig:invasion}
650: \end{figure}
651: 
652: \subsection{The cubic gyroid mesophase}
653: It was recently shown by Gonz\'{a}lez and
654: Coveney~\cite{bib:gonzalez-coveney} that the dynamical self-assembly of a
655: particular amphiphile mesophase, the gyroid, can be modelled using the
656: lattice Boltzmann method. This mesophase was observed to form from a
657: homogeneous mixture, without any external constraints imposed to bring
658: about the gyroid geometry, which is an emergent effect of the mesoscopic
659: fluid parameters.
660: It is important to note that this method allows examination of the dynamics of
661: mesophase formation, since most treatments to date have focussed on properties
662: or mathematical
663: description~\cite{bib:seddon-templer-2,bib:schwarz-gompper-2,bib:gandy-klinowski,bib:grosse-brauckmann}
664: of the static equilibrium state. In addition to its biological importance,
665: there have been recent attempts~\cite{bib:chan-hoffman-etal} to use
666: self-assembling gyroids to construct nanoporous materials.
667: During the gyroid self-assembly process, several small, separated
668: gyroid-phase regions or domains may start to form, and then grow. Since
669: the domains evolve independently, the independent gyroid regions will
670: in general not be identical, and can differ in orientation, position, or
671: unit cell size; grain-boundary defects arise between gyroid domains.
672: Inside a domain, there may be dislocations, or line defects,
673: corresponding to the termination of a plane of unit cells; there may
674: also be localised non-gyroid regions, corresponding to defects due to
675: contamination or inhomogeneities in the initial conditions.
676: Understanding such defects is therefore important for our knowledge of
677: the dynamics of surfactant systems, and crucial for an understanding of
678: how best to produce mesophases experimentally and industrially.
679: \begin{figure}[h]
680: \begin{center}
681: \includegraphics[height=4cm]{gyroid.eps}
682: \end{center}
683: \caption{A volume rendered dataset of a 128$^3$ system after 100000
684: simulation timesteps. Various gyroid domains have formed and the
685: close-up shows the extremely regular, crystalline, gyroid structure within
686: a domain (movies available in online version).}
687: \label{fig:gyroid}
688: \end{figure}
689: In small-scale simulations of the gyroid, the mesophase will evolve to
690: fill the simulated region perfectly, without defects. As the lattice size
691: grows, it becomes more probable that multiple gyroid domains will emerge
692: independently, so that grain boundary defects are more likely to appear,
693: and the time required for localized defects to diffuse across the lattice
694: increases, making it more likely that defects will persist. Therefore,
695: examination of the defect behaviour of surfactant mesophases requires the
696: simulation of very large systems.
697: Figure \ref{fig:gyroid} shows an example of a 128$^3$ system after 100000
698: simulation timesteps. Multiple gyroid domains have formed and the close-up
699: shows the extremely regular, crystalline, gyroid structure within a
700: domain. Figure \ref{fig:gyroid-wishbones} demonstrates some of the most
701: interesting properties of the gyroid mesophase: two labyrinths mainly
702: consisting of water and oil counterparts are enclosed by the gyroid
703: minimal surface at which the surfactant molecules accumulate. The
704: characteristic triple junctions can be seen clearly.
705: \begin{figure}[h]
706: \begin{center}
707: \includegraphics[height=4cm]{doublegyroid-wishbones.eps}
708: \end{center}
709: \caption{Structure of the two labyrinths
710: enclosed by a gyroid minimal surface, showing the characteristic triple
711: junctions.}
712: \label{fig:gyroid-wishbones}
713: \end{figure}
714: 
715: The TeraGyroid experiment~\cite{bib:teragyroid,bib:teragyroid-epsrc} addressed a large scale
716: problem of genuine scientific interest and showed how intercontinental
717: grids permit the use of novel techniques in collaborative computational
718: science, which can dramatically reduce the time to insight. TeraGyroid
719: used computational steering over a Grid to study the self-assembly and
720: dynamics of gyroid mesophases using the largest set of lattice Boltzmann
721: simulations ever performed. Around the Supercomputing 2003 conference we
722: were able to simulate gyroid formation and defect behaviour harnessing
723: the compute power of a large fraction of the UK and US HPC facilities.
724: Altogether we were able to use about 400000 CPU hours and generate two
725: terabytes of simulation data.
726: \begin{figure}[h]
727: \begin{center}
728: \includegraphics[height=4cm]{colour_Gyr1024p9_t000000.eps}
729: \end{center}
730: \caption{In order to eliminate finite size effects from simulations, very
731: large lattice sizes are needed. If one is interested in the statistical
732: behaviour of defects, then the lattice size has to be increased even more,
733: since otherwise only a limited number of defects can be found in the
734: system.  This figure shows a snapshot from what we believe to be the
735: largest ternary lattice Boltzmann simulation ever performed, on a 1024$^3$
736: lattice. \label{fig:gyroid1024} }
737: \end{figure}
738: In order to make sure our simulations are virtually free of finite size
739: effects, we simulated different system sizes from 64$^3$ to 1024$^3$,
740: usually for about 100000 timesteps. In order to study the long term
741: behaviour of the gyroid mesophase, some simulations have even run for one
742: million timesteps. For 100000 timesteps we found that 256$^3$ or even
743: 128$^3$ simulations do not suffer from finite size effects, but after very
744: long simulation times we might even have to move to larger lattices.
745: Even with the longest possible simulation times, we were not able to
746: generate a ``perfect'' crystal. Instead, either differently orientated
747: domains can still be found or individual defects are still moving
748: around. It is of particular interest to study the exact behaviour of
749: the defect movement, which can be done by gathering statistics of the
750: simulation data by counting and tracking individual defects. Gathering
751: useful statistics implies large numbers of measurements and therefore
752: large lattices, which is the reason for the 512$^3$ and 1024$^3$
753: simulations performed. The memory requirements exceed the available
754: resources on most supercomputers and limit us to a small number of
755: machines. Also, it requires substantial amounts of CPU time to reach
756: sufficient simulation times. In the case of the 1024$^3$ system, 2048
757: CPUs of a recent Compaq Alpha cluster are only able to simulate about
758: 100 simulation timesteps per hour. Running for 100000 timesteps would
759: require more than two million CPU hours or 42 days and is therefore
760: unfeasible. Also, handling the data files which are $4{\mathrm{GB}}$
761: each and checkpoint files which are $0.5{\mathrm{TB}}$ each is very
762: awkward with the infrastructure available today. In order to be able to
763: gain useful data from the large simulations, we first run a 128$^3$
764: system with periodic boundary conditions, until it forms a gyroid. This
765: system is then duplicated 512 times to produce a 1024$^3$ gyroid system.
766: In order to reduce effects introduced due to the periodic upscaling, we
767: perturb the system and let it evolve. We anticipate that the unphysical
768: effects introduced by the upscaling process will decay after a
769: comparably small number of timesteps, thus resulting in a system that is
770: comparable to one that started from a random mixture of fluids. This has
771: to be justified by comparison with data obtained from test runs performed
772: on smaller systems. Figure \ref{fig:gyroid1024} shows a snapshot of a
773: volume rendered dataset from the upscaled 1024$^3$ system at 1000
774: timesteps after the upscaling process. The unphysical periodic
775: structures introduced by the individual 128$^3$ systems can still
776: clearly be seen. 
777: 
778: \begin{figure}[h]
779: \begin{center}
780: \includegraphics[height=3cm]{gyroidshear.eps}
781: \end{center}
782: \caption{A sheared gyroid mesophase: a) before the onset of shear, b)
783: at the onset of shear, c) after long shear times.
784: }
785: \label{fig:gyroid-shear}
786: \end{figure}
787: Currently, work is in progress to study the stability of the gyroid
788: mesophase. We are interested in the influence of perturbation on a
789: gyroid and the strength of the perturbation needed to break up a well
790: developed mesophase. Similar studies are performed experimentally by
791: applying constant or oscillatory shear. Here, we study the dependence of
792: the gyroid stability on the shear rate, and expect to find evidence of
793: the non-Newtonian properties of the fluid. An example from those studies
794: can be seen in figure \ref{fig:gyroid-shear}, which shows three
795: snapshots of the same simulation. The first shows the liquid crystal
796: before the onset of shear, the second only a few hundred timesteps after
797: shear has been turned on and the third image demonstrates how the gyroid
798: melts if the shear stress becomes too strong.
799: 
800: As seen before, simulation data from liquid crystal dynamics can be
801: visualized using isosurfacing or volume rendering techniques. The human
802: eye has a remarkable ability to easily distinguish between regions where
803: the crystal structure is well developed and areas where it is not.
804: However, manual analysis of large amounts of simulation data is not
805: feasible. In the case of the TeraGyroid project, about two terabytes of
806: data would have to be checked and catalogued manually. This task would
807: keep an individual busy for years. Therefore, computational methods for
808: defect detection and tracking are required. Developing algorithms to
809: detect and track defects is a non-trivial task, however, since defects can
810: occur within and between domains of varying shapes and sizes and over a
811: wide variety of length and time scales. 
812: A standard method to analyse simulation data is the calculation of the
813: three-dimensional structure function 
814: %\begin{equation}
815: $S(\mathbf{k},t)\equiv\frac{1}{V}\left|\phi^\prime_\mathbf{k}(t)\right|^2$,
816: %\end{equation}
817: where $V$ is the number of sites of the lattice,
818: $\phi^\prime_\mathbf{k}(t)$ the Fourier transform of the fluctuations of
819: the order parameter $\phi^\prime\equiv\phi-\left<\phi\right>$, and
820: $\mathbf{k}$ is the wave vector
821: \cite{bib:gonzalez-nekovee-coveney,bib:gonzalez-coveney-2}.
822: $S(\mathbf{k},t)$
823: can easily be calculated, but only gives general information about the
824: crystal development \cite{bib:hajduk,bib:laurer,bib:gonzalez-coveney}. It
825: does not allow one to detect where the defects are located or how many
826: there are, nor does it furnish access to information about the number
827: of differently oriented gyroid domains.
828: $S(\mathbf{k},t)$ is given for a 128$^3$ system at timesteps $t$=10000, 100000,
829: and 700000 in figure \ref{fig:3DFFT}. We simulate for one million
830: timesteps -- more than an order of magnitude longer than any other LB3D
831: simulation performed before the TeraGyroid \cite{bib:TeraGyroidWWW}
832: project. The initial condition of the simulation is a random mixture with
833: maximum densities of 0.7 for the immiscible fluids and 0.6 for surfactant.
834: The coupling constant $g_{ss}$ is set to $-0.0045$ and the coupling between
835: surfactant and the other fluids is set to $g_{cs}=-0.006$. In
836: order to compare our data to experimentally obtained SAXS data
837: \cite{bib:hajduk}, we sum the structure factor in the $x$-direction;
838: $X_{max}$ denotes the value of the largest peak normalised by the number
839: of lattice sites in the direction of summation (128 in this case)
840: \cite{bib:defect-paper}. 
841: Gyroid assembly is evident due to the eight peaks of the structure factor
842: which become higher with ongoing simulation time. At $t$=700000,
843: $X_{max}$ reaches 197.00 and most of the previously existing domains have
844: merged into a single one. Only a few defects are left of which two can be
845: spotted visually at the right corner of the volume rendered visualisation
846: and the centre of the top surface (denoted by the white arrows).
847: \begin{figure}[h]
848: \begin{center}
849: \includegraphics[height=5cm]{fft3.eps}
850: \end{center}
851: \caption{Three-dimensional structure factor of the order parameter at
852: timesteps $t$=10000, 100000, and 700000, lattice size 128$^3$ and
853: simulation parameters as given in the text. 
854: For comparability with SAXS experimental data, we display
855: the total structure factor in the $x$-direction $X$=$\sum_{k_x}
856: S(\mathbf{k},t)$. $X_{max}$ denotes the value of the largest peak divided
857: by the number of lattice sites in the direction of summation (128 in this
858: case). The lower half of the figure shows volume rendered visualizations
859: of the corresponding order parameters and the white arrows are a guide for
860: the eye to spot some defective areas at the top surface and the right
861: corner at $t$=700000.}
862: \label{fig:3DFFT}
863: \end{figure} 
864: The structure factor analysis does not provide any information about the
865: size, position or number of individual defects in the system. Therefore,
866: we developed more advanced algorithms for the detection and tracking of
867: defects.
868: As a first order approach, the data to be analyzed can be reduced by
869: cutting the three-dimensional data sets into slabs and projecting them
870: onto a two-dimensional plane. By using a raytracing algorithm for the
871: projection, we obtain regular patterns in areas where the gyroid is
872: perfectly developed and solid planes in defective areas. We developed
873: two algorithms which use the projection data to separate the defective
874: areas from the perfect crystal. The first approach is based on a
875: generic pattern recognition algorithm and should work with all liquid
876: crystals that form a regular pattern, while the second has been
877: developed with our particular problem in mind and is not known to work
878: with systems other than the gyroid mesophase. However, it is about an
879: order of magnitude faster and the general principles underlying it
880: should be applicable to different systems as well.
881: The first approach is based on the regularity or periodicity of patterns
882: and was developed by Chetverikov and Hanbury in 2001
883: \cite{bib:chetverikov} who applied it to patterns from the textile
884: industry. It is assumed that defect-free patterns are homogeneous and show
885: some periodicity. The algorithm searches for areas which are significantly
886: less regular (i.e. aperiodic) than the bulk of the dataset by computing
887: regularity features for a set of windows and identifying defects as
888: outliers. The regularity is quantified by computing the periodicity of the
889: normalised autocorrelation function in polar coordinates. In short, for
890: every window a regularity value is computed. If this value differs by more
891: than a defined threshold value from the median of all window regularity
892: values, the area is accordingly classified as a defect. For a more
893: detailed description of the algorithm see
894: \cite{bib:chetverikov,bib:chetverikov2,bib:defect-paper}.
895: The second approach encapsulates knowledge about the patterns produced by
896: regular and defect regions. As a consequence, it is an order of magnitude
897: faster than the pattern recognition code.
898: For each slab image, the algorithm creates a regular mesh in areas where
899: the gyroid structure is well developed, and an irregular mesh in
900: defective areas. The regions of regular mesh are discarded, leaving only
901: mesh that describes the perimeters of defect regions.  A flood-fill
902: algorithm is applied to these datasets to locate distinct defect
903: regions.
904: The output data of both detection algorithms for all two-dimensional
905: projections of a three-dimensional dataset can be used to reconstruct
906: three-dimensional volume data that only consists of defect regions.
907: Figure \ref{fig:strucmaskreconst} shows reconstructed datasets at
908: $t$=340000, 500000 and 999000 which have been detected using the pattern
909: recognition approach. However, the results obtained from the mesh
910: generator are similar. Even at $t$=340000 a very large region of
911: the system has not yet formed a well defined gyroid phase. 160000
912: timesteps later, the main defects are pillar shaped ones at the centre and
913: at the corners of the visualised systems. Due to the periodic boundary
914: conditions, the corner defects are connected and should be regarded as a
915: single one. As can be seen from the analysis at $t$=999000, defects in the
916: gyroid mesophase are very stable in size as well as in their position.
917: \begin{figure}[h]
918: \begin{center}
919: \includegraphics[height=4cm]{strucmaskreconst.eps}
920: \end{center}
921: \caption{Volume rendered visualization of the order parameter at $t$=340000,
922: 500000, 999000. Only the defects are shown
923: as they have been isolated from the full datasets using the pattern
924: recognition algorithm (movie available in online version).}
925: \label{fig:strucmaskreconst}
926: \end{figure}
927: The pattern recognition algorithm is less efficient than mesh generation.
928: However, it is not limited to simulations of gyroid mesophases and more
929: robust with regard to small fluctuations of the dataset. In the gyroid
930: case, it is more efficient to use the results from the mesh generator to
931: select a smaller number of datasets for post-processing using the pattern
932: recognition algorithm since the computational effort involved in the
933: pattern recognition can be substantial. For a more detailed description
934: of the algorithms see \cite{bib:defect-paper}. Currently, we are working
935: on more geometrically based algorithms to efficiently detect defects and
936: results will be published elsewhere in the near future.
937: 
938: \section{Conclusions}
939: During the last two years, we have worked on various scientific projects
940: using our lattice Boltzmann code LB3D. All of these projects reached the
941: limits of the HPC resources available to us today. However, without the
942: benefits obtained from software development within the RealityGrid
943: project, none of these projects would have been possible at all. These
944: improvements include the steering facilities, code optimizations, IO
945: optimizations as well as the platform independent checkpointing and
946: migration routines which have been contributed by various people within
947: the project. Without the lightweight Grid Service Container
948: {\tt{OGSI:~\!\!\!\!:Lite}} \cite{bib:ogsilite} projects like the
949: TeraGyroid experiment would not have been possible since existing
950: middleware toolkits such as Globus are rather heavyweight, requiring
951: substantial effort and local tuning on the part of systems administrators
952: to install and maintain. This effort cannot be expected from the average
953: scientist who is planning to use a computational
954: grid~\cite{bib:lgpaper}.
955: The simulation pipeline requires
956: simulation, visualization, and storage facilities to be available
957: simultaneously, at times when their human operators can reasonably
958: be expected to be around. This is often dealt with by manual reservation of
959: resources by systems administrators, but the ideal solution would
960: involve automated advance reservation and co-allocation procedures.
961: The most exciting project involving RealityGrid during the last two
962: years was the TeraGyroid experiment. Hundreds of individuals have worked
963: together to build a transcontinental grid not only as a demonstrator for
964: the grid techniques available today, but also to perform a scientific
965: project. Since we would not have been able to gain as many new results
966: from the simulations performed during that period without the active use
967: of grid technologies, we have shown that the advent of computational
968: grids will be of great benefit for computational scientists.
969: 
970: \section*{Acknowledgements}
971: We would like to thank S.~Jha, M.~Harvey and G.~Giupponi (University
972: College London), A.R.~Porter, and S.M.~Pickles (University of Manchester), N.~Gonz\'{a}lez-Segredo (FOM Institute for Atomic and Molecular Physics), and
973: E.S.~Boek and J.~Crawshaw (Schlumberger Cambridge Research) for fruitful
974: discussions and E.~Breitmoser from the Edinburgh Parallel Computing Centre
975: for her contributions to our lattice Boltzmann code. 
976: We are grateful to the U.K. Engineering and Physical Sciences Research
977: Council (EPSRC) for funding much of this research through RealityGrid
978: grant GR/R67699 and to EPSRC and the National Science Foundation (NSF) for
979: funding the TeraGyroid project. 
980: This work was partially supported by the National Science Foundation under
981: NRAC grant MCA04N014 and PACI grant ASC030006P, and utilized computer
982: resources at the Pittsburgh Supercomputer Center, the National
983: Computational Science Alliance and the TeraGrid.
984: We acknowledge the European Synchrotron Radiation Facility for provision
985: of synchrotron radiation facilities and we would like to thank P.~Cloetens
986: for assistance in using beamline ID19, as well as  J.~Elliott
987: and G.~Davis of Queen Mary, University of London, for their
988: work in collecting the raw data and reconstructing the X-ray
989: microtomography data sets used in our Bentheimer sandstone images.
990: 
991: % Use custom hacked BibTeX file -- unsorted, but with abbreviated names.
992: %\bibliographystyle{abbrv-unsrt} 
993: \bibliographystyle{astron-jh} 
994: \bibliography{main}
995: 
996: \end{document}
997: