-
Notifications
You must be signed in to change notification settings - Fork 493
Expand file tree
/
Copy pathexample_taylor_green.py
More file actions
905 lines (748 loc) · 37.6 KB
/
example_taylor_green.py
File metadata and controls
905 lines (748 loc) · 37.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###########################################################################
# Example Taylor-Green Vortex
#
# Solves the 2D incompressible Navier-Stokes equations on [0,1]^2 and
# validates the numerical solution against the exact Taylor-Green vortex:
#
# u(x, y, t) = -cos(2*pi*x) * sin(2*pi*y) * exp(-8*pi^2*nu*t)
# v(x, y, t) = sin(2*pi*x) * cos(2*pi*y) * exp(-8*pi^2*nu*t)
# p(x, y, t) = -1/4*(cos(4*pi*x) + cos(4*pi*y)) * exp(-16*pi^2*nu*t)
#
# This is a rare Navier-Stokes test case with a closed-form solution: the
# nonlinear advection term (u . grad)u happens to equal a pressure gradient,
# so the vortex decays purely by viscous diffusion.
#
# Discretization
# --------------
# - **Spatial**: Taylor-Hood Q_d / Q_{d-1} mixed finite elements.
# The velocity is approximated in a vector-valued space of degree d
# (e.g. Q2 — biquadratic), and the pressure in a scalar space of
# degree d-1 (e.g. Q1 — bilinear). This pairing satisfies the
# inf-sup (LBB) stability condition, avoiding pressure oscillations.
#
# - **Temporal**: BDF2 (second-order backward differentiation) for all
# steps, with u^{n-1} initialized to the exact solution at t = -dt.
# Advection is handled semi-Lagrangian: instead of discretizing the
# material derivative Du/Dt, we trace characteristics backward in time
# to find "departure points" and evaluate the old velocity there.
# This yields an unconditionally stable treatment of the nonlinear
# convection term.
#
# - **Linear system**: Each time step solves a saddle-point system
#
# [ A B^T ] [ u ] [ f ]
# [ B 0 ] [ p ] = [ g ]
#
# where A combines the viscous stiffness and the implicit mass term,
# B is the discrete divergence, f is the advection + BC right-hand side,
# and g enforces incompressibility (div u = 0).
#
# - **Boundary conditions**: Time-dependent exact Dirichlet BCs are
# imposed on all boundary walls via projection (see Section 10 of
# fem.MD for details).
#
# warp.fem API overview (for readers new to the library)
# -------------------------------------------------------
# Warp.fem provides a high-level finite element API on top of Warp's
# GPU kernel infrastructure. The key abstractions are:
#
# - **Geometry** (e.g. ``Grid2D``, ``Trimesh2D``): the mesh.
# - **Function space** (``make_polynomial_space``): defines the shape
# functions. Each space has a set of *nodes* (DOF locations) and a
# *degree*.
# - **Field** (``space.make_field()``): a vector of DOF values living
# in a function space — represents a discrete FE function.
# - **Test / Trial** (``make_test``, ``make_trial``): lightweight wrappers
# around a space used to indicate which role a field plays during
# assembly.
# - **Domain** (``fem.Cells``, ``fem.BoundarySides``): where integrals
# are evaluated — either over all cells or over boundary facets.
# - **Integrand** (``@fem.integrand``): a function evaluated at each
# quadrature point. Inside an integrand:
# - ``s`` (``fem.Sample``) is the current quadrature point
# - ``u(s)`` evaluates a field at that point
# - ``fem.D(u, s)`` is the gradient ∇u
# - ``fem.div(u, s)`` is the divergence ∇·u
# - ``domain(s)`` returns the physical coordinates x of the point
# - ``fem.lookup(domain, x, s)`` finds the sample closest to x,
# used for semi-Lagrangian departure-point queries
# - **``fem.integrate``**: assembles an integrand over a domain.
# Depending on the fields passed (test, trial, or neither), it returns
# a matrix, vector, or scalar.
# - **``fem.interpolate``**: evaluates an integrand at the *nodes* of a
# destination field (not at quadrature points). For Lagrange elements
# this gives exact nodal values — no mass-matrix inversion needed.
###########################################################################
import warp as wp
import warp.examples.fem.utils as fem_example_utils
import warp.fem as fem
from warp.fem.linalg import array_axpy
from warp.optim.linear import cg as cg_solve
PI = wp.constant(3.141592653589793)
TWO_PI = wp.constant(2.0 * 3.141592653589793)
FOUR_PI = wp.constant(4.0 * 3.141592653589793)
@wp.func
def periodic_wrap(pos: wp.vec2) -> wp.vec2:
"""Wrap coordinates back into [0, 1]^2 for the periodic Taylor-Green solution."""
return wp.vec2(pos[0] - wp.floor(pos[0]), pos[1] - wp.floor(pos[1]))
@wp.func
def exact_velocity(x: wp.vec2, t: float, nu: float) -> wp.vec2:
decay = wp.exp(-8.0 * PI * PI * nu * t)
u = -wp.cos(TWO_PI * x[0]) * wp.sin(TWO_PI * x[1]) * decay
v = wp.sin(TWO_PI * x[0]) * wp.cos(TWO_PI * x[1]) * decay
return wp.vec2(u, v)
@wp.func
def exact_pressure(x: wp.vec2, t: float, nu: float) -> float:
decay = wp.exp(-16.0 * PI * PI * nu * t)
return -0.25 * (wp.cos(FOUR_PI * x[0]) + wp.cos(FOUR_PI * x[1])) * decay
###########################################################################
# Weak-form integrands
#
# Each @fem.integrand function is evaluated at every quadrature point
# during assembly. The first argument ``s`` (a ``fem.Sample``) identifies
# the current quadrature point. Arguments annotated ``fem.Field`` are
# FE functions that can be evaluated at ``s`` via ``u(s)`` (value),
# ``fem.D(u, s)`` (gradient), or ``fem.div(u, s)`` (divergence).
# Scalar parameters (``dt``, ``nu``, …) are passed at call time through
# the ``values=`` dictionary of ``fem.integrate``.
#
# When both a test and trial field are present, ``fem.integrate`` returns
# a sparse matrix whose (i,j) entry is the integral of the integrand
# over the domain with basis function j as trial and basis function i as
# test. With only a test field, it returns a vector (linear form).
# With no test/trial fields, it returns a scalar.
###########################################################################
@fem.integrand
def mass_form(s: fem.Sample, u: fem.Field, v: fem.Field):
"""L2 inner product ∫ u · v dx.
Used both for the velocity mass matrix and for building the boundary
projector (see Dirichlet BC section below).
"""
return wp.dot(u(s), v(s))
# ---- LHS bilinear forms (implicit part of the time discretization) ----
#
# The implicit Euler (BDF1) semi-discrete momentum equation is:
#
# (u^{n+1} - u^n_dep) / dt - nu * Laplacian(u^{n+1}) + grad(p) = 0
#
# Multiplying by a test function v, integrating by parts, and using the
# symmetric strain rate (factor 2*nu with ddot on gradients), the bilinear
# form for the velocity block of the saddle-point matrix is:
#
# a(u, v) = (1/dt) ∫ u·v dx + 2*nu ∫ D(u):D(v) dx
#
# For BDF2 the mass coefficient changes from 1/dt to 3/(2*dt).
@fem.integrand
def viscosity_and_inertia_form(s: fem.Sample, u: fem.Field, v: fem.Field, dt: float, nu: float):
"""BDF1 velocity bilinear form: (1/dt) (u, v) + 2 nu (D(u), D(v)).
``fem.D(u, s)`` is the Jacobian matrix ∂u_i/∂x_j at quadrature point s.
``wp.ddot`` is the Frobenius (component-wise) double contraction A:B.
"""
return 2.0 * nu * wp.ddot(fem.D(u, s), fem.D(v, s)) + wp.dot(u(s), v(s)) / dt
@fem.integrand
def viscosity_and_inertia_form_bdf2(s: fem.Sample, u: fem.Field, v: fem.Field, dt: float, nu: float):
"""BDF2 velocity bilinear form: (3/(2 dt)) (u, v) + 2 nu (D(u), D(v))."""
return 2.0 * nu * wp.ddot(fem.D(u, s), fem.D(v, s)) + 1.5 * wp.dot(u(s), v(s)) / dt
# ---- RHS linear forms (semi-Lagrangian advection) ----------------------
#
# Semi-Lagrangian advection replaces the nonlinear convection term
# (u · grad)u with a characteristic-tracing step. For each quadrature
# point x, we find the "departure point" X by tracing backward along the
# velocity field, then evaluate the old velocity u^n(X).
#
# The departure point is found with a predictor-corrector (midpoint) method:
# 1. Predict: x_mid = x - 0.5 * dt * u(x)
# 2. Correct: X = x_mid - 0.5 * dt * u(x_mid)
#
# ``fem.lookup(domain, pos, s)`` returns a new ``fem.Sample`` at position
# ``pos``, allowing evaluation of any field there. The last argument ``s``
# is a hint for the spatial search (speeds up the BVH query).
#
# ``periodic_wrap`` maps the backtracked position back into [0,1]^2 to
# respect the periodicity of the Taylor-Green solution.
#
# BDF1 RHS: f = (1/dt) ∫ u^n(X) · v dx
# BDF2 RHS: f = (1/dt) ∫ (2 u^n(X_1) - 0.5 u^{n-1}(X_2)) · v dx
# where X_1 is the departure point at distance dt and X_2 at distance 2*dt.
@fem.integrand
def transported_inertia_form(s: fem.Sample, domain: fem.Domain, u: fem.Field, v: fem.Field, dt: float):
"""BDF1 semi-Lagrangian RHS: (1/dt) ∫ u^n(X) · v dx."""
pos = domain(s)
vel = u(s)
# Predictor-corrector backtracking (midpoint rule, one dt step)
conv_pos = periodic_wrap(pos - 0.5 * vel * dt)
conv_s = fem.lookup(domain, conv_pos, s)
conv_vel = u(conv_s)
conv_pos = periodic_wrap(conv_pos - 0.5 * conv_vel * dt)
u_dep = u(fem.lookup(domain, conv_pos, conv_s))
return wp.dot(u_dep, v(s)) / dt
@fem.integrand
def transported_inertia_form_bdf2(
s: fem.Sample, domain: fem.Domain, u_cur: fem.Field, u_prev: fem.Field, v: fem.Field, dt: float
):
"""BDF2 semi-Lagrangian RHS: (1/dt) ∫ (2 u^n(X_1) - 1/2 u^{n-1}(X_2)) · v dx.
Two departure points are computed:
- X_1: backtrack by dt using u^n → evaluate u^n(X_1)
- X_2: backtrack by 2*dt using u^n → evaluate u^{n-1}(X_2)
"""
pos = domain(s)
vel = u_cur(s)
# Backtrack by dt to get u^n at departure point X_1
mid1 = periodic_wrap(pos - 0.5 * vel * dt)
mid1_s = fem.lookup(domain, mid1, s)
mid1 = periodic_wrap(mid1 - 0.5 * u_cur(mid1_s) * dt)
u_n_dep = u_cur(fem.lookup(domain, mid1, mid1_s))
# Backtrack by 2*dt to get u^{n-1} at departure point X_2
mid2 = periodic_wrap(pos - vel * dt)
mid2_s = fem.lookup(domain, mid2, s)
mid2 = periodic_wrap(mid2 - u_cur(mid2_s) * dt)
u_nm1_dep = u_prev(fem.lookup(domain, mid2, mid2_s))
return wp.dot(2.0 * u_n_dep - 0.5 * u_nm1_dep, v(s)) / dt
# ---- Divergence / pressure coupling ------------------------------------
#
# The incompressibility constraint div(u) = 0 is enforced weakly:
#
# b(u, q) = -∫ q div(u) dx
#
# When assembled with a velocity trial and a pressure test, this gives
# the B matrix of the saddle-point system.
@fem.integrand
def div_form(s: fem.Sample, u: fem.Field, q: fem.Field):
"""Discrete divergence operator: -∫ q div(u) dx."""
return -q(s) * fem.div(u, s)
# ---- Boundary and initial condition integrands --------------------------
@fem.integrand
def velocity_boundary_integrand(s: fem.Sample, domain: fem.Domain, v: fem.Field, t: float, nu: float):
"""Weighted boundary integral of exact velocity: ∫_∂Ω u_exact · v ds.
This is only used once at t=0 to provide an initial value for
``normalize_dirichlet_projector``. For subsequent time steps we use
``fem.interpolate`` instead (see the ``step`` method).
"""
x = domain(s)
return wp.dot(exact_velocity(x, t, nu), v(s))
@fem.integrand
def initial_velocity(s: fem.Sample, domain: fem.Domain, nu: float):
"""Exact velocity at t=0, used with ``fem.interpolate`` to set ICs."""
return exact_velocity(domain(s), 0.0, nu)
# ---- Error measurement integrands --------------------------------------
@fem.integrand
def velocity_error(s: fem.Sample, domain: fem.Domain, u: fem.Field, t: float, nu: float):
"""Pointwise squared velocity error |u_h - u_exact|^2 (integrated to get L2 norm)."""
x = domain(s)
diff = u(s) - exact_velocity(x, t, nu)
return wp.dot(diff, diff)
@fem.integrand
def pressure_mean_integrand(s: fem.Sample, p: fem.Field):
"""∫ p dx — used to compute the mean pressure for centering."""
return p(s)
@fem.integrand
def pressure_error(s: fem.Sample, domain: fem.Domain, p: fem.Field, t: float, nu: float, p_mean: float):
"""Pointwise squared pressure error (p_h - mean - p_exact)^2.
Since the pressure in an incompressible flow is determined only up to
a constant, we subtract its mean before comparing.
"""
x = domain(s)
diff = (p(s) - p_mean) - exact_pressure(x, t, nu)
return diff * diff
@fem.integrand
def exact_velocity_at_time(s: fem.Sample, domain: fem.Domain, t: float, nu: float):
"""Exact velocity at arbitrary time t — used with ``fem.interpolate``."""
return exact_velocity(domain(s), t, nu)
@fem.integrand
def exact_pressure_at_time(s: fem.Sample, domain: fem.Domain, t: float, nu: float):
"""Exact pressure at arbitrary time t — used with ``fem.interpolate``."""
return exact_pressure(domain(s), t, nu)
# ---- Passive dye advection (visualization only) -------------------------
@fem.integrand
def initial_dye(s: fem.Sample, domain: fem.Domain):
"""Stripe pattern that will be deformed by the vortex flow."""
x = domain(s)
return wp.sin(FOUR_PI * x[0])
@fem.integrand
def advect_dye(s: fem.Sample, domain: fem.Domain, dye: fem.Field, u: fem.Field, dt: float):
"""Semi-Lagrangian advection of a passive scalar dye field.
Same predictor-corrector backtracking as the velocity advection,
but here we evaluate the dye field (not velocity) at the departure
point. Used with ``fem.interpolate`` (not ``fem.integrate``), so
the result is written directly into the destination field's DOFs.
"""
pos = domain(s)
vel = u(s)
conv_pos = periodic_wrap(pos - 0.5 * vel * dt)
conv_s = fem.lookup(domain, conv_pos, s)
conv_vel = u(conv_s)
conv_pos = periodic_wrap(conv_pos - 0.5 * conv_vel * dt)
return dye(fem.lookup(domain, conv_pos, conv_s))
class Example:
"""Taylor-Green vortex solver with analytical error tracking.
The constructor assembles all constant matrices (viscosity, mass,
divergence, boundary projector), pre-computes time-dependent boundary
conditions for every frame, and captures the simulation step as a CUDA
graph for reduced launch overhead.
Args:
quiet: Suppress solver residual output and skip error computation.
degree: Polynomial degree for velocity (pressure is degree-1).
resolution: Number of cells per spatial direction.
Re: Reynolds number (viscosity = 1/Re).
mesh: Mesh type — ``"grid"`` (structured quads), ``"tri"``, or ``"quad"``
(unstructured).
"""
def __init__(self, quiet=False, degree=2, resolution=25, Re=100.0, mesh: str = "grid"):
self._quiet = quiet
# Time step size: dt = h so that CFL ~ 1 for the decaying vortex.
self.sim_dt = 1.0 / resolution
self.current_frame = 0
viscosity = 1.0 / Re
# -----------------------------------------------------------------
# 1. Mesh / geometry
# -----------------------------------------------------------------
# Grid2D creates a structured quad mesh on [0,1]^2 with the given
# resolution. Trimesh2D / Quadmesh2D create unstructured meshes.
# ``build_bvh=True`` builds a bounding-volume hierarchy needed by
# ``fem.lookup`` for the semi-Lagrangian departure-point queries.
if mesh == "tri":
positions, tri_vidx = fem_example_utils.gen_trimesh(res=wp.vec2i(resolution))
geo = fem.Trimesh2D(tri_vertex_indices=tri_vidx, positions=positions, build_bvh=True)
elif mesh == "quad":
positions, quad_vidx = fem_example_utils.gen_quadmesh(res=wp.vec2i(resolution))
geo = fem.Quadmesh2D(quad_vertex_indices=quad_vidx, positions=positions, build_bvh=True)
else:
geo = fem.Grid2D(res=wp.vec2i(resolution))
# Integration domains: all cells (for volume integrals) and boundary
# edges (for Dirichlet BC enforcement).
domain = fem.Cells(geometry=geo)
boundary = fem.BoundarySides(geo)
# -----------------------------------------------------------------
# 2. Function spaces (Taylor-Hood)
# -----------------------------------------------------------------
# Velocity: vector-valued (wp.vec2) polynomial space of the given
# degree (default Q2 — biquadratic on quads).
# Pressure: scalar polynomial space one degree lower (default Q1).
u_space = fem.make_polynomial_space(geo, degree=degree, dtype=wp.vec2)
p_space = fem.make_polynomial_space(geo, degree=degree - 1)
# -----------------------------------------------------------------
# 3. Assemble constant matrices
# -----------------------------------------------------------------
# Test and trial functions are lightweight wrappers that tell
# ``fem.integrate`` which field plays which role. Passing both a
# trial and a test produces a sparse matrix (bilinear form).
u_test = fem.make_test(space=u_space, domain=domain)
u_trial = fem.make_trial(space=u_space, domain=domain)
# Velocity block A = 3/(2 dt)*M + 2*nu*K (BDF2)
# M = mass matrix, K = stiffness matrix (viscous term).
u_matrix = fem.integrate(
viscosity_and_inertia_form_bdf2,
fields={"u": u_trial, "v": u_test},
values={"nu": viscosity, "dt": self.sim_dt},
output_dtype=wp.float64,
)
# Divergence matrix B (pressure-velocity coupling).
# Passing a velocity trial and a pressure test gives the rectangular
# matrix B such that B @ u approximates -∫ q div(u) dx.
p_test = fem.make_test(space=p_space, domain=domain)
div_matrix = fem.integrate(div_form, fields={"u": u_trial, "q": p_test}, output_dtype=wp.float64)
# -----------------------------------------------------------------
# 4. Dirichlet boundary conditions (projection method)
# -----------------------------------------------------------------
# We enforce u = u_exact on ∂Ω by modifying the linear system:
# A_proj = (I - P) A (I - P) + P
# rhs = (I - P)(f - A * u_bc) + u_bc
# where P is a projector that is identity on boundary DOFs and zero
# on interior DOFs.
#
# Building the projector:
# 1. Assemble the boundary mass matrix with ``assembly="nodal"``
# (row-sum lumping) — this gives a diagonal matrix whose
# nonzero entries correspond to boundary DOFs.
# 2. ``normalize_dirichlet_projector`` scales it so the diagonal
# entries become exactly 1, making it an idempotent projector.
#
# IMPORTANT: ``normalize_dirichlet_projector`` also divides the
# companion ``fixed_value`` vector by the raw diagonal, converting
# mass-weighted integrals into nodal values. It must be called
# exactly once per projector — calling it again on an already-
# normalized projector is a no-op on the values (see fem.MD §17.11).
u_bd_test = fem.make_test(space=u_space, domain=boundary)
u_bd_trial = fem.make_trial(space=u_space, domain=boundary)
u_bd_projector = fem.integrate(
mass_form, fields={"u": u_bd_trial, "v": u_bd_test}, assembly="nodal", output_dtype=wp.float64
)
u_bd_value_init = fem.integrate(
velocity_boundary_integrand,
fields={"v": u_bd_test},
values={"t": 0.0, "nu": viscosity},
assembly="nodal",
output_dtype=wp.vec2d,
)
fem.normalize_dirichlet_projector(u_bd_projector, u_bd_value_init)
# Save copies of the un-projected matrices — we need the originals
# to pre-compute the BC contribution to the RHS for each time step.
self._u_matrix_orig = wp.sparse.bsr_copy(u_matrix)
self._div_matrix_orig = wp.sparse.bsr_copy(div_matrix)
# Project the velocity matrix in-place.
u_bd_rhs = wp.zeros_like(u_bd_value_init)
fem.project_linear_system(u_matrix, u_bd_rhs, u_bd_projector, u_bd_value_init, normalize_projector=False)
# Project the divergence block: B_proj = B (I - P), so that B
# doesn't "see" the boundary DOFs (they're fixed, not unknowns).
div_matrix -= div_matrix @ u_bd_projector
# -----------------------------------------------------------------
# 5. Build saddle-point system
# -----------------------------------------------------------------
# SaddleSystem wraps the projected [A, B^T; B, 0] block structure
# and provides a CG-based Uzawa solver.
self._saddle_system = fem_example_utils.SaddleSystem(u_matrix, div_matrix)
# Save references needed during time stepping
self._domain = domain
self._u_bd_projector = u_bd_projector
self._u_test = u_test
self._viscosity = viscosity
# -----------------------------------------------------------------
# 6. Solution fields and initial conditions
# -----------------------------------------------------------------
# ``space.make_field()`` allocates a DOF vector and returns a Field
# object. The DOF values live in ``field.dof_values`` (a wp.array).
self._u_field = u_space.make_field()
self._u_field_prev = u_space.make_field() # u^{n-1} for BDF2
self._p_field = p_space.make_field()
# Scratch field for computing time-dependent boundary values.
self._u_bc_field = u_space.make_field()
# Set u^0 = u_exact(t=0) and u^{-1} = u_exact(t=-dt) so that BDF2
# can be used from the very first time step (it needs two levels).
fem.interpolate(initial_velocity, dest=self._u_field, values={"nu": viscosity})
fem.interpolate(exact_velocity_at_time, dest=self._u_field_prev, values={"t": -self.sim_dt, "nu": viscosity})
# Passively advected dye field for visualization (same degree as velocity).
dye_space = fem.make_polynomial_space(geo, degree=degree)
self._dye_field = dye_space.make_field()
self._dye_tmp = dye_space.make_field()
fem.interpolate(initial_dye, dest=self._dye_field)
# Error tracking
self._velocity_errors = []
self._pressure_errors = []
self._times = []
# Fields for plotting the exact solution at the final time
self._exact_u_field = u_space.make_field()
self._exact_p_field = p_space.make_field()
# Pre-allocate scratch arrays used every time step
n_u = self._u_field.space.node_count()
n_p = self._p_field.space.node_count()
self._u_bd_value = wp.zeros(n_u, dtype=wp.vec2d)
self._x_u = wp.zeros(n_u, dtype=wp.vec2d)
self._x_p = wp.zeros(n_p, dtype=wp.float64)
self._u_rhs = wp.zeros(n_u, dtype=wp.vec2d)
self._u_bd_rhs_buf = wp.zeros(n_u, dtype=wp.vec2d)
self._p_rhs = wp.zeros(n_p, dtype=wp.float64)
saddle = self._saddle_system
self._saddle_x = wp.empty(dtype=saddle.scalar_type, shape=saddle.shape[0], device=saddle.device)
self._saddle_b = wp.empty_like(self._saddle_x)
self.renderer = fem_example_utils.Plot()
self.renderer.add_field("dye", self._dye_field)
# -----------------------------------------------------------------
# 7. CUDA graph capture
# -----------------------------------------------------------------
# With BDF2 used from frame 1, simulate() is frame-independent.
# We capture it as a CUDA graph and replay it each step for
# reduced launch overhead. Before each replay, step() computes
# the time-dependent BC contributions into the working buffers
# (_u_bd_rhs_buf, _p_rhs) that the graph references.
self.use_cuda_graph = wp.get_device().is_cuda
if self.use_cuda_graph:
import gc # noqa: PLC0415
gc.disable()
try:
with wp.ScopedCapture() as capture:
self.simulate()
self.graph = capture.graph
finally:
gc.enable()
def _compute_bc(self, t):
"""Compute time-dependent Dirichlet BC contributions at time ``t``.
Fills ``_u_bd_rhs_buf`` and ``_p_rhs`` with the velocity and
pressure RHS contributions from the boundary conditions. This is
called by ``step()`` before each graph replay (or ``simulate()``
call) so that the captured graph sees the correct BC data.
"""
fem.interpolate(exact_velocity_at_time, dest=self._u_bc_field, values={"t": t, "nu": self._viscosity})
self._u_bd_value.zero_()
wp.utils.array_cast(out_array=self._u_bd_value, in_array=self._u_bc_field.dof_values)
wp.sparse.bsr_mv(self._u_bd_projector, x=self._u_bd_value, y=self._u_bd_value, alpha=1.0, beta=0.0)
wp.sparse.bsr_mv(self._u_matrix_orig, x=self._u_bd_value, y=self._u_bd_rhs_buf, alpha=1.0, beta=0.0)
wp.sparse.bsr_mv(self._u_bd_projector, x=self._u_bd_rhs_buf, y=self._u_bd_rhs_buf, alpha=1.0, beta=-1.0)
array_axpy(x=self._u_bd_value, y=self._u_bd_rhs_buf, alpha=1.0, beta=1.0)
wp.sparse.bsr_mv(self._div_matrix_orig, x=self._u_bd_value, y=self._p_rhs, alpha=-1.0, beta=0.0)
def simulate(self):
"""Run one time step of the Navier-Stokes solver.
This method is CUDA-graph-capturable: it contains no time-dependent
parameters and no host-side branches. Time-dependent BC
contributions must be loaded into ``_u_bd_rhs_buf`` and ``_p_rhs``
by ``_compute_bc()`` before each call (or graph replay).
The iterative solver uses ``check_every=0`` so that it runs a fixed
number of iterations with device-side convergence checks, making it
compatible with outer graph capture.
"""
# --- Step 1: semi-Lagrangian advection RHS (BDF2) ----------------
#
# Assemble the linear form that represents the explicit part of the
# time discretization — the old velocity evaluated at departure
# points. Passing only a test field (no trial) makes
# ``fem.integrate`` return a vector.
fem.integrate(
transported_inertia_form_bdf2,
fields={"u_cur": self._u_field, "u_prev": self._u_field_prev, "v": self._u_test},
values={"dt": self.sim_dt},
output_dtype=wp.vec2d,
output=self._u_rhs,
)
# --- Step 2: combine advection RHS with pre-loaded BC contribution
#
# u_rhs = (I - P) * u_rhs + u_bd_rhs
# = (I - P) * f - (I - P) * A * u_bc + u_bc
wp.sparse.bsr_mv(self._u_bd_projector, x=self._u_rhs, y=self._u_rhs, alpha=-1.0, beta=1.0)
array_axpy(x=self._u_bd_rhs_buf, y=self._u_rhs, alpha=1.0, beta=1.0)
# --- Step 3: solve the saddle-point system -----------------------
#
# Use the previous solution as initial guess (warm start) for the
# iterative solver. ``array_cast`` converts between float32 (field
# storage) and float64 (solver precision).
#
# We call the CG solver directly (bypassing bsr_solve_saddle /
# bsr_cg) to avoid host-side .numpy() calls that would break
# outer graph capture. With check_every=0, the solver detects
# that it is inside a graph capture and uses wp.capture_while
# for its iteration loop.
x_u = self._x_u
x_p = self._x_p
wp.utils.array_cast(out_array=x_u, in_array=self._u_field.dof_values)
wp.utils.array_cast(out_array=x_p, in_array=self._p_field.dof_values)
saddle = self._saddle_system
wp.copy(src=x_u, dest=saddle.u_slice(self._saddle_x))
wp.copy(src=x_p, dest=saddle.p_slice(self._saddle_x))
wp.copy(src=self._u_rhs, dest=saddle.u_slice(self._saddle_b))
wp.copy(src=self._p_rhs, dest=saddle.p_slice(self._saddle_b))
cg_solve(
A=saddle,
b=self._saddle_b,
x=self._saddle_x,
tol=1.0e-6,
check_every=0,
use_cuda_graph=True,
M=saddle.preconditioner,
)
wp.copy(dest=x_u, src=saddle.u_slice(self._saddle_x))
wp.copy(dest=x_p, src=saddle.p_slice(self._saddle_x))
# Save u^n before overwriting (BDF2 needs both u^n and u^{n-1}).
wp.copy(src=self._u_field.dof_values, dest=self._u_field_prev.dof_values)
# Copy solver output back into the solution fields.
wp.utils.array_cast(in_array=x_u, out_array=self._u_field.dof_values)
wp.utils.array_cast(in_array=x_p, out_array=self._p_field.dof_values)
# --- Step 4: advect dye ------------------------------------------
#
# The dye is a passive scalar with no feedback on the flow.
# ``fem.interpolate`` with the ``advect_dye`` integrand evaluates
# the dye at backtracked positions and writes the result into
# ``_dye_tmp``. We copy back instead of swapping so that the
# graph always reads from / writes to the same memory addresses.
fem.interpolate(
advect_dye,
dest=self._dye_tmp,
fields={"dye": self._dye_field, "u": self._u_field},
values={"dt": self.sim_dt},
)
wp.copy(src=self._dye_tmp.dof_values, dest=self._dye_field.dof_values)
def step(self):
"""Advance the solution by one time step.
Loads pre-computed BC data for the current frame, then either
replays the captured CUDA graph or calls ``simulate()`` directly.
Optionally computes L2 errors against the exact solution.
"""
self.current_frame += 1
t = self.current_frame * self.sim_dt
# Compute time-dependent BC contributions for this frame.
self._compute_bc(t)
if self.use_cuda_graph:
wp.capture_launch(self.graph)
else:
self.simulate()
# --- Error measurement (host-side, outside graph) ----------------
#
# Compute L2 norms of velocity and pressure errors against the
# exact Taylor-Green solution. The pressure is only determined up
# to a constant, so we subtract its mean before comparing.
# ``fem.integrate`` with no test/trial fields returns a scalar.
if not self._quiet:
l2_vel_err_sq = fem.integrate(
velocity_error,
domain=self._domain,
fields={"u": self._u_field},
values={"t": t, "nu": self._viscosity},
)
p_mean = fem.integrate(
pressure_mean_integrand,
domain=self._domain,
fields={"p": self._p_field},
)
l2_p_err_sq = fem.integrate(
pressure_error,
domain=self._domain,
fields={"p": self._p_field},
values={"t": t, "nu": self._viscosity, "p_mean": p_mean},
)
vel_err = l2_vel_err_sq**0.5
p_err = l2_p_err_sq**0.5
self._times.append(t)
self._velocity_errors.append(vel_err)
self._pressure_errors.append(p_err)
return vel_err, p_err
return None, None
def render(self):
self.renderer.begin_frame(time=self.current_frame * self.sim_dt)
self.renderer.add_field("dye", self._dye_field)
self.renderer.end_frame()
def plot_comparison(self, save=None):
"""Plot L2 error history and side-by-side field comparison at final time.
Produces a 2x3 figure:
- (0,0) Error decay over time (log scale)
- (0,1) Numerical velocity magnitude
- (0,2) Exact velocity magnitude
- (1,0) Pointwise velocity error
- (1,1) Numerical pressure (mean-centered)
- (1,2) Exact pressure
Args:
save: If not ``None``, save the figure to this path instead of
displaying it interactively.
"""
import matplotlib.pyplot as plt # noqa: PLC0415
import numpy as np # noqa: PLC0415
from matplotlib.tri import Triangulation # noqa: PLC0415
if not self._times:
return
t_final = self._times[-1]
# Interpolate exact solutions at final time
fem.interpolate(exact_velocity_at_time, dest=self._exact_u_field, values={"t": t_final, "nu": self._viscosity})
fem.interpolate(exact_pressure_at_time, dest=self._exact_p_field, values={"t": t_final, "nu": self._viscosity})
u_num = self._u_field.dof_values.numpy()
u_exact = self._exact_u_field.dof_values.numpy()
p_num = self._p_field.dof_values.numpy()
p_exact = self._exact_p_field.dof_values.numpy()
# Center numerical pressure (analytical pressure has zero mean)
p_num_centered = p_num - np.mean(p_num)
u_num_mag = np.linalg.norm(u_num, axis=1)
u_exact_mag = np.linalg.norm(u_exact, axis=1)
# Triangulations for contour plots
u_pos = self._u_field.space.node_positions().numpy()
u_tri = Triangulation(u_pos[:, 0], u_pos[:, 1], self._u_field.space.node_triangulation())
p_pos = self._p_field.space.node_positions().numpy()
p_tri = Triangulation(p_pos[:, 0], p_pos[:, 1], self._p_field.space.node_triangulation())
times = np.array(self._times)
fig, axes = plt.subplots(2, 3, figsize=(15, 9))
fig.suptitle(f"Taylor-Green Vortex: Comparison with Analytical Solution (t = {t_final:.2f})")
# --- Top row: error decay + velocity comparison ---
ax = axes[0, 0]
ax.semilogy(times, self._velocity_errors, "b-o", markersize=2, label="Velocity")
ax.semilogy(times, self._pressure_errors, "r-s", markersize=2, label="Pressure")
ax.set_xlabel("Time")
ax.set_ylabel("L2 Error")
ax.set_title("Error Decay")
ax.legend()
ax.grid(True, alpha=0.3)
# Use independent color scales so each field shows its full dynamic range
ax = axes[0, 1]
cs = ax.tricontourf(u_tri, u_num_mag, levels=20)
ax.set_title("Velocity |u| (Numerical)")
ax.set_aspect("equal")
fig.colorbar(cs, ax=ax)
ax = axes[0, 2]
cs = ax.tricontourf(u_tri, u_exact_mag, levels=20)
ax.set_title("Velocity |u| (Exact)")
ax.set_aspect("equal")
fig.colorbar(cs, ax=ax)
# --- Bottom row: velocity error + pressure comparison ---
u_err_mag = np.linalg.norm(u_num - u_exact, axis=1)
ax = axes[1, 0]
cs = ax.tricontourf(u_tri, u_err_mag, levels=20)
ax.set_title("Velocity Error |u - u_exact|")
ax.set_aspect("equal")
fig.colorbar(cs, ax=ax)
ax = axes[1, 1]
cs = ax.tricontourf(p_tri, p_num_centered, levels=20)
ax.set_title("Pressure (Numerical, centered)")
ax.set_aspect("equal")
fig.colorbar(cs, ax=ax)
ax = axes[1, 2]
cs = ax.tricontourf(p_tri, p_exact, levels=20)
ax.set_title("Pressure (Exact)")
ax.set_aspect("equal")
fig.colorbar(cs, ax=ax)
plt.tight_layout()
if save is not None:
fig.savefig(save)
else:
plt.show()
if __name__ == "__main__":
import argparse
import os
wp.set_module_options({"enable_backward": False})
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--device", type=str, default=None, help="Override the default Warp device.")
parser.add_argument("--resolution", type=int, default=64, help="Grid resolution.")
parser.add_argument("--degree", type=int, default=2, help="Polynomial degree of shape functions.")
parser.add_argument("--num-frames", type=int, default=200, help="Total number of frames.")
parser.add_argument("--Re", type=float, default=250.0, help="Reynolds number.")
parser.add_argument("--mesh", choices=("grid", "tri", "quad"), default="grid", help="Mesh type.")
parser.add_argument(
"--headless",
action="store_true",
help="Run in headless mode, suppressing the opening of any graphical windows.",
)
parser.add_argument("--quiet", action="store_true", help="Suppresses the printing out of iteration residuals.")
parser.add_argument(
"--export",
type=str,
default=None,
help="Export animation to file (e.g. animation.mp4) or directory (e.g. frames/).",
)
args = parser.parse_known_args()[0]
with wp.ScopedDevice(args.device):
example = Example(
quiet=args.quiet,
degree=args.degree,
resolution=args.resolution,
Re=args.Re,
mesh=args.mesh,
)
for _k, set_info in fem_example_utils.progress_bar(args.num_frames, quiet=args.quiet):
vel_err, p_err = example.step()
if vel_err is not None:
set_info("vel_err", f"{vel_err:.4e}")
set_info("p_err", f"{p_err:.4e}")
example.render()
if not args.headless or args.export:
# Derive a companion path for the static comparison figure
comparison_save = None
if args.export:
base, ext = os.path.splitext(args.export)
if ext:
comparison_save = base + "_comparison.png"
else:
comparison_save = args.export.rstrip("/").rstrip(os.sep) + "_comparison.png"
if not args.quiet:
example.plot_comparison(save=comparison_save)
example.renderer.plot(
options={
"dye": {"contours": {"levels": 20}},
},
save=args.export,
)