From 92b8e4e13facb4e9e9ddca2252f611e4f66f5c6d Mon Sep 17 00:00:00 2001
From: Vladimir Tomov <tomov2@llnl.gov>
Date: Wed, 22 Nov 2017 18:56:46 -0800
Subject: [PATCH 01/12] Made the initial meshes linear to avoid memory problems
 during refinement. Added two additional 3D meshes (applicable only to problem
 1).

---
 data/box01_hex.mesh        | 511 ++++++++++++++-----------------------
 data/cube01_hex.mesh       | 296 +--------------------
 data/cube_12_hex.mesh      | 183 +++++++++++++
 data/cube_24_hex.mesh      | 287 +++++++++++++++++++++
 data/rectangle01_quad.mesh | 173 +++++--------
 data/segment01.mesh        |   6 +-
 data/square01_quad.mesh    |  34 +--
 7 files changed, 734 insertions(+), 756 deletions(-)
 create mode 100644 data/cube_12_hex.mesh
 create mode 100644 data/cube_24_hex.mesh

diff --git a/data/box01_hex.mesh b/data/box01_hex.mesh
index 4bec97c8..6964d482 100644
--- a/data/box01_hex.mesh
+++ b/data/box01_hex.mesh
@@ -108,322 +108,199 @@ vertices
 
 nodes
 FiniteElementSpace
-FiniteElementCollection: H1_3D_P2
+FiniteElementCollection: Linear
 VDim: 3
-Ordering: 1
+Ordering: 0
 
-0 0 0
-1 0 0
-2 0 0
-3 0 0
-4 0 0
-5 0 0
-6 0 0
-7 0 0
-0 1 0
-1 1 0
-2 1 0
-3 1 0
-4 1 0
-5 1 0
-6 1 0
-7 1 0
-0 2 0
-1 2 0
-2 2 0
-3 2 0
-4 2 0
-5 2 0
-6 2 0
-7 2 0
-0 3 0
-1 3 0
-2 3 0
-3 3 0
-4 3 0
-5 3 0
-6 3 0
-7 3 0
-0 0 1.5
-1 0 1.5
-2 0 1.5
-3 0 1.5
-4 0 1.5
-5 0 1.5
-6 0 1.5
-7 0 1.5
-0 1 1.5
-1 1 1.5
-2 1 1.5
-3 1 1.5
-4 1 1.5
-5 1 1.5
-6 1 1.5
-7 1 1.5
-0 2 1.5
-1 2 1.5
-2 2 1.5
-3 2 1.5
-4 2 1.5
-5 2 1.5
-6 2 1.5
-7 2 1.5
-0 3 1.5
-1 3 1.5
-2 3 1.5
-3 3 1.5
-4 3 1.5
-5 3 1.5
-6 3 1.5
-7 3 1.5
-0.5 0 0
-1 0.5 0
-0.5 1 0
-0 0.5 0
-0.5 0 1.5
-1 0.5 1.5
-0.5 1 1.5
-0 0.5 1.5
-0 0 0.75
-1 0 0.75
-1 1 0.75
-0 1 0.75
-1.5 0 0
-2 0.5 0
-1.5 1 0
-1.5 0 1.5
-2 0.5 1.5
-1.5 1 1.5
-2 0 0.75
-2 1 0.75
-2.5 0 0
-3 0.5 0
-2.5 1 0
-2.5 0 1.5
-3 0.5 1.5
-2.5 1 1.5
-3 0 0.75
-3 1 0.75
-3.5 0 0
-4 0.5 0
-3.5 1 0
-3.5 0 1.5
-4 0.5 1.5
-3.5 1 1.5
-4 0 0.75
-4 1 0.75
-4.5 0 0
-5 0.5 0
-4.5 1 0
-4.5 0 1.5
-5 0.5 1.5
-4.5 1 1.5
-5 0 0.75
-5 1 0.75
-5.5 0 0
-6 0.5 0
-5.5 1 0
-5.5 0 1.5
-6 0.5 1.5
-5.5 1 1.5
-6 0 0.75
-6 1 0.75
-6.5 0 0
-7 0.5 0
-6.5 1 0
-6.5 0 1.5
-7 0.5 1.5
-6.5 1 1.5
-7 0 0.75
-7 1 0.75
-1 1.5 0
-0.5 2 0
-0 1.5 0
-1 1.5 1.5
-0.5 2 1.5
-0 1.5 1.5
-1 2 0.75
-0 2 0.75
-2 1.5 0
-1.5 2 0
-2 1.5 1.5
-1.5 2 1.5
-2 2 0.75
-3 1.5 0
-2.5 2 0
-3 1.5 1.5
-2.5 2 1.5
-3 2 0.75
-4 1.5 0
-3.5 2 0
-4 1.5 1.5
-3.5 2 1.5
-4 2 0.75
-5 1.5 0
-4.5 2 0
-5 1.5 1.5
-4.5 2 1.5
-5 2 0.75
-6 1.5 0
-5.5 2 0
-6 1.5 1.5
-5.5 2 1.5
-6 2 0.75
-7 1.5 0
-6.5 2 0
-7 1.5 1.5
-6.5 2 1.5
-7 2 0.75
-1 2.5 0
-0.5 3 0
-0 2.5 0
-1 2.5 1.5
-0.5 3 1.5
-0 2.5 1.5
-1 3 0.75
-0 3 0.75
-2 2.5 0
-1.5 3 0
-2 2.5 1.5
-1.5 3 1.5
-2 3 0.75
-3 2.5 0
-2.5 3 0
-3 2.5 1.5
-2.5 3 1.5
-3 3 0.75
-4 2.5 0
-3.5 3 0
-4 2.5 1.5
-3.5 3 1.5
-4 3 0.75
-5 2.5 0
-4.5 3 0
-5 2.5 1.5
-4.5 3 1.5
-5 3 0.75
-6 2.5 0
-5.5 3 0
-6 2.5 1.5
-5.5 3 1.5
-6 3 0.75
-7 2.5 0
-6.5 3 0
-7 2.5 1.5
-6.5 3 1.5
-7 3 0.75
-0.5 0.5 0
-0.5 0 0.75
-1 0.5 0.75
-0.5 1 0.75
-0 0.5 0.75
-0.5 0.5 1.5
-1.5 0.5 0
-1.5 0 0.75
-2 0.5 0.75
-1.5 1 0.75
-1.5 0.5 1.5
-2.5 0.5 0
-2.5 0 0.75
-3 0.5 0.75
-2.5 1 0.75
-2.5 0.5 1.5
-3.5 0.5 0
-3.5 0 0.75
-4 0.5 0.75
-3.5 1 0.75
-3.5 0.5 1.5
-4.5 0.5 0
-4.5 0 0.75
-5 0.5 0.75
-4.5 1 0.75
-4.5 0.5 1.5
-5.5 0.5 0
-5.5 0 0.75
-6 0.5 0.75
-5.5 1 0.75
-5.5 0.5 1.5
-6.5 0.5 0
-6.5 0 0.75
-7 0.5 0.75
-6.5 1 0.75
-6.5 0.5 1.5
-0.5 1.5 0
-1 1.5 0.75
-0.5 2 0.75
-0 1.5 0.75
-0.5 1.5 1.5
-1.5 1.5 0
-2 1.5 0.75
-1.5 2 0.75
-1.5 1.5 1.5
-2.5 1.5 0
-3 1.5 0.75
-2.5 2 0.75
-2.5 1.5 1.5
-3.5 1.5 0
-4 1.5 0.75
-3.5 2 0.75
-3.5 1.5 1.5
-4.5 1.5 0
-5 1.5 0.75
-4.5 2 0.75
-4.5 1.5 1.5
-5.5 1.5 0
-6 1.5 0.75
-5.5 2 0.75
-5.5 1.5 1.5
-6.5 1.5 0
-7 1.5 0.75
-6.5 2 0.75
-6.5 1.5 1.5
-0.5 2.5 0
-1 2.5 0.75
-0.5 3 0.75
-0 2.5 0.75
-0.5 2.5 1.5
-1.5 2.5 0
-2 2.5 0.75
-1.5 3 0.75
-1.5 2.5 1.5
-2.5 2.5 0
-3 2.5 0.75
-2.5 3 0.75
-2.5 2.5 1.5
-3.5 2.5 0
-4 2.5 0.75
-3.5 3 0.75
-3.5 2.5 1.5
-4.5 2.5 0
-5 2.5 0.75
-4.5 3 0.75
-4.5 2.5 1.5
-5.5 2.5 0
-6 2.5 0.75
-5.5 3 0.75
-5.5 2.5 1.5
-6.5 2.5 0
-7 2.5 0.75
-6.5 3 0.75
-6.5 2.5 1.5
-0.5 0.5 0.75
-1.5 0.5 0.75
-2.5 0.5 0.75
-3.5 0.5 0.75
-4.5 0.5 0.75
-5.5 0.5 0.75
-6.5 0.5 0.75
-0.5 1.5 0.75
-1.5 1.5 0.75
-2.5 1.5 0.75
-3.5 1.5 0.75
-4.5 1.5 0.75
-5.5 1.5 0.75
-6.5 1.5 0.75
-0.5 2.5 0.75
-1.5 2.5 0.75
-2.5 2.5 0.75
-3.5 2.5 0.75
-4.5 2.5 0.75
-5.5 2.5 0.75
-6.5 2.5 0.75
+0
+1
+2
+3
+4
+5
+6
+7
+0
+1
+2
+3
+4
+5
+6
+7
+0
+1
+2
+3
+4
+5
+6
+7
+0
+1
+2
+3
+4
+5
+6
+7
+0
+1
+2
+3
+4
+5
+6
+7
+0
+1
+2
+3
+4
+5
+6
+7
+0
+1
+2
+3
+4
+5
+6
+7
+0
+1
+2
+3
+4
+5
+6
+7
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+3
+3
+3
+3
+3
+3
+3
+3
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+3
+3
+3
+3
+3
+3
+3
+3
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
+1.5
diff --git a/data/cube01_hex.mesh b/data/cube01_hex.mesh
index cd9813ce..032452ea 100644
--- a/data/cube01_hex.mesh
+++ b/data/cube01_hex.mesh
@@ -57,7 +57,7 @@ vertices
 
 nodes
 FiniteElementSpace
-FiniteElementCollection: Quadratic
+FiniteElementCollection: Linear
 VDim: 3
 Ordering: 0
 
@@ -88,122 +88,6 @@ Ordering: 0
 0
 0.5
 1
-0.25
-0.5
-0.25
-0
-0.25
-0.5
-0.25
-0
-0
-0.5
-0.5
-0
-0.75
-1
-0.75
-0.75
-1
-0.75
-1
-1
-0.5
-0.25
-0
-0.5
-0.25
-0
-0.5
-0
-1
-0.75
-1
-0.75
-1
-0.25
-0.5
-0.25
-0
-0
-0.5
-0.5
-0
-0.75
-1
-0.75
-1
-1
-0.5
-0.25
-0
-0.5
-0
-1
-0.75
-1
-0.25
-0.25
-0.5
-0.25
-0
-0.25
-0.75
-0.75
-1
-0.75
-0.75
-0.25
-0.5
-0.25
-0
-0.25
-0.75
-1
-0.75
-0.75
-0.25
-0.5
-0.25
-0
-0.25
-0.75
-1
-0.75
-0.75
-0.5
-0.25
-0
-0.25
-1
-0.75
-0.75
-0.25
-0.75
-0.25
-0.75
-0.25
-0.75
-0.25
-0.75
-0
-0
-0
-0.5
-0.5
-0.5
-1
-1
-1
-0
-0
-0
-0.5
-0.5
-0.5
-1
-1
-1
 0
 0
 0
@@ -214,131 +98,29 @@ Ordering: 0
 1
 1
 0
-0.25
-0.5
-0.25
 0
-0.25
-0.5
-0.25
-0
-0
-0.5
-0.5
 0
-0.25
 0.5
-0
-0.25
 0.5
-0
 0.5
-0.75
-1
-0.75
-0.75
-1
-0.75
-1
 1
-0.75
 1
-0.75
 1
-1
-0
-0.25
-0.5
-0.25
-0
 0
-0.5
-0.5
 0
-0.25
-0.5
 0
 0.5
-0.75
-1
-0.75
-1
-1
-0.75
-1
-1
-0.25
-0
-0.25
 0.5
-0.25
-0.25
-0.25
-0
-0.25
 0.5
-0.25
-0.75
-0.75
 1
-0.75
-0.75
-0.75
-0.75
 1
-0.75
-0
-0.25
-0.5
-0.25
-0.25
-0
-0.25
-0.5
-0.25
-0.75
 1
-0.75
-0.75
-0.75
-1
-0.75
-0.25
-0.25
-0.75
-0.75
-0.25
-0.25
-0.75
-0.75
-0
-0
-0
 0
 0
 0
 0
 0
 0
-0.5
-0.5
-0.5
-0.5
-0.5
-0.5
-0.5
-0.5
-0.5
-1
-1
-1
-1
-1
-1
-1
-1
-1
-0
 0
 0
 0
@@ -346,93 +128,17 @@ Ordering: 0
 0.5
 0.5
 0.5
-0.25
-0.25
-0.25
-0.25
-0
-0
-0
-0.5
-0.5
-0.5
-0.25
-0.25
-0
-0
-0
 0.5
 0.5
 0.5
-0.25
-0.25
-0
-0
 0.5
 0.5
-0.25
-1
-1
-1
-1
-0.75
-0.75
-0.75
-0.75
-1
-1
 1
-0.75
-0.75
 1
 1
 1
-0.75
-0.75
 1
 1
-0.75
-0
-0.25
-0.25
-0.25
-0.25
-0.5
-0
-0.25
-0.25
-0.25
-0.5
-0
-0.25
-0.25
-0.25
-0.5
-0
-0.25
-0.25
-0.5
-0.75
-0.75
-0.75
-0.75
-1
-0.75
-0.75
-0.75
 1
-0.75
-0.75
-0.75
 1
-0.75
-0.75
 1
-0.25
-0.25
-0.25
-0.25
-0.75
-0.75
-0.75
-0.75
diff --git a/data/cube_12_hex.mesh b/data/cube_12_hex.mesh
new file mode 100644
index 00000000..7d21140a
--- /dev/null
+++ b/data/cube_12_hex.mesh
@@ -0,0 +1,183 @@
+MFEM mesh v1.0
+
+#
+# MFEM Geometry Types (see mesh/geom.hpp):
+#
+# POINT       = 0
+# SEGMENT     = 1
+# TRIANGLE    = 2
+# SQUARE      = 3
+# TETRAHEDRON = 4
+# CUBE        = 5
+#
+
+dimension
+3
+
+elements
+12
+1 5 0 1 5 4 12 13 17 16
+1 5 1 2 6 5 13 14 18 17
+1 5 2 3 7 6 14 15 19 18
+1 5 4 5 9 8 16 17 21 20
+1 5 5 6 10 9 17 18 22 21
+1 5 6 7 11 10 18 19 23 22
+1 5 12 13 17 16 24 25 29 28
+1 5 13 14 18 17 25 26 30 29
+1 5 14 15 19 18 26 27 31 30
+1 5 16 17 21 20 28 29 33 32
+1 5 17 18 22 21 29 30 34 33
+1 5 18 19 23 22 30 31 35 34
+
+boundary
+32
+3 3 0 4 5 1
+3 3 1 5 6 2
+3 3 2 6 7 3
+3 3 4 8 9 5
+3 3 5 9 10 6
+3 3 6 10 11 7
+3 3 24 25 29 28
+3 3 25 26 30 29
+3 3 26 27 31 30
+3 3 28 29 33 32
+3 3 29 30 34 33
+3 3 30 31 35 34
+1 3 0 12 16 4
+1 3 4 16 20 8
+1 3 12 24 28 16
+1 3 16 28 32 20
+1 3 3 7 19 15
+1 3 7 11 23 19
+1 3 15 19 31 27
+1 3 19 23 35 31
+2 3 0 1 13 12
+2 3 12 13 25 24
+2 3 1 2 14 13
+2 3 13 14 26 25
+2 3 2 3 15 14
+2 3 14 15 27 26
+2 3 8 20 21 9
+2 3 20 32 33 21
+2 3 9 21 22 10
+2 3 21 33 34 22
+2 3 10 22 23 11
+2 3 22 34 35 23
+
+vertices
+36
+
+nodes
+FiniteElementSpace
+FiniteElementCollection: Linear
+VDim: 3
+Ordering: 0
+
+0
+0.4
+0.8
+1.2
+0
+0.4
+0.8
+1.2
+0
+0.4
+0.8
+1.2
+0
+0.4
+0.8
+1.2
+0
+0.4
+0.8
+1.2
+0
+0.4
+0.8
+1.2
+0
+0.4
+0.8
+1.2
+0
+0.4
+0.8
+1.2
+0
+0.4
+0.8
+1.2
+0
+0
+0
+0
+0.6
+0.6
+0.6
+0.6
+1.2
+1.2
+1.2
+1.2
+0
+0
+0
+0
+0.6
+0.6
+0.6
+0.6
+1.2
+1.2
+1.2
+1.2
+0
+0
+0
+0
+0.6
+0.6
+0.6
+0.6
+1.2
+1.2
+1.2
+1.2
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
diff --git a/data/cube_24_hex.mesh b/data/cube_24_hex.mesh
new file mode 100644
index 00000000..402be210
--- /dev/null
+++ b/data/cube_24_hex.mesh
@@ -0,0 +1,287 @@
+MFEM mesh v1.0
+
+#
+# MFEM Geometry Types (see mesh/geom.hpp):
+#
+# POINT       = 0
+# SEGMENT     = 1
+# TRIANGLE    = 2
+# SQUARE      = 3
+# TETRAHEDRON = 4
+# CUBE        = 5
+#
+
+dimension
+3
+
+elements
+24
+1 5 0 1 6 5 20 21 26 25
+1 5 1 2 7 6 21 22 27 26
+1 5 2 3 8 7 22 23 28 27
+1 5 3 4 9 8 23 24 29 28
+1 5 5 6 11 10 25 26 31 30
+1 5 6 7 12 11 26 27 32 31
+1 5 7 8 13 12 27 28 33 32
+1 5 8 9 14 13 28 29 34 33
+1 5 10 11 16 15 30 31 36 35
+1 5 11 12 17 16 31 32 37 36
+1 5 12 13 18 17 32 33 38 37
+1 5 13 14 19 18 33 34 39 38
+1 5 20 21 26 25 40 41 46 45
+1 5 21 22 27 26 41 42 47 46
+1 5 22 23 28 27 42 43 48 47
+1 5 23 24 29 28 43 44 49 48
+1 5 25 26 31 30 45 46 51 50
+1 5 26 27 32 31 46 47 52 51
+1 5 27 28 33 32 47 48 53 52
+1 5 28 29 34 33 48 49 54 53
+1 5 30 31 36 35 50 51 56 55
+1 5 31 32 37 36 51 52 57 56
+1 5 32 33 38 37 52 53 58 57
+1 5 33 34 39 38 53 54 59 58
+
+boundary
+52
+3 3 0 5 6 1
+3 3 1 6 7 2
+3 3 2 7 8 3
+3 3 3 8 9 4
+3 3 5 10 11 6
+3 3 6 11 12 7
+3 3 7 12 13 8
+3 3 8 13 14 9
+3 3 10 15 16 11
+3 3 11 16 17 12
+3 3 12 17 18 13
+3 3 13 18 19 14
+3 3 40 41 46 45
+3 3 41 42 47 46
+3 3 42 43 48 47
+3 3 43 44 49 48
+3 3 45 46 51 50
+3 3 46 47 52 51
+3 3 47 48 53 52
+3 3 48 49 54 53
+3 3 50 51 56 55
+3 3 51 52 57 56
+3 3 52 53 58 57
+3 3 53 54 59 58
+1 3 0 20 25 5
+1 3 5 25 30 10
+1 3 10 30 35 15
+1 3 20 40 45 25
+1 3 25 45 50 30
+1 3 30 50 55 35
+1 3 4 9 29 24
+1 3 9 14 34 29
+1 3 14 19 39 34
+1 3 24 29 49 44
+1 3 29 34 54 49
+1 3 34 39 59 54
+2 3 0 1 21 20
+2 3 20 21 41 40
+2 3 1 2 22 21
+2 3 21 22 42 41
+2 3 2 3 23 22
+2 3 22 23 43 42
+2 3 3 4 24 23
+2 3 23 24 44 43
+2 3 15 35 36 16
+2 3 35 55 56 36
+2 3 16 36 37 17
+2 3 36 56 57 37
+2 3 17 37 38 18
+2 3 37 57 58 38
+2 3 18 38 39 19
+2 3 38 58 59 39
+
+vertices
+60
+
+nodes
+FiniteElementSpace
+FiniteElementCollection: Linear
+VDim: 3
+Ordering: 0
+
+0
+0.3
+0.6
+0.9
+1.2
+0
+0.3
+0.6
+0.9
+1.2
+0
+0.3
+0.6
+0.9
+1.2
+0
+0.3
+0.6
+0.9
+1.2
+0
+0.3
+0.6
+0.9
+1.2
+0
+0.3
+0.6
+0.9
+1.2
+0
+0.3
+0.6
+0.9
+1.2
+0
+0.3
+0.6
+0.9
+1.2
+0
+0.3
+0.6
+0.9
+1.2
+0
+0.3
+0.6
+0.9
+1.2
+0
+0.3
+0.6
+0.9
+1.2
+0
+0.3
+0.6
+0.9
+1.2
+0
+0
+0
+0
+0
+0.4
+0.4
+0.4
+0.4
+0.4
+0.8
+0.8
+0.8
+0.8
+0.8
+1.2
+1.2
+1.2
+1.2
+1.2
+0
+0
+0
+0
+0
+0.4
+0.4
+0.4
+0.4
+0.4
+0.8
+0.8
+0.8
+0.8
+0.8
+1.2
+1.2
+1.2
+1.2
+1.2
+0
+0
+0
+0
+0
+0.4
+0.4
+0.4
+0.4
+0.4
+0.8
+0.8
+0.8
+0.8
+0.8
+1.2
+1.2
+1.2
+1.2
+1.2
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+0.6
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
+1.2
diff --git a/data/rectangle01_quad.mesh b/data/rectangle01_quad.mesh
index 997c2cfd..c8f199ad 100644
--- a/data/rectangle01_quad.mesh
+++ b/data/rectangle01_quad.mesh
@@ -66,112 +66,71 @@ vertices
 
 nodes
 FiniteElementSpace
-FiniteElementCollection: H1_2D_P2
+FiniteElementCollection: Linear
 VDim: 2
-Ordering: 1
+Ordering: 0
 
-0 0
-1 0
-2 0
-3 0
-4 0
-5 0
-6 0
-7 0
-0 1
-1 1
-2 1
-3 1
-4 1
-5 1
-6 1
-7 1
-0 2
-1 2
-2 2
-3 2
-4 2
-5 2
-6 2
-7 2
-0 3
-1 3
-2 3
-3 3
-4 3
-5 3
-6 3
-7 3
-0.5 0
-1 0.5
-0.5 1
-0 0.5
-1.5 0
-2 0.5
-1.5 1
-2.5 0
-3 0.5
-2.5 1
-3.5 0
-4 0.5
-3.5 1
-4.5 0
-5 0.5
-4.5 1
-5.5 0
-6 0.5
-5.5 1
-6.5 0
-7 0.5
-6.5 1
-1 1.5
-0.5 2
-0 1.5
-2 1.5
-1.5 2
-3 1.5
-2.5 2
-4 1.5
-3.5 2
-5 1.5
-4.5 2
-6 1.5
-5.5 2
-7 1.5
-6.5 2
-1 2.5
-0.5 3
-0 2.5
-2 2.5
-1.5 3
-3 2.5
-2.5 3
-4 2.5
-3.5 3
-5 2.5
-4.5 3
-6 2.5
-5.5 3
-7 2.5
-6.5 3
-0.5 0.5
-1.5 0.5
-2.5 0.5
-3.5 0.5
-4.5 0.5
-5.5 0.5
-6.5 0.5
-0.5 1.5
-1.5 1.5
-2.5 1.5
-3.5 1.5
-4.5 1.5
-5.5 1.5
-6.5 1.5
-0.5 2.5
-1.5 2.5
-2.5 2.5
-3.5 2.5
-4.5 2.5
-5.5 2.5
-6.5 2.5
+0
+1
+2
+3
+4
+5
+6
+7
+0
+1
+2
+3
+4
+5
+6
+7
+0
+1
+2
+3
+4
+5
+6
+7
+0
+1
+2
+3
+4
+5
+6
+7
+0
+0
+0
+0
+0
+0
+0
+0
+1
+1
+1
+1
+1
+1
+1
+1
+2
+2
+2
+2
+2
+2
+2
+2
+3
+3
+3
+3
+3
+3
+3
+3
diff --git a/data/segment01.mesh b/data/segment01.mesh
index 0ccb1c16..9e567611 100644
--- a/data/segment01.mesh
+++ b/data/segment01.mesh
@@ -29,12 +29,10 @@ vertices
 
 nodes
 FiniteElementSpace
-FiniteElementCollection: H1_1D_P2
+FiniteElementCollection: Linear
 VDim: 1
-Ordering: 1
+Ordering: 0
 
 0
 0.5
 1
-0.25
-0.75
diff --git a/data/square01_quad.mesh b/data/square01_quad.mesh
index a3f32fff..4a9ee6ad 100644
--- a/data/square01_quad.mesh
+++ b/data/square01_quad.mesh
@@ -37,7 +37,7 @@ vertices
 
 nodes
 FiniteElementSpace
-FiniteElementCollection: Quadratic
+FiniteElementCollection: Linear
 VDim: 2
 Ordering: 0
 
@@ -50,22 +50,6 @@ Ordering: 0
 0
 0.5
 1
-0.25
-0.5
-0.25
-0
-0.75
-1
-0.75
-0.5
-0.25
-0
-1
-0.75
-0.25
-0.75
-0.25
-0.75
 0
 0
 0
@@ -75,19 +59,3 @@ Ordering: 0
 1
 1
 1
-0
-0.25
-0.5
-0.25
-0
-0.25
-0.5
-0.75
-1
-0.75
-0.75
-1
-0.25
-0.25
-0.75
-0.75

From b03efd9190131b147d01db0e4df405213d6e8b11 Mon Sep 17 00:00:00 2001
From: Tomov <tomov2@madara.llnl.gov>
Date: Thu, 23 Nov 2017 00:17:15 -0800
Subject: [PATCH 02/12] Extra options for parallel partitions, updated the
 vulcan 3D script.

---
 laghos.cpp                          | 45 ++++++++++++++++++++---
 timing/collect_timings_vulcan_3D.sh | 55 ++++++++++++++++++-----------
 2 files changed, 76 insertions(+), 24 deletions(-)

diff --git a/laghos.cpp b/laghos.cpp
index 4046ae77..7c8ca1d3 100644
--- a/laghos.cpp
+++ b/laghos.cpp
@@ -92,6 +92,7 @@ int main(int argc, char *argv[])
    bool visit = false;
    bool gfprint = false;
    const char *basename = "results/Laghos";
+   int partition_type = 222;
 
    OptionsParser args(argc, argv);
    args.AddOption(&mesh_file, "-m", "--mesh",
@@ -131,6 +132,8 @@ int main(int argc, char *argv[])
                   "Enable or disable result output (files in mfem format).");
    args.AddOption(&basename, "-k", "--outputfilename",
                   "Name of the visit dump files");
+   args.AddOption(&partition_type, "-pt", "--partition",
+                  "Customized x/y/z partition of the initial mesh.");
    args.Parse();
    if (!args.Good())
    {
@@ -157,13 +160,47 @@ int main(int argc, char *argv[])
    // Parallel partitioning of the mesh.
    ParMesh *pmesh = NULL;
    const int num_tasks = mpi.WorldSize();
-   const int partitions = floor(pow(num_tasks, 1.0 / dim) + 1e-2);
    int *nxyz = new int[dim];
+   switch (partition_type)
+   {
+      case 22:
+      case 222:
+      {
+         const int part = floor(pow(num_tasks, 1.0 / dim) + 1e-2);
+         for (int d = 0; d < dim; d++) { nxyz[d] = part; }
+         break;
+      }
+      case 322: // 3D.
+      {
+         const int min_part = floor(pow(2 * num_tasks / 3, 1.0 / 3) + 1e-2);
+         nxyz[0] = 3 * min_part / 2;
+         nxyz[1] = min_part;
+         nxyz[2] = min_part;
+         break;
+      }
+      case 432: // 3D.
+      {
+         const int min_part = floor(pow(num_tasks / 3, 1.0 / 3) + 1e-2);
+         nxyz[0] = 2 * min_part;
+         nxyz[1] = 3 * min_part / 2;
+         nxyz[2] = min_part;
+         break;
+      }
+      default:
+         if (myid == 0)
+         {
+            cout << "Unknown partition type: " << partition_type << '\n';
+         }
+         delete mesh;
+         MPI_Finalize();
+         return 3;
+   }
    int product = 1;
-   for (int d = 0; d < dim; d++)
+   for (int d = 0; d < dim; d++) { product *= nxyz[d]; }
+   if (my_id == 0)
    {
-      nxyz[d] = partitions;
-      product *= partitions;
+      cout << nxyz[0] << " " << nxyz[1] << " " << nxyz[2] << " "
+           << product << " " << num_tasks << endl;
    }
    if (product == num_tasks)
    {
diff --git a/timing/collect_timings_vulcan_3D.sh b/timing/collect_timings_vulcan_3D.sh
index b15aabed..ea151134 100755
--- a/timing/collect_timings_vulcan_3D.sh
+++ b/timing/collect_timings_vulcan_3D.sh
@@ -1,20 +1,37 @@
 #!/usr/bin/env bash
 
-options=( 'pa' 'fa' )
+# User input.
+# To get Cartesian mesh partitions:
+#   with the 222 partition, use 4/32/256/2048/16384 nodes.
+#   with the 432 partition, use 12/96/768/6144(quarter machine) nodes.
+#   with the 322 partition, use 6/48/384/3072/24576(full machine) nodes.
+part_type=222
+nodes=256
+minL2dof_node=100
+maxL2dof_node=200000
+# End of user input.
+
+# Make sure that the serial mesh has at least one zone per task.
+nproc=$(( 16 * nodes ))
+sref=0
+while (( 8**(sref+1) < nproc ))
+do
+  sref=$(( sref+1 ))
+done
+sref=$(( sref+3 ))
+echo "sref: "$sref "serial_nzones: "$(( 8**(sref+1) )) "nproc: "$nproc
 
-parallel_refs=0
-maxL2dof=1000000
-nproc=8
+minL2dof=$(( minL2dof_node * nodes ))
+maxL2dof=$(( maxL2dof_node * nodes ))
 
+options=( 'pa' 'fa' )
 outfile=timings_3d
 mesh_file=../data/cube01_hex.mesh
 
-calc() { awk "BEGIN{print $*}"; }
-
 run_case()
 {
     # Pass command as all inputs
-    # Outputs: order refs h1_dofs l2_dofs h1_cg_rate l2_cg_rate forces_rate update_quad_rate
+    # Outputs: order refs h1_dofs l2_dofs h1_cg_rate l2_cg_rate forces_rate update_quad_rate total_rate
 
     "$@" | tee run.log | awk '
 BEGIN { ref = 0 }
@@ -27,26 +44,24 @@ BEGIN { ref = 0 }
 /CG \(L2\) rate/ { l2_cg_rate = $9 }
 /Forces rate/ { forces_rate = $8 }
 /UpdateQuadData rate/ { update_quad_rate = $8 }
-/Major kernels total rate/ { total_time = $11 }
-END { printf("%d %d %d %d %.8f %.8f %.8f %.8f %.8f\n", order, ref, h1_dofs, l2_dofs, h1_cg_rate, l2_cg_rate, forces_rate, update_quad_rate, total_time) }'
+/Major kernels total rate/ { total_rate = $11 }
+END { printf("%d %d %d %d %.8f %.8f %.8f %.8f %.8f\n", order, ref, h1_dofs, l2_dofs, h1_cg_rate, l2_cg_rate, forces_rate, update_quad_rate, total_rate) }'
 }
 
-[ -r $outfile ] && cp $outfile $outfile.bak
-echo "# order refs h1_dofs l2_dofs h1_cg_rate l2_cg_rate forces_rate update_quad_rate total_time" > $outfile"_"${options[0]}
-echo "# order refs h1_dofs l2_dofs h1_cg_rate l2_cg_rate forces_rate update_quad_rate total_time" > $outfile"_"${options[1]}
+
 for method in "${options[@]}"; do
+  echo "# order refs h1_dofs l2_dofs h1_cg_rate l2_cg_rate forces_rate update_quad_rate total_rate" > $outfile"_"$method
   for torder in {0..4}; do
-    for sref in {0..10}; do
-       nzones=$(( 8**(sref+1) ))
+    for pref in {0..10}; do
+       nzones=$(( 8**(pref+sref+1) ))
        nL2dof=$(( nzones*(torder+1)**3 ))
-       if (( nproc <= nzones )) && (( nL2dof < maxL2dof )) ; then
-         echo "np"$nproc "Q"$((torder+1))"Q"$torder $sref"ref" $method $outfile"_"${options[0]}
-         echo $(run_case srun -n $nproc ../laghos -$method -p 1 -tf 0.8 \
+       if (( nproc <= nzones )) && (( nL2dof > minL2dof )) && (( nL2dof < maxL2dof )) ; then
+         echo "np"$nproc "Q"$((torder+1))"Q"$torder $pref"ref" $method $outfile"_"$method
+         echo $(run_case srun -n $nproc ../laghos -$method -p 1 -tf 0.8 -pt $part_type \
                        --cg-tol 0 --cg-max-steps 50 \
-                       --max-steps 10 \
+                       --max-steps 3 \
                        --mesh $mesh_file \
-                       --refine-serial $sref \
-                       --refine-parallel $parallel_refs \
+                       --refine-serial $sref --refine-parallel $pref \
                        --order-thermo $torder \
                        --order-kinematic $((torder+1))) >> $outfile"_"$method
       fi

From 31a76a0874fdb9591436b3591b5510ef57208a82 Mon Sep 17 00:00:00 2001
From: Tomov <tomov2@madara.llnl.gov>
Date: Thu, 23 Nov 2017 21:45:33 -0800
Subject: [PATCH 03/12] Improved the counters for the execution rates.
 Accumulating big numbers was causing issues for the large runs.

---
 laghos.cpp        |  2 +-
 laghos_solver.cpp | 24 +++++++++++-------------
 laghos_solver.hpp |  8 ++++----
 3 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/laghos.cpp b/laghos.cpp
index 7c8ca1d3..601d20b6 100644
--- a/laghos.cpp
+++ b/laghos.cpp
@@ -197,7 +197,7 @@ int main(int argc, char *argv[])
    }
    int product = 1;
    for (int d = 0; d < dim; d++) { product *= nxyz[d]; }
-   if (my_id == 0)
+   if (myid == 0)
    {
       cout << nxyz[0] << " " << nxyz[1] << " " << nxyz[2] << " "
            << product << " " << num_tasks << endl;
diff --git a/laghos_solver.cpp b/laghos_solver.cpp
index e49d629a..58767a6a 100644
--- a/laghos_solver.cpp
+++ b/laghos_solver.cpp
@@ -238,7 +238,6 @@ void LagrangianHydroOperator::Mult(const Vector &S, Vector &dS_dt) const
       timer.sw_force.Start();
       ForcePA.Mult(one, rhs);
       timer.sw_force.Stop();
-      timer.dof_tstep += H1FESpace.GlobalTrueVSize();
       rhs.Neg();
 
       Operator *cVMassPA;
@@ -251,8 +250,7 @@ void LagrangianHydroOperator::Mult(const Vector &S, Vector &dS_dt) const
       timer.sw_cgH1.Start();
       cg.Mult(B, X);
       timer.sw_cgH1.Stop();
-      timer.H1dof_iter += cg.GetNumIterations() *
-                          H1FESpace.GlobalTrueVSize();
+      timer.H1cg_iter += cg.GetNumIterations();
       VMassPA.RecoverFEMSolution(X, rhs, dv);
       delete cVMassPA;
    }
@@ -261,7 +259,6 @@ void LagrangianHydroOperator::Mult(const Vector &S, Vector &dS_dt) const
       timer.sw_force.Start();
       Force.Mult(one, rhs);
       timer.sw_force.Stop();
-      timer.dof_tstep += H1FESpace.GlobalTrueVSize();
       rhs.Neg();
 
       HypreParMatrix A;
@@ -274,8 +271,7 @@ void LagrangianHydroOperator::Mult(const Vector &S, Vector &dS_dt) const
       timer.sw_cgH1.Start();
       cg.Mult(B, X);
       timer.sw_cgH1.Stop();
-      timer.H1dof_iter += cg.GetNumIterations() *
-                          H1FESpace.GlobalTrueVSize();
+      timer.H1cg_iter += cg.GetNumIterations();
       Mv.RecoverFEMSolution(X, rhs, dv);
    }
 
@@ -296,7 +292,6 @@ void LagrangianHydroOperator::Mult(const Vector &S, Vector &dS_dt) const
       timer.sw_force.Start();
       ForcePA.MultTranspose(v, e_rhs);
       timer.sw_force.Stop();
-      timer.dof_tstep += L2FESpace.GlobalTrueVSize();
 
       if (e_source) { e_rhs += *e_source; }
       for (int z = 0; z < nzones; z++)
@@ -316,7 +311,6 @@ void LagrangianHydroOperator::Mult(const Vector &S, Vector &dS_dt) const
       timer.sw_force.Start();
       Force.MultTranspose(v, e_rhs);
       timer.sw_force.Stop();
-      timer.dof_tstep += L2FESpace.GlobalTrueVSize();
       if (e_source) { e_rhs += *e_source; }
       for (int z = 0; z < nzones; z++)
       {
@@ -387,26 +381,30 @@ void LagrangianHydroOperator::PrintTimingData(bool IamRoot, int steps)
    my_rt[4] = my_rt[0] + my_rt[2] + my_rt[3];
    MPI_Reduce(my_rt, rt_max, 5, MPI_DOUBLE, MPI_MAX, 0, H1FESpace.GetComm());
 
-   double mydata[2], alldata[2];
+   int mydata[2], alldata[2];
    mydata[0] = timer.L2dof_iter;
    mydata[1] = timer.quad_tstep;
-   MPI_Reduce(mydata, alldata, 2, MPI_DOUBLE, MPI_SUM, 0, H1FESpace.GetComm());
+   MPI_Reduce(mydata, alldata, 2, MPI_INT, MPI_SUM, 0, H1FESpace.GetComm());
 
    if (IamRoot)
    {
+      const int H1gsize = H1FESpace.GlobalTrueVSize(),
+                L2gsize = L2FESpace.GlobalTrueVSize();
       using namespace std;
       cout << endl;
       cout << "CG (H1) total time: " << rt_max[0] << endl;
       cout << "CG (H1) rate (megadofs x cg_iterations / second): "
-           << 1e-6 * timer.H1dof_iter / rt_max[0] << endl;
+           << 1e-6 * H1gsize * timer.H1cg_iter / rt_max[0] << endl;
       cout << endl;
       cout << "CG (L2) total time: " << rt_max[1] << endl;
       cout << "CG (L2) rate (megadofs x cg_iterations / second): "
            << 1e-6 * alldata[0] / rt_max[1] << endl;
       cout << endl;
+      // The Force operator is applied twice per time step, on the H1 and the L2
+      // vectors, respectively.
       cout << "Forces total time: " << rt_max[2] << endl;
       cout << "Forces rate (megadofs x timesteps / second): "
-           << 1e-6 * timer.dof_tstep / rt_max[2] << endl;
+           << 1e-6 * steps * (H1gsize + L2gsize) / rt_max[2] << endl;
       cout << endl;
       cout << "UpdateQuadData total time: " << rt_max[3] << endl;
       cout << "UpdateQuadData rate (megaquads x timesteps / second): "
@@ -414,7 +412,7 @@ void LagrangianHydroOperator::PrintTimingData(bool IamRoot, int steps)
       cout << endl;
       cout << "Major kernels total time (seconds): " << rt_max[4] << endl;
       cout << "Major kernels total rate (megadofs x time steps / second): "
-           << 1e-6 * H1FESpace.GlobalTrueVSize() * steps / rt_max[4] << endl;
+           << 1e-6 * H1gsize * steps / rt_max[4] << endl;
    }
 }
 
diff --git a/laghos_solver.hpp b/laghos_solver.hpp
index 789cc91c..172213f5 100644
--- a/laghos_solver.hpp
+++ b/laghos_solver.hpp
@@ -54,13 +54,13 @@ struct TimingData
    StopWatch sw_cgH1, sw_cgL2, sw_force, sw_qdata;
 
    // These accumulate the total processed dofs or quad points:
-   // #dofs  * #(CG iterations) for the CG solves (H1 and L2).
-   // #dofs  * #(RK sub steps) for the Force application and assembly.
+   // #(CG iterations) for the H1 CG solve.
+   // #dofs  * #(CG iterations) for the L2 CG solve.
    // #quads * #(RK sub steps) for the quadrature data computations.
-   long long int H1dof_iter, L2dof_iter, dof_tstep, quad_tstep;
+   int H1cg_iter, L2dof_iter, quad_tstep;
 
    TimingData()
-      : H1dof_iter(0), L2dof_iter(0), dof_tstep(0), quad_tstep(0) { }
+      : H1cg_iter(0), L2dof_iter(0), quad_tstep(0) { }
 };
 
 // Given a solutions state (x, v, e), this class performs all necessary

From be3d54a9ec244a6e914a40b1d4711b9a55358e54 Mon Sep 17 00:00:00 2001
From: Tomov <tomov2@madara.llnl.gov>
Date: Thu, 23 Nov 2017 22:58:28 -0800
Subject: [PATCH 04/12] Another change to avoid big numbers: accumulate zone
 counts in quad_tstep and scale by the quadrature point count when printing.

---
 laghos_solver.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/laghos_solver.cpp b/laghos_solver.cpp
index 58767a6a..19fc1139 100644
--- a/laghos_solver.cpp
+++ b/laghos_solver.cpp
@@ -408,7 +408,7 @@ void LagrangianHydroOperator::PrintTimingData(bool IamRoot, int steps)
       cout << endl;
       cout << "UpdateQuadData total time: " << rt_max[3] << endl;
       cout << "UpdateQuadData rate (megaquads x timesteps / second): "
-           << 1e-6 * alldata[1] / rt_max[3] << endl;
+           << 1e-6 * alldata[1] * integ_rule.GetNPoints() / rt_max[3] << endl;
       cout << endl;
       cout << "Major kernels total time (seconds): " << rt_max[4] << endl;
       cout << "Major kernels total rate (megadofs x time steps / second): "
@@ -609,7 +609,7 @@ void LagrangianHydroOperator::UpdateQuadratureData(const Vector &S) const
    quad_data_is_current = true;
 
    timer.sw_qdata.Stop();
-   timer.quad_tstep += nzones * nqp;
+   timer.quad_tstep += nzones;
 }
 
 } // namespace hydrodynamics

From 9b761aabe0d64f146fd75dbf25a06c0928704355 Mon Sep 17 00:00:00 2001
From: Vladimir Tomov <tomov2@vulcanlac8.llnl.gov>
Date: Sun, 26 Nov 2017 22:55:49 -0800
Subject: [PATCH 05/12] Added the new mesh options in the script.

---
 laghos_solver.cpp                   |  1 +
 timing/collect_timings_vulcan_3D.sh | 22 +++++++++++++++-------
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/laghos_solver.cpp b/laghos_solver.cpp
index 19fc1139..228524bf 100644
--- a/laghos_solver.cpp
+++ b/laghos_solver.cpp
@@ -103,6 +103,7 @@ LagrangianHydroOperator::LagrangianHydroOperator(int size,
      VMassPA(&quad_data, H1FESpace), locEMassPA(&quad_data, l2_fes),
      locCG(), timer()
 {
+   cout << nzones << endl;
    GridFunctionCoefficient rho_coeff(&rho0);
 
    // Standard local assembly and inversion for energy mass matrices.
diff --git a/timing/collect_timings_vulcan_3D.sh b/timing/collect_timings_vulcan_3D.sh
index ea151134..9ed594ac 100755
--- a/timing/collect_timings_vulcan_3D.sh
+++ b/timing/collect_timings_vulcan_3D.sh
@@ -5,28 +5,37 @@
 #   with the 222 partition, use 4/32/256/2048/16384 nodes.
 #   with the 432 partition, use 12/96/768/6144(quarter machine) nodes.
 #   with the 322 partition, use 6/48/384/3072/24576(full machine) nodes.
-part_type=222
-nodes=256
+part_type=432
+nodes=6144
 minL2dof_node=100
 maxL2dof_node=200000
 # End of user input.
 
+if (( part_type == 222 )); then
+  nzones0=8
+  mesh_file=../data/cube01_hex.mesh
+elif (( part_type == 432 )); then
+  nzones0=24
+  mesh_file=../data/cube_24_hex.mesh
+elif ((part_type == 322 )); then
+  nzones0=12
+  mesh_file=../data/cube_12_hex.mesh
+fi
+
 # Make sure that the serial mesh has at least one zone per task.
 nproc=$(( 16 * nodes ))
 sref=0
-while (( 8**(sref+1) < nproc ))
+while (( nzones0 * 8**(sref) < nproc ))
 do
   sref=$(( sref+1 ))
 done
-sref=$(( sref+3 ))
-echo "sref: "$sref "serial_nzones: "$(( 8**(sref+1) )) "nproc: "$nproc
+echo "sref: "$sref "serial_nzones: "$(( nzones0 * 8**(sref) )) "nproc: "$nproc
 
 minL2dof=$(( minL2dof_node * nodes ))
 maxL2dof=$(( maxL2dof_node * nodes ))
 
 options=( 'pa' 'fa' )
 outfile=timings_3d
-mesh_file=../data/cube01_hex.mesh
 
 run_case()
 {
@@ -48,7 +57,6 @@ BEGIN { ref = 0 }
 END { printf("%d %d %d %d %.8f %.8f %.8f %.8f %.8f\n", order, ref, h1_dofs, l2_dofs, h1_cg_rate, l2_cg_rate, forces_rate, update_quad_rate, total_rate) }'
 }
 
-
 for method in "${options[@]}"; do
   echo "# order refs h1_dofs l2_dofs h1_cg_rate l2_cg_rate forces_rate update_quad_rate total_rate" > $outfile"_"$method
   for torder in {0..4}; do

From ae540cbe78157e1344f191e7c66b68d80b6e401d Mon Sep 17 00:00:00 2001
From: Vladimir Tomov <tomov2@llnl.gov>
Date: Mon, 27 Nov 2017 11:04:49 -0800
Subject: [PATCH 06/12] Small verification: print the min/max number of zones
 per MPI task after the parallel refinement.

---
 laghos.cpp        | 8 ++++++++
 laghos_solver.cpp | 1 -
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/laghos.cpp b/laghos.cpp
index 601d20b6..ed0f165b 100644
--- a/laghos.cpp
+++ b/laghos.cpp
@@ -168,6 +168,7 @@ int main(int argc, char *argv[])
       {
          const int part = floor(pow(num_tasks, 1.0 / dim) + 1e-2);
          for (int d = 0; d < dim; d++) { nxyz[d] = part; }
+         if (dim == 2) { nxyz[2] = 0; }
          break;
       }
       case 322: // 3D.
@@ -229,6 +230,13 @@ int main(int argc, char *argv[])
    // Refine the mesh further in parallel to increase the resolution.
    for (int lev = 0; lev < rp_levels; lev++) { pmesh->UniformRefinement(); }
 
+   int nzones = pmesh->GetNE(), nzones_min, nzones_max;
+   MPI_Reduce(&nzones, &nzones_min, 1, MPI_INT, MPI_MIN, 0, pmesh->GetComm());
+   MPI_Reduce(&nzones, &nzones_max, 1, MPI_INT, MPI_MAX, 0, pmesh->GetComm());
+   if (myid == 0)
+   { cout << "Zones min/max: " << nzones_min << " " << nzones_max << endl; }
+
+
    // Define the parallel finite element spaces. We use:
    // - H1 (Gauss-Lobatto, continuous) for position and velocity.
    // - L2 (Bernstein, discontinuous) for specific internal energy.
diff --git a/laghos_solver.cpp b/laghos_solver.cpp
index 228524bf..19fc1139 100644
--- a/laghos_solver.cpp
+++ b/laghos_solver.cpp
@@ -103,7 +103,6 @@ LagrangianHydroOperator::LagrangianHydroOperator(int size,
      VMassPA(&quad_data, H1FESpace), locEMassPA(&quad_data, l2_fes),
      locCG(), timer()
 {
-   cout << nzones << endl;
    GridFunctionCoefficient rho_coeff(&rho0);
 
    // Standard local assembly and inversion for energy mass matrices.

From 4ff3b55b3c482453018e390b0dfdc47e67a24373 Mon Sep 17 00:00:00 2001
From: Vladimir Tomov <tomov2@llnl.gov>
Date: Mon, 27 Nov 2017 13:22:17 -0800
Subject: [PATCH 07/12] More changes to avoid int overflows.

---
 laghos_solver.cpp                   | 9 +++++----
 timing/collect_timings_vulcan_3D.sh | 5 +++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/laghos_solver.cpp b/laghos_solver.cpp
index 19fc1139..64ba6f62 100644
--- a/laghos_solver.cpp
+++ b/laghos_solver.cpp
@@ -381,15 +381,16 @@ void LagrangianHydroOperator::PrintTimingData(bool IamRoot, int steps)
    my_rt[4] = my_rt[0] + my_rt[2] + my_rt[3];
    MPI_Reduce(my_rt, rt_max, 5, MPI_DOUBLE, MPI_MAX, 0, H1FESpace.GetComm());
 
-   int mydata[2], alldata[2];
+   HYPRE_Int mydata[2], alldata[2];
    mydata[0] = timer.L2dof_iter;
    mydata[1] = timer.quad_tstep;
-   MPI_Reduce(mydata, alldata, 2, MPI_INT, MPI_SUM, 0, H1FESpace.GetComm());
+   MPI_Reduce(mydata, alldata, 2, HYPRE_MPI_INT, MPI_SUM, 0,
+              H1FESpace.GetComm());
 
    if (IamRoot)
    {
-      const int H1gsize = H1FESpace.GlobalTrueVSize(),
-                L2gsize = L2FESpace.GlobalTrueVSize();
+      const HYPRE_Int H1gsize = H1FESpace.GlobalTrueVSize(),
+                      L2gsize = L2FESpace.GlobalTrueVSize();
       using namespace std;
       cout << endl;
       cout << "CG (H1) total time: " << rt_max[0] << endl;
diff --git a/timing/collect_timings_vulcan_3D.sh b/timing/collect_timings_vulcan_3D.sh
index 9ed594ac..6481b7c4 100755
--- a/timing/collect_timings_vulcan_3D.sh
+++ b/timing/collect_timings_vulcan_3D.sh
@@ -7,7 +7,7 @@
 #   with the 322 partition, use 6/48/384/3072/24576(full machine) nodes.
 part_type=432
 nodes=6144
-minL2dof_node=100
+minL2dof_node=50
 maxL2dof_node=200000
 # End of user input.
 
@@ -34,7 +34,8 @@ echo "sref: "$sref "serial_nzones: "$(( nzones0 * 8**(sref) )) "nproc: "$nproc
 minL2dof=$(( minL2dof_node * nodes ))
 maxL2dof=$(( maxL2dof_node * nodes ))
 
-options=( 'pa' 'fa' )
+#options=( 'pa' 'fa' )
+options=( 'pa' )
 outfile=timings_3d
 
 run_case()

From cc2189ec0b6f16a7cae75c5a5d83f81b40a39dbc Mon Sep 17 00:00:00 2001
From: Vladimir Tomov <tomov2@vulcanlac8.llnl.gov>
Date: Fri, 1 Dec 2017 17:57:21 -0800
Subject: [PATCH 08/12] Updated the vulcan 3D script.

---
 timing/collect_timings_vulcan_3D.sh | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/timing/collect_timings_vulcan_3D.sh b/timing/collect_timings_vulcan_3D.sh
index 6481b7c4..2d328a33 100755
--- a/timing/collect_timings_vulcan_3D.sh
+++ b/timing/collect_timings_vulcan_3D.sh
@@ -5,10 +5,13 @@
 #   with the 222 partition, use 4/32/256/2048/16384 nodes.
 #   with the 432 partition, use 12/96/768/6144(quarter machine) nodes.
 #   with the 322 partition, use 6/48/384/3072/24576(full machine) nodes.
-part_type=432
-nodes=6144
-minL2dof_node=50
+part_type=322
+nodes=24576
+l2orders=(1 2 3)
+minL2dof_node=0
 maxL2dof_node=200000
+steps=2
+cg_iter=50
 # End of user input.
 
 if (( part_type == 222 )); then
@@ -34,8 +37,7 @@ echo "sref: "$sref "serial_nzones: "$(( nzones0 * 8**(sref) )) "nproc: "$nproc
 minL2dof=$(( minL2dof_node * nodes ))
 maxL2dof=$(( maxL2dof_node * nodes ))
 
-#options=( 'pa' 'fa' )
-options=( 'pa' )
+options=( 'pa' 'fa' )
 outfile=timings_3d
 
 run_case()
@@ -43,7 +45,7 @@ run_case()
     # Pass command as all inputs
     # Outputs: order refs h1_dofs l2_dofs h1_cg_rate l2_cg_rate forces_rate update_quad_rate total_rate
 
-    "$@" | tee run.log | awk '
+    "$@" | tee -a run.log | awk '
 BEGIN { ref = 0 }
 /--refine-serial/ { ref += $2 }
 /--refine-parallel/ { ref += $2 }
@@ -60,15 +62,17 @@ END { printf("%d %d %d %d %.8f %.8f %.8f %.8f %.8f\n", order, ref, h1_dofs, l2_d
 
 for method in "${options[@]}"; do
   echo "# order refs h1_dofs l2_dofs h1_cg_rate l2_cg_rate forces_rate update_quad_rate total_rate" > $outfile"_"$method
-  for torder in {0..4}; do
+  for torder in "${l2orders[@]}"; do
     for pref in {0..10}; do
-       nzones=$(( 8**(pref+sref+1) ))
+       nzones=$(( 8**(pref+sref)*nzones0 ))
        nL2dof=$(( nzones*(torder+1)**3 ))
        if (( nproc <= nzones )) && (( nL2dof > minL2dof )) && (( nL2dof < maxL2dof )) ; then
          echo "np"$nproc "Q"$((torder+1))"Q"$torder $pref"ref" $method $outfile"_"$method
+# bigger runs need srun --reservation=...
+#         echo $(run_case srun -n $nproc --reservation=ascasc-dat ../laghos -$method -p 1 -tf 0.8 -pt $part_type \
          echo $(run_case srun -n $nproc ../laghos -$method -p 1 -tf 0.8 -pt $part_type \
-                       --cg-tol 0 --cg-max-steps 50 \
-                       --max-steps 3 \
+                       --cg-tol 0 --cg-max-steps $cg_iter \
+                       --max-steps $steps \
                        --mesh $mesh_file \
                        --refine-serial $sref --refine-parallel $pref \
                        --order-thermo $torder \

From 9e18e83bd461557dc02bbff2318cf89ff3440a06 Mon Sep 17 00:00:00 2001
From: Tzanio <tzanio@llnl.gov>
Date: Thu, 7 Dec 2017 12:52:25 -0800
Subject: [PATCH 09/12] Updated README

---
 README.md | 65 +++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 46 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 219ef588..3fe7946f 100644
--- a/README.md
+++ b/README.md
@@ -17,9 +17,9 @@ discretization and explicit high-order time-stepping.
 
 Laghos is based on the discretization method described in the following article:
 
-> V. Dobrev, Tz. Kolev and R. Rieben,<br>
-> [High-order curvilinear finite element methods for Lagrangian hydrodynamics](https://doi.org/10.1137/120864672), <br>
-> *SIAM Journal on Scientific Computing*, (34) 2012, pp.B606–B641.
+> V. Dobrev, Tz. Kolev and R. Rieben <br>
+> [High-order curvilinear finite element methods for Lagrangian hydrodynamics](https://doi.org/10.1137/120864672) <br>
+> *SIAM Journal on Scientific Computing*, (34) 2012, pp. B606–B641.
 
 Laghos captures the basic structure of many compressible shock hydrocodes,
 including the [BLAST code](http://llnl.gov/casc/blast) at [Lawrence Livermore
@@ -54,10 +54,9 @@ Laghos supports two options for deriving and solving the ODE system, namely the
 algorithm of interest for high orders. For low orders (e.g. 2nd order in 3D),
 both algorithms are of interest.
 
-The full assembly options relies on constructing and utilizing global mass and
-force matrices stored in compressed sparse row (CSR) format.
-
-The [partial assembly](http://ceed.exascaleproject.org/ceed-code) option defines
+The full assembly option relies on constructing and utilizing global mass and
+force matrices stored in compressed sparse row (CSR) format.  In contrast, the
+[partial assembly](http://ceed.exascaleproject.org/ceed-code) option defines
 only the local action of those matrices, which is then used to perform all
 necessary operations. As the local action is defined by utilizing the tensor
 structure of the finite element spaces, the amount of data storage, memory
@@ -86,14 +85,14 @@ Other computational motives in Laghos include the following:
   preparation and the application costs are important for this operator.
 - Domain-decomposed MPI parallelism.
-- Optional in-situ visualization with [GLVis](http:/glvis.org) and data output
+- Optional in-situ visualization with [GLVis](http://glvis.org) and data output
-  for visualization / data analysis with [VisIt](http://visit.llnl.gov).
+  for visualization and data analysis with [VisIt](http://visit.llnl.gov).
 
 ## Code Structure
 
 - The file `laghos.cpp` contains the main driver with the time integration loop
-  starting around line 370.
+  starting around line 422.
 - In each time step, the ODE system of interest is constructed and solved by
-  the class `LagrangianHydroOperator`, defined around line 312 of `laghos.cpp`
+  the class `LagrangianHydroOperator`, defined around line 366 of `laghos.cpp`
   and implemented in files `laghos_solver.hpp` and `laghos_solver.cpp`.
 - All quadrature-based computations are performed in the function
   `LagrangianHydroOperator::UpdateQuadratureData` in `laghos_solver.cpp`.
@@ -131,7 +130,7 @@ To build the miniapp, first download *hypre* and METIS from the links above
 and put everything on the same level as Laghos:
 ```sh
 ~> ls
-Laghos/ hypre-2.10.0b.tar.gz   metis-4.0.tar.gz
+Laghos/  hypre-2.10.0b.tar.gz  metis-4.0.tar.gz
 ```
 
 Build *hypre*:
@@ -142,6 +141,8 @@ Build *hypre*:
 ~/hypre-2.10.0b/src> make -j
 ~/hypre-2.10.0b/src> cd ../..
 ```
+For large runs (problem size above 2 billion unknowns), add the
+`--enable-bigint` option to the above `configure` line.
 
 Build METIS:
 ```sh
@@ -151,11 +152,14 @@ Build METIS:
 ~/metis-4.0.3> cd ..
 ~> ln -s metis-4.0.3 metis-4.0
 ```
+This build is optional, as MFEM can be build without METIS by specifying
+`MFEM_USE_METIS = NO` below.
 
-Clone and build the parallel version of MFEM:
+Clone and build the parallel version of MFEM starting from the `laghos-v1.0` tag:
 ```sh
 ~> git clone git@github.com:mfem/mfem.git ./mfem
 ~> cd mfem/
+~/mfem> git checkout laghos-v1.0
 ~/mfem> make parallel -j
 ~/mfem> cd ..
 ```
@@ -245,16 +249,17 @@ round-off distance from the above reference values.
 
 ## Performance Timing and FOM
 
-Each time step in Laghos contains 4 major distinct computations:
+Each time step in Laghos contains 3 major distinct computations:
 
 1. The inversion of the global kinematic mass matrix (CG H1).
-2. The inversion of the local thermodynamic mass matrices (CG L2).
-3. The force operator evaluation from degrees of freedom to quadrature points (Forces).
-4. The physics kernel in quadrature points (UpdateQuadData).
+2. The force operator evaluation from degrees of freedom to quadrature points (Forces).
+3. The physics kernel in quadrature points (UpdateQuadData).
 
 By default Laghos is instrumented to report the total execution times and rates,
-in terms of millions of degrees of freedom (megadofs), for each of these
-computational phases.
+in terms of millions of degrees of freedom per second (megadofs), for each of
+these computational phases. (The time for inversion of the local thermodynamic
+mass matrices (CG L2) is also reported, but that takes a small part of the
+overall computation.)
 
 Laghos also reports the total rate for these major kernels, which is a proposed
 **Figure of Merit (FOM)** for benchmarking purposes.  Given a computational
@@ -262,13 +267,35 @@ allocation, the FOM should be reported for different problem sizes and finite
 element orders, as illustrated in the sample scripts in the [timing](./timing)
 directory.
 
+A sample run on the [Vulcan](https://computation.llnl.gov/computers/vulcan) BG/Q
+machine at LLNL is:
+
+```
+srun -n 393216 laghos -pa -p 1 -tf 0.6 -no-vis
+                      -pt 322 -m data/cube_12_hex.mesh
+                      --cg-tol 0 --cg-max-iter 50 --max-steps 2
+                      -ok 3 -ot 2 -rs 5 -rp 3
+```
+This is Q3-Q2 3D computation on 393,216 MPI ranks (24,576 nodes) that produces
+rates of approximately 168497, 74221, and 16696 megadofs, and a total FOM of
+about 2073.
+
+To make the above run 8 times bigger, one can either weak scale by using 8 times
+as many MPI tasks and increasing the number of serial refinements: `srun -n
+3145728 ... -rs 6 -rp 3`, or use the same number of MPI tasks but increase the
+local problem on each of them by doing more parallel refinements: `srun -n
+393216 ... -rs 5 -rp 4`.
+
 ## Versions
 
 In addition to the main MPI-based CPU implementation in https://github.com/CEED/Laghos,
 the following versions of Laghos have been developed
 
 - A serial version in the [serial](./serial) directory.
-- [GPU version](https://github.com/dmed256/Laghos/tree/occa-dev) based on [OCCA](http://libocca.org/).
+- [GPU version](https://github.com/dmed256/Laghos/tree/occa-dev) based on
+  [OCCA](http://libocca.org/).
+- A [RAJA](https://software.llnl.gov/RAJA/)-based version in the
+  [raja-dev](https://github.com/CEED/Laghos/tree/raja-dev) branch.
 
 ## Contact
 

From 4fe30b6a7a4188840a28006f726cfb512b538eb4 Mon Sep 17 00:00:00 2001
From: Vladimir Tomov <tomov2@llnl.gov>
Date: Thu, 7 Dec 2017 13:53:00 -0800
Subject: [PATCH 10/12] More MPI partition options and improved comments in the
 --help.

---
 laghos.cpp | 51 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 21 deletions(-)

diff --git a/laghos.cpp b/laghos.cpp
index ed0f165b..f3adc44d 100644
--- a/laghos.cpp
+++ b/laghos.cpp
@@ -92,7 +92,7 @@ int main(int argc, char *argv[])
    bool visit = false;
    bool gfprint = false;
    const char *basename = "results/Laghos";
-   int partition_type = 222;
+   int partition_type = 111;
 
    OptionsParser args(argc, argv);
    args.AddOption(&mesh_file, "-m", "--mesh",
@@ -133,7 +133,14 @@ int main(int argc, char *argv[])
    args.AddOption(&basename, "-k", "--outputfilename",
                   "Name of the visit dump files");
    args.AddOption(&partition_type, "-pt", "--partition",
-                  "Customized x/y/z partition of the initial mesh.");
+      "Customized x/y/z Cartesian MPI partitioning of the serial mesh.\n\t"
+      "Here x,y,z are relative task ratios in each direction.\n\t"
+      "Example: with 48 mpi tasks and -pt 321, one would get a Cartesian\n\t"
+      "partition of the serial mesh by (6,4,2) MPI tasks in (x,y,z).\n\t"
+      "NOTE: the serially refined mesh must have the appropriate number\n\t"
+      "of zones in each direction, e.g., the number of zones in direction x\n\t"
+      "must be divisible by the number of MPI tasks in direction x.\n\t"
+      "Available options: 11, 21, 111, 211, 321, 322, 432.");
    args.Parse();
    if (!args.Good())
    {
@@ -159,34 +166,36 @@ int main(int argc, char *argv[])
 
    // Parallel partitioning of the mesh.
    ParMesh *pmesh = NULL;
-   const int num_tasks = mpi.WorldSize();
+   const int num_tasks = mpi.WorldSize(); int unit;
    int *nxyz = new int[dim];
    switch (partition_type)
    {
-      case 22:
-      case 222:
-      {
-         const int part = floor(pow(num_tasks, 1.0 / dim) + 1e-2);
-         for (int d = 0; d < dim; d++) { nxyz[d] = part; }
+      case 11:
+      case 111:
+         unit = floor(pow(num_tasks, 1.0 / dim) + 1e-2);
+         for (int d = 0; d < dim; d++) { nxyz[d] = unit; }
          if (dim == 2) { nxyz[2] = 0; }
          break;
-      }
+      case 21: // 2D
+         unit = floor(pow(num_tasks / 2, 1.0 / 2) + 1e-2);
+         nxyz[0] = 2 * unit; nxyz[1] = unit; nxyz[2] = 0;
+         break;
+     case 211: // 3D.
+         unit = floor(pow(num_tasks / 2, 1.0 / 3) + 1e-2);
+         nxyz[0] = 2 * unit; nxyz[1] = unit; nxyz[2] = unit;
+         break;
+      case 321: // 3D.
+         unit = floor(pow(num_tasks / 6, 1.0 / 3) + 1e-2);
+         nxyz[0] = 3 * unit; nxyz[1] = 2 * unit; nxyz[2] = unit;
+         break;
       case 322: // 3D.
-      {
-         const int min_part = floor(pow(2 * num_tasks / 3, 1.0 / 3) + 1e-2);
-         nxyz[0] = 3 * min_part / 2;
-         nxyz[1] = min_part;
-         nxyz[2] = min_part;
+         unit = floor(pow(2 * num_tasks / 3, 1.0 / 3) + 1e-2);
+         nxyz[0] = 3 * unit / 2; nxyz[1] = unit; nxyz[2] = unit;
          break;
-      }
       case 432: // 3D.
-      {
-         const int min_part = floor(pow(num_tasks / 3, 1.0 / 3) + 1e-2);
-         nxyz[0] = 2 * min_part;
-         nxyz[1] = 3 * min_part / 2;
-         nxyz[2] = min_part;
+         unit = floor(pow(num_tasks / 3, 1.0 / 3) + 1e-2);
+         nxyz[0] = 2 * unit; nxyz[1] = 3 * unit / 2; nxyz[2] = unit;
          break;
-      }
       default:
          if (myid == 0)
          {

From aba0bdee9ae8212a3e538ad3724d072d678729da Mon Sep 17 00:00:00 2001
From: Tzanio <tzanio@llnl.gov>
Date: Thu, 7 Dec 2017 14:15:07 -0800
Subject: [PATCH 11/12] Minor adjustments

---
 README.md                    | 13 +++++++------
 laghos.cpp                   | 22 +++++++++++-----------
 laghos_assembly.cpp          | 10 +++++-----
 laghos_assembly.hpp          |  8 ++++----
 laghos_solver.cpp            |  6 +++---
 laghos_solver.hpp            |  5 ++---
 serial/laghos_assembly_s.cpp | 10 +++++-----
 serial/laghos_assembly_s.hpp | 16 ++++++----------
 serial/laghos_s.cpp          |  4 ++--
 serial/laghos_solver_s.cpp   |  6 +++---
 serial/laghos_solver_s.hpp   |  4 ++--
 11 files changed, 50 insertions(+), 54 deletions(-)

diff --git a/README.md b/README.md
index 3fe7946f..5259a8df 100644
--- a/README.md
+++ b/README.md
@@ -90,9 +90,9 @@ Other computational motives in Laghos include the following:
 ## Code Structure
 
 - The file `laghos.cpp` contains the main driver with the time integration loop
-  starting around line 422.
+  starting around line 431.
 - In each time step, the ODE system of interest is constructed and solved by
-  the class `LagrangianHydroOperator`, defined around line 366 of `laghos.cpp`
+  the class `LagrangianHydroOperator`, defined around line 375 of `laghos.cpp`
   and implemented in files `laghos_solver.hpp` and `laghos_solver.cpp`.
 - All quadrature-based computations are performed in the function
   `LagrangianHydroOperator::UpdateQuadratureData` in `laghos_solver.cpp`.
@@ -118,7 +118,7 @@ Other computational motives in Laghos include the following:
 Laghos has the following external dependencies:
 
 - *hypre*, used for parallel linear algebra, we recommend version 2.10.0b<br>
-   https://computation.llnl.gov/casc/hypre/software.html,
+   https://computation.llnl.gov/casc/hypre/software.html
 
 -  METIS, used for parallel domain decomposition (optional), we recommend [version 4.0.3](http://glaros.dtc.umn.edu/gkhome/fetch/sw/metis/OLD/metis-4.0.3.tar.gz) <br>
    http://glaros.dtc.umn.edu/gkhome/metis/metis/download
@@ -163,14 +163,15 @@ Clone and build the parallel version of MFEM starting from the `laghos-v1.0` tag
 ~/mfem> make parallel -j
 ~/mfem> cd ..
 ```
+For more details, see the [MFEM building page](http://mfem.org/building/).
 
 Build Laghos
 ```sh
 ~> cd Laghos/
-~> make
+~/Laghos> make
 ```
-
-For more details, see the [MFEM building page](http://mfem.org/building/).
+This can be followed by `make test` and `make install` to check and install the
+build respectively. See `make help` for additional options.
 
 ## Running
 
diff --git a/laghos.cpp b/laghos.cpp
index f3adc44d..3df67811 100644
--- a/laghos.cpp
+++ b/laghos.cpp
@@ -31,7 +31,7 @@
 //
 //    V. Dobrev, Tz. Kolev and R. Rieben, "High-order curvilinear finite element
 //    methods for Lagrangian hydrodynamics", SIAM Journal on Scientific
-//    Computing, (34) 2012, pp.B606–B641, https://doi.org/10.1137/120864672.
+//    Computing, (34) 2012, pp. B606–B641, https://doi.org/10.1137/120864672.
 //
 // Sample runs:
 //    mpirun -np 8 laghos -p 0 -m data/square01_quad.mesh -rs 3 -tf 0.75
@@ -133,14 +133,14 @@ int main(int argc, char *argv[])
    args.AddOption(&basename, "-k", "--outputfilename",
                   "Name of the visit dump files");
    args.AddOption(&partition_type, "-pt", "--partition",
-      "Customized x/y/z Cartesian MPI partitioning of the serial mesh.\n\t"
-      "Here x,y,z are relative task ratios in each direction.\n\t"
-      "Example: with 48 mpi tasks and -pt 321, one would get a Cartesian\n\t"
-      "partition of the serial mesh by (6,4,2) MPI tasks in (x,y,z).\n\t"
-      "NOTE: the serially refined mesh must have the appropriate number\n\t"
-      "of zones in each direction, e.g., the number of zones in direction x\n\t"
-      "must be divisible by the number of MPI tasks in direction x.\n\t"
-      "Available options: 11, 21, 111, 211, 321, 322, 432.");
+                  "Customized x/y/z Cartesian MPI partitioning of the serial mesh.\n\t"
+                  "Here x,y,z are relative task ratios in each direction.\n\t"
+                  "Example: with 48 mpi tasks and -pt 321, one would get a Cartesian\n\t"
+                  "partition of the serial mesh by (6,4,2) MPI tasks in (x,y,z).\n\t"
+                  "NOTE: the serially refined mesh must have the appropriate number\n\t"
+                  "of zones in each direction, e.g., the number of zones in direction x\n\t"
+                  "must be divisible by the number of MPI tasks in direction x.\n\t"
+                  "Available options: 11, 21, 111, 211, 321, 322, 432.");
    args.Parse();
    if (!args.Good())
    {
@@ -180,7 +180,7 @@ int main(int argc, char *argv[])
          unit = floor(pow(num_tasks / 2, 1.0 / 2) + 1e-2);
          nxyz[0] = 2 * unit; nxyz[1] = unit; nxyz[2] = 0;
          break;
-     case 211: // 3D.
+      case 211: // 3D.
          unit = floor(pow(num_tasks / 2, 1.0 / 3) + 1e-2);
          nxyz[0] = 2 * unit; nxyz[1] = unit; nxyz[2] = unit;
          break;
@@ -407,7 +407,7 @@ int main(int argc, char *argv[])
                      "Specific Internal Energy", Wx, Wy, Ww, Wh);
    }
 
-   // Save data for VisIt visualization
+   // Save data for VisIt visualization.
    VisItDataCollection visit_dc(basename, pmesh);
    if (visit)
    {
diff --git a/laghos_assembly.cpp b/laghos_assembly.cpp
index 37db4148..6888ba46 100644
--- a/laghos_assembly.cpp
+++ b/laghos_assembly.cpp
@@ -360,7 +360,7 @@ void ForcePAOperator::MultTranspose(const Vector &vecH1, Vector &vecL2) const
    else { MFEM_ABORT("Unsupported dimension"); }
 }
 
-// Force matrix action on quadrilateral elements in 2D
+// Force matrix action on quadrilateral elements in 2D.
 void ForcePAOperator::MultQuad(const Vector &vecL2, Vector &vecH1) const
 {
    const int nH1dof1D = tensors1D->HQshape1D.Height(),
@@ -429,7 +429,7 @@ void ForcePAOperator::MultQuad(const Vector &vecL2, Vector &vecH1) const
    }
 }
 
-// Force matrix action on hexahedral elements in 3D
+// Force matrix action on hexahedral elements in 3D.
 void ForcePAOperator::MultHex(const Vector &vecL2, Vector &vecH1) const
 {
    const int nH1dof1D = tensors1D->HQshape1D.Height(),
@@ -590,7 +590,7 @@ void ForcePAOperator::MultHex(const Vector &vecL2, Vector &vecH1) const
    }
 }
 
-// Transpose force matrix action on quadrilateral elements in 2D
+// Transpose force matrix action on quadrilateral elements in 2D.
 void ForcePAOperator::MultTransposeQuad(const Vector &vecH1,
                                         Vector &vecL2) const
 {
@@ -658,7 +658,7 @@ void ForcePAOperator::MultTransposeQuad(const Vector &vecH1,
    }
 }
 
-// Transpose force matrix action on hexahedral elements in 3D
+// Transpose force matrix action on hexahedral elements in 3D.
 void ForcePAOperator::MultTransposeHex(const Vector &vecH1, Vector &vecL2) const
 {
    const int nH1dof1D = tensors1D->HQshape1D.Height(),
@@ -823,7 +823,7 @@ void MassPAOperator::Mult(const Vector &x, Vector &y) const
    }
 }
 
-// Mass matrix action on quadrilateral elements in 2D
+// Mass matrix action on quadrilateral elements in 2D.
 void MassPAOperator::MultQuad(const Vector &x, Vector &y) const
 {
    const H1_QuadrilateralElement *fe_H1 =
diff --git a/laghos_assembly.hpp b/laghos_assembly.hpp
index 7de31e3c..a35f37fd 100644
--- a/laghos_assembly.hpp
+++ b/laghos_assembly.hpp
@@ -136,14 +136,14 @@ class ForcePAOperator : public Operator
    QuadratureData *quad_data;
    ParFiniteElementSpace &H1FESpace, &L2FESpace;
 
-   // Force matrix action on quadrilateral elements in 2D
+   // Force matrix action on quadrilateral elements in 2D.
    void MultQuad(const Vector &vecL2, Vector &vecH1) const;
-   // Force matrix action on hexahedral elements in 3D
+   // Force matrix action on hexahedral elements in 3D.
    void MultHex(const Vector &vecL2, Vector &vecH1) const;
 
-   // Transpose force matrix action on quadrilateral elements in 2D
+   // Transpose force matrix action on quadrilateral elements in 2D.
    void MultTransposeQuad(const Vector &vecH1, Vector &vecL2) const;
-   // Transpose force matrix action on hexahedral elements in 3D
+   // Transpose force matrix action on hexahedral elements in 3D.
    void MultTransposeHex(const Vector &vecH1, Vector &vecL2) const;
 
 public:
diff --git a/laghos_solver.cpp b/laghos_solver.cpp
index 64ba6f62..6a4c2104 100644
--- a/laghos_solver.cpp
+++ b/laghos_solver.cpp
@@ -143,7 +143,7 @@ LagrangianHydroOperator::LagrangianHydroOperator(int size,
       }
    }
 
-   // Initial local mesh size (assumes similar cells).
+   // Initial local mesh size (assumes all mesh elements are of the same type).
    double loc_area = 0.0, glob_area;
    int loc_z_cnt = nzones, glob_z_cnt;
    ParMesh *pm = H1FESpace.GetParMesh();
@@ -452,8 +452,8 @@ void LagrangianHydroOperator::UpdateQuadratureData(const Vector &S) const
    *e_b   = new double[nqp_batch],
    *p_b   = new double[nqp_batch],
    *cs_b  = new double[nqp_batch];
-   // Jacobians of reference->physical transformations for all quadrature
-   // points in the batch.
+   // Jacobians of reference->physical transformations for all quadrature points
+   // in the batch.
    DenseTensor *Jpr_b = new DenseTensor[nqp_batch];
    for (int b = 0; b < nbatches; b++)
    {
diff --git a/laghos_solver.hpp b/laghos_solver.hpp
index 172213f5..2c78a417 100644
--- a/laghos_solver.hpp
+++ b/laghos_solver.hpp
@@ -40,7 +40,6 @@ void VisualizeField(socketstream &sock, const char *vishost, int visport,
                     int x = 0, int y = 0, int w = 400, int h = 400,
                     bool vec = false);
 
-
 // These are defined in laghos.cpp
 double rho0(const Vector &);
 void v0(const Vector &, Vector &);
@@ -94,7 +93,7 @@ class LagrangianHydroOperator : public TimeDependentOperator
    mutable bool quad_data_is_current;
 
    // Force matrix that combines the kinematic and thermodynamic spaces. It is
-   // assembled in each time step and then it's used to compute the final
+   // assembled in each time step and then it is used to compute the final
    // right-hand sides for momentum and specific internal energy.
    mutable MixedBilinearForm Force;
 
@@ -135,7 +134,7 @@ class LagrangianHydroOperator : public TimeDependentOperator
    // Solve for dx_dt, dv_dt and de_dt.
    virtual void Mult(const Vector &S, Vector &dS_dt) const;
 
-   // Calls UpdateQuadratureData to compute the new quad_data.dt_est.
+   // Calls UpdateQuadratureData to compute the new quad_data.dt_estimate.
    double GetTimeStepEstimate(const Vector &S) const;
    void ResetTimeStepEstimate() const;
    void ResetQuadratureData() const { quad_data_is_current = false; }
diff --git a/serial/laghos_assembly_s.cpp b/serial/laghos_assembly_s.cpp
index b99e8632..736df0b0 100644
--- a/serial/laghos_assembly_s.cpp
+++ b/serial/laghos_assembly_s.cpp
@@ -358,7 +358,7 @@ void ForcePAOperator::MultTranspose(const Vector &vecH1, Vector &vecL2) const
    else { MFEM_ABORT("Unsupported dimension"); }
 }
 
-// Force matrix action on quadrilateral elements in 2D
+// Force matrix action on quadrilateral elements in 2D.
 void ForcePAOperator::MultQuad(const Vector &vecL2, Vector &vecH1) const
 {
    const int nH1dof1D = tensors1D->HQshape1D.Height(),
@@ -427,7 +427,7 @@ void ForcePAOperator::MultQuad(const Vector &vecL2, Vector &vecH1) const
    }
 }
 
-// Force matrix action on hexahedral elements in 3D
+// Force matrix action on hexahedral elements in 3D.
 void ForcePAOperator::MultHex(const Vector &vecL2, Vector &vecH1) const
 {
    const int nH1dof1D = tensors1D->HQshape1D.Height(),
@@ -588,7 +588,7 @@ void ForcePAOperator::MultHex(const Vector &vecL2, Vector &vecH1) const
    }
 }
 
-// Transpose force matrix action on quadrilateral elements in 2D
+// Transpose force matrix action on quadrilateral elements in 2D.
 void ForcePAOperator::MultTransposeQuad(const Vector &vecH1,
                                         Vector &vecL2) const
 {
@@ -656,7 +656,7 @@ void ForcePAOperator::MultTransposeQuad(const Vector &vecH1,
    }
 }
 
-// Transpose force matrix action on hexahedral elements in 3D
+// Transpose force matrix action on hexahedral elements in 3D.
 void ForcePAOperator::MultTransposeHex(const Vector &vecH1, Vector &vecL2) const
 {
    const int nH1dof1D = tensors1D->HQshape1D.Height(),
@@ -836,7 +836,7 @@ void MassPAOperator::Mult(const Vector &x, Vector &y) const
    }
 }
 
-// Mass matrix action on quadrilateral elements in 2D
+// Mass matrix action on quadrilateral elements in 2D.
 void MassPAOperator::MultQuad(const Vector &x, Vector &y) const
 {
    const H1_QuadrilateralElement *fe_H1 =
diff --git a/serial/laghos_assembly_s.hpp b/serial/laghos_assembly_s.hpp
index 8ea37a6f..9fd6789d 100644
--- a/serial/laghos_assembly_s.hpp
+++ b/serial/laghos_assembly_s.hpp
@@ -133,14 +133,14 @@ class ForcePAOperator : public Operator
    QuadratureData *quad_data;
    FiniteElementSpace &H1FESpace, &L2FESpace;
 
-   // Force matrix action on quadrilateral elements in 2D
+   // Force matrix action on quadrilateral elements in 2D.
    void MultQuad(const Vector &vecL2, Vector &vecH1) const;
-   // Force matrix action on hexahedral elements in 3D
+   // Force matrix action on hexahedral elements in 3D.
    void MultHex(const Vector &vecL2, Vector &vecH1) const;
 
-   // Transpose force matrix action on quadrilateral elements in 2D
+   // Transpose force matrix action on quadrilateral elements in 2D.
    void MultTransposeQuad(const Vector &vecH1, Vector &vecL2) const;
-   // Transpose force matrix action on hexahedral elements in 3D
+   // Transpose force matrix action on hexahedral elements in 3D.
    void MultTransposeHex(const Vector &vecH1, Vector &vecL2) const;
 
 public:
@@ -164,13 +164,9 @@ class MassPAOperator : public Operator
    QuadratureData *quad_data;
    FiniteElementSpace &FESpace;
 
-   Array<int> *ess_tdofs;
-
-   mutable GridFunction x_gf, y_gf;
-
-   // Mass matrix action on quadrilateral elements in 2D
+   // Mass matrix action on quadrilateral elements in 2D.
    void MultQuad(const Vector &x, Vector &y) const;
-   // Mass matrix action on hexahedral elements in 3D
+   // Mass matrix action on hexahedral elements in 3D.
    void MultHex(const Vector &x, Vector &y) const;
 
 public:
diff --git a/serial/laghos_s.cpp b/serial/laghos_s.cpp
index 6c30279c..87827f60 100644
--- a/serial/laghos_s.cpp
+++ b/serial/laghos_s.cpp
@@ -31,7 +31,7 @@
 //
 //    V. Dobrev, Tz. Kolev and R. Rieben, "High-order curvilinear finite element
 //    methods for Lagrangian hydrodynamics", SIAM Journal on Scientific
-//    Computing, (34) 2012, pp.B606–B641, https://doi.org/10.1137/120864672.
+//    Computing, (34) 2012, pp. B606–B641, https://doi.org/10.1137/120864672.
 //
 // Sample runs:
 //    ./laghos -p 0 -m ../data/square01_quad.mesh -rs 3 -tf 0.75
@@ -283,7 +283,7 @@ int main(int argc, char *argv[])
                      "Specific Internal Energy", Wx, Wy, Ww, Wh);
    }
 
-   // Save data for VisIt visualization
+   // Save data for VisIt visualization.
    VisItDataCollection visit_dc(basename, mesh);
    if (visit)
    {
diff --git a/serial/laghos_solver_s.cpp b/serial/laghos_solver_s.cpp
index 182cde8f..d22d8d8c 100644
--- a/serial/laghos_solver_s.cpp
+++ b/serial/laghos_solver_s.cpp
@@ -128,7 +128,7 @@ LagrangianHydroOperator::LagrangianHydroOperator(int size,
       }
    }
 
-   // Initial local mesh size (assumes similar cells).
+   // Initial local mesh size (assumes all mesh elements are of the same type).
    double area = 0.0;
    Mesh *m = H1FESpace.GetMesh();
    for (int i = 0; i < nzones; i++) { area += m->GetElementVolume(i); }
@@ -378,8 +378,8 @@ void LagrangianHydroOperator::UpdateQuadratureData(const Vector &S) const
    *e_b   = new double[nqp_batch],
    *p_b   = new double[nqp_batch],
    *cs_b  = new double[nqp_batch];
-   // Jacobians of reference->physical transformations for all quadrature
-   // points in the batch.
+   // Jacobians of reference->physical transformations for all quadrature points
+   // in the batch.
    DenseTensor *Jpr_b = new DenseTensor[nqp_batch];
    for (int b = 0; b < nbatches; b++)
    {
diff --git a/serial/laghos_solver_s.hpp b/serial/laghos_solver_s.hpp
index 72c47f8f..1c77518b 100644
--- a/serial/laghos_solver_s.hpp
+++ b/serial/laghos_solver_s.hpp
@@ -73,7 +73,7 @@ class LagrangianHydroOperator : public TimeDependentOperator
    mutable bool quad_data_is_current;
 
    // Force matrix that combines the kinematic and thermodynamic spaces. It is
-   // assembled in each time step and then it's used to compute the final
+   // assembled in each time step and then it is used to compute the final
    // right-hand sides for momentum and specific internal energy.
    mutable MixedBilinearForm Force;
 
@@ -111,7 +111,7 @@ class LagrangianHydroOperator : public TimeDependentOperator
    // Solve for dx_dt, dv_dt and de_dt.
    virtual void Mult(const Vector &S, Vector &dS_dt) const;
 
-   // Calls UpdateQuadratureData to compute the new quad_data.dt_est.
+   // Calls UpdateQuadratureData to compute the new quad_data.dt_estimate.
    double GetTimeStepEstimate(const Vector &S) const;
    void ResetTimeStepEstimate() const;
    void ResetQuadratureData() const { quad_data_is_current = false; }

From d2ee50bf17d006500b054a867ea6da7354190597 Mon Sep 17 00:00:00 2001
From: Tzanio <tzanio@llnl.gov>
Date: Thu, 7 Dec 2017 14:42:19 -0800
Subject: [PATCH 12/12] Final v1.0 adjustments in README.md

---
 README.md | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 5259a8df..8ca95470 100644
--- a/README.md
+++ b/README.md
@@ -127,7 +127,7 @@ Laghos has the following external dependencies:
   https://github.com/mfem/mfem
 
 To build the miniapp, first download *hypre* and METIS from the links above
-and put everything on the same level as Laghos:
+and put everything on the same level as the `Laghos` directory:
 ```sh
 ~> ls
 Laghos/  hypre-2.10.0b.tar.gz  metis-4.0.tar.gz
@@ -155,7 +155,7 @@ Build METIS:
 This build is optional, as MFEM can be build without METIS by specifying
 `MFEM_USE_METIS = NO` below.
 
-Clone and build the parallel version of MFEM starting from the `laghos-v1.0` tag:
+Clone and build the parallel version of MFEM:
 ```sh
 ~> git clone git@github.com:mfem/mfem.git ./mfem
 ~> cd mfem/
@@ -163,7 +163,10 @@ Clone and build the parallel version of MFEM starting from the `laghos-v1.0` tag
 ~/mfem> make parallel -j
 ~/mfem> cd ..
 ```
-For more details, see the [MFEM building page](http://mfem.org/building/).
+The above uses the `laghos-v1.0` tag of MFEM, which is guaranteed to work with
+Laghos v1.0. Alternatively, one can use the latest versions of the MFEM and
+Laghos `master` branches (provided there are no conflicts). See the [MFEM
+building page](http://mfem.org/building/) for additional details.
 
 Build Laghos
 ```sh
@@ -186,7 +189,8 @@ mpirun -np 8 laghos -p 1 -m data/square01_quad.mesh -rs 3 -tf 0.8 -no-vis -pa
 mpirun -np 8 laghos -p 1 -m data/cube01_hex.mesh -rs 2 -tf 0.6 -no-vis -pa
 ```
 
-The latter produces the following density plot (when run with `-vis` instead of `-no-vis`)
+The latter produces the following density plot (when run with the `-vis` instead
+of the `-no-vis` option)
 
 ![Sedov blast image](data/sedov.png)
 
@@ -202,7 +206,8 @@ mpirun -np 8 laghos -p 0 -m data/square01_quad.mesh -rs 3 -tf 0.5 -no-vis -pa
 mpirun -np 8 laghos -p 0 -m data/cube01_hex.mesh -rs 1 -cfl 0.1 -tf 0.25 -no-vis -pa
 ```
 
-The latter produces the following velocity magnitude plot (when run with `-vis` instead of `-no-vis`)
+The latter produces the following velocity magnitude plot (when run with the
+`-vis` instead of the `-no-vis` option)
 
 ![Taylor-Green image](data/tg.png)
 
@@ -217,7 +222,8 @@ mpirun -np 8 laghos -p 3 -m data/rectangle01_quad.mesh -rs 2 -tf 2.5 -cfl 0.025
 mpirun -np 8 laghos -p 3 -m data/box01_hex.mesh -rs 1 -tf 2.5 -cfl 0.05 -no-vis -pa
 ```
 
-The latter produces the following specific internal energy plot (when run with `-vis` instead of `-no-vis`)
+The latter produces the following specific internal energy plot (when run with
+the `-vis` instead of the `-no-vis` option)
 
 ![Triple-point image](data/tp.png)
 
@@ -279,7 +285,7 @@ srun -n 393216 laghos -pa -p 1 -tf 0.6 -no-vis
 ```
 This is Q3-Q2 3D computation on 393,216 MPI ranks (24,576 nodes) that produces
 rates of approximately 168497, 74221, and 16696 megadofs, and a total FOM of
-about 2073.
+about 2073 megadofs.
 
 To make the above run 8 times bigger, one can either weak scale by using 8 times
 as many MPI tasks and increasing the number of serial refinements: `srun -n